Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * @t The Goblin Database Kernel
15 : * @v Version 3.05
16 : * @a Martin L. Kersten, Peter Boncz, Niels Nes, Sjoerd Mullender
17 : *
18 : * @+ The Inner Core
19 : * The innermost library of the MonetDB database system is formed by
20 : * the library called GDK, an abbreviation of Goblin Database Kernel.
21 : * Its development was originally rooted in the design of a pure
22 : * active-object-oriented programming language, before development
23 : * was shifted towards a reusable database kernel engine.
24 : *
25 : * GDK is a C library that provides ACID properties on a DSM model
26 : * @tex
27 : * [@cite{Copeland85}]
28 : * @end tex
29 : * , using main-memory
30 : * database algorithms
31 : * @tex
32 : * [@cite{Garcia-Molina92}]
33 : * @end tex
34 : * built on virtual-memory
35 : * OS primitives and multi-threaded parallelism.
36 : * Its implementation has undergone various changes over its decade
37 : * of development, many of which were driven by external needs to
38 : * obtain a robust and fast database system.
39 : *
40 : * The coding scheme explored in GDK has also laid a foundation to
41 : * communicate over time experiences and to provide (hopefully)
42 : * helpful advice near to the place where the code-reader needs it.
43 : * Of course, over such a long time the documentation diverges from
44 : * reality. Especially in areas where the environment of this package
45 : * is being described.
46 : * Consider such deviations as historic landmarks, e.g. crystallization
47 : * of brave ideas and mistakes rectified at a later stage.
48 : *
49 : * @+ Short Outline
50 : * The facilities provided in this implementation are:
51 : * @itemize
52 : * @item
53 : * GDK or Goblin Database Kernel routines for session management
54 : * @item
55 : * BAT routines that define the primitive operations on the
56 : * database tables (BATs).
57 : * @item
58 : * BBP routines to manage the BAT Buffer Pool (BBP).
59 : * @item
60 : * ATOM routines to manipulate primitive types, define new types
61 : * using an ADT interface.
62 : * @item
63 : * HEAP routines for manipulating heaps: linear spaces of memory
64 : * that are GDK's vehicle of mass storage (on which BATs are built).
65 : * @item
66 : * DELTA routines to access inserted/deleted elements within a
67 : * transaction.
68 : * @item
69 : * HASH routines for manipulating GDK's built-in linear-chained
70 : * hash tables, for accelerating lookup searches on BATs.
71 : * @item
72 : * TM routines that provide basic transaction management primitives.
73 : * @item
74 : * TRG routines that provided active database support. [DEPRECATED]
75 : * @item
76 : * ALIGN routines that implement BAT alignment management.
77 : * @end itemize
78 : *
79 : * The Binary Association Table (BAT) is the lowest level of storage
80 : * considered in the Goblin runtime system
81 : * @tex
82 : * [@cite{Goblin}]
83 : * @end tex
84 : * . A BAT is a
85 : * self-descriptive main-memory structure that represents the
86 : * @strong{binary relationship} between two atomic types. The
87 : * association can be defined over:
88 : * @table @code
89 : * @item void:
90 : * virtual-OIDs: a densely ascending column of OIDs (takes zero-storage).
91 : * @item bit:
92 : * Booleans, implemented as one byte values.
93 : * @item bte:
94 : * Tiny (1-byte) integers (8-bit @strong{integer}s).
95 : * @item sht:
96 : * Short integers (16-bit @strong{integer}s).
97 : * @item int:
98 : * This is the C @strong{int} type (32-bit).
99 : * @item oid:
100 : * Unique @strong{long int} values used as object identifiers. The
101 : * highest bit is always cleared. Thus, oids are 31-bit numbers on
102 : * 32-bit systems, and 63-bit numbers on 64-bit systems.
103 : * @item ptr:
104 : * Memory pointer values. DEPRECATED. Can only be stored in transient
105 : * BATs.
106 : * @item flt:
107 : * The IEEE @strong{float} type.
108 : * @item dbl:
109 : * The IEEE @strong{double} type.
110 : * @item lng:
111 : * Longs: the C @strong{long long} type (64-bit integers).
112 : * @item hge:
113 : * "huge" integers: the GCC @strong{__int128} type (128-bit integers).
114 : * @item str:
115 : * UTF-8 strings (Unicode). A zero-terminated byte sequence.
116 : * @item bat:
117 : * Bat descriptor. This allows for recursive administered tables, but
118 : * severely complicates transaction management. Therefore, they CAN
119 : * ONLY BE STORED IN TRANSIENT BATs.
120 : * @end table
121 : *
122 : * This model can be used as a back-end model underlying other -higher
123 : * level- models, in order to achieve @strong{better performance} and
124 : * @strong{data independence} in one go. The relational model and the
125 : * object-oriented model can be mapped on BATs by vertically splitting
126 : * every table (or class) for each attribute. Each such column is
127 : * then stored in a BAT with type @strong{bat[oid,attribute]}, where
128 : * the unique object identifiers link tuples in the different BATs.
129 : * Relationship attributes in the object-oriented model hence are
130 : * mapped to @strong{bat[oid,oid]} tables, being equivalent to the
131 : * concept of @emph{join indexes} @tex [@cite{Valduriez87}] @end tex .
132 : *
133 : * The set of built-in types can be extended with user-defined types
134 : * through an ADT interface. They are linked with the kernel to
135 : * obtain an enhanced library, or they are dynamically loaded upon
136 : * request.
137 : *
138 : * Types can be derived from other types. They represent something
139 : * different than that from which they are derived, but their internal
140 : * storage management is equal. This feature facilitates the work of
141 : * extension programmers, by enabling reuse of implementation code,
142 : * but is also used to keep the GDK code portable from 32-bits to
143 : * 64-bits machines: the @strong{oid} and @strong{ptr} types are
144 : * derived from @strong{int} on 32-bits machines, but are derived from
145 : * @strong{lng} on 64 bits machines. This requires changes in only two
146 : * lines of code each.
147 : *
148 : * To accelerate lookup and search in BATs, GDK supports one built-in
149 : * search accelerator: hash tables. We choose an implementation
150 : * efficient for main-memory: bucket chained hash
151 : * @tex
152 : * [@cite{LehCar86,Analyti92}]
153 : * @end tex
154 : * . Alternatively, when the table is sorted, it will resort to
155 : * merge-scan operations or binary lookups.
156 : *
157 : * BATs are built on the concept of heaps, which are large pieces of
158 : * main memory. They can also consist of virtual memory, in case the
159 : * working set exceeds main-memory. In this case, GDK supports
160 : * operations that cluster the heaps of a BAT, in order to improve
161 : * performance of its main-memory.
162 : *
163 : *
164 : * @- Rationale
165 : * The rationale for choosing a BAT as the building block for both
166 : * relational and object-oriented system is based on the following
167 : * observations:
168 : *
169 : * @itemize
170 : * @item -
171 : * Given the fact that CPU speed and main-memory increase in current
172 : * workstation hardware for the last years has been exceeding IO
173 : * access speed increase, traditional disk-page oriented algorithms do
174 : * no longer take best advantage of hardware, in most database
175 : * operations.
176 : *
177 : * Instead of having a disk-block oriented kernel with a large memory
178 : * cache, we choose to build a main-memory kernel, that only under
179 : * large data volumes slowly degrades to IO-bound performance,
180 : * comparable to traditional systems
181 : * @tex
182 : * [@cite{boncz95,boncz96}]
183 : * @end tex
184 : * .
185 : *
186 : * @item -
187 : * Traditional (disk-based) relational systems move too much data
188 : * around to save on (main-memory) join operations.
189 : *
190 : * The fully decomposed store (DSM
191 : * @tex
192 : * [@cite{Copeland85}]
193 : * @end tex
194 : * ) assures that only those attributes of a relation that are needed,
195 : * will have to be accessed.
196 : *
197 : * @item -
198 : * The data management issues for a binary association are much
199 : * easier to deal with than traditional @emph{struct}-based approaches
200 : * encountered in relational systems.
201 : *
202 : * @item -
203 : * Object-oriented systems often maintain a double cache, one with the
204 : * disk-based representation and a C pointer-based main-memory
205 : * structure. This causes expensive conversions and replicated
206 : * storage management. GDK does not do such `pointer swizzling'. It
207 : * uses virtual-memory (@strong{mmap()}) and buffer management advice
208 : * (@strong{madvise()}) OS primitives to cache only once. Tables take
209 : * the same form in memory as on disk, making the use of this
210 : * technique transparent
211 : * @tex
212 : * [@cite{oo7}]
213 : * @end tex
214 : * .
215 : * @end itemize
216 : *
217 : * A RDBMS or OODBMS based on BATs strongly depends on our ability to
218 : * efficiently support tuples and to handle small joins, respectively.
219 : *
220 : * The remainder of this document describes the Goblin Database kernel
221 : * implementation at greater detail. It is organized as follows:
222 : * @table @code
223 : * @item @strong{GDK Interface}:
224 : *
225 : * It describes the global interface with which GDK sessions can be
226 : * started and ended, and environment variables used.
227 : *
228 : * @item @strong{Binary Association Tables}:
229 : *
230 : * As already mentioned, these are the primary data structure of GDK.
231 : * This chapter describes the kernel operations for creation,
232 : * destruction and basic manipulation of BATs and BUNs (i.e. tuples:
233 : * Binary UNits).
234 : *
235 : * @item @strong{BAT Buffer Pool:}
236 : *
237 : * All BATs are registered in the BAT Buffer Pool. This directory is
238 : * used to guide swapping in and out of BATs. Here we find routines
239 : * that guide this swapping process.
240 : *
241 : * @item @strong{GDK Extensibility:}
242 : *
243 : * Atoms can be defined using a unified ADT interface. There is also
244 : * an interface to extend the GDK library with dynamically linked
245 : * object code.
246 : *
247 : * @item @strong{GDK Utilities:}
248 : *
249 : * Memory allocation and error handling primitives are
250 : * provided. Layers built on top of GDK should use them, for proper
251 : * system monitoring. Thread management is also included here.
252 : *
253 : * @item @strong{Transaction Management:}
254 : *
255 : * For the time being, we just provide BAT-grained concurrency and
256 : * global transactions. Work is needed here.
257 : *
258 : * @item @strong{BAT Alignment:}
259 : * Due to the mapping of multi-ary datamodels onto the BAT model, we
260 : * expect many correspondences among BATs, e.g.
261 : * @emph{bat(oid,attr1),.. bat(oid,attrN)} vertical
262 : * decompositions. Frequent activities will be to jump from one
263 : * attribute to the other (`bunhopping'). If the head columns are
264 : * equal lists in two BATs, merge or even array lookups can be used
265 : * instead of hash lookups. The alignment interface makes these
266 : * relations explicitly manageable.
267 : *
268 : * In GDK, complex data models are mapped with DSM on binary tables.
269 : * Usually, one decomposes @emph{N}-ary relations into @emph{N} BATs
270 : * with an @strong{oid} in the head column, and the attribute in the
271 : * tail column. There may well be groups of tables that have the same
272 : * sets of @strong{oid}s, equally ordered. The alignment interface is
273 : * intended to make this explicit. Implementations can use this
274 : * interface to detect this situation, and use cheaper algorithms
275 : * (like merge-join, or even array lookup) instead.
276 : *
277 : * @item @strong{BAT Iterators:}
278 : *
279 : * Iterators are C macros that generally encapsulate a complex
280 : * for-loop. They would be the equivalent of cursors in the SQL
281 : * model. The macro interface (instead of a function call interface)
282 : * is chosen to achieve speed when iterating main-memory tables.
283 : *
284 : * @item @strong{Common BAT Operations:}
285 : *
286 : * These are much used operations on BATs, such as aggregate functions
287 : * and relational operators. They are implemented in terms of BAT- and
288 : * BUN-manipulation GDK primitives.
289 : * @end table
290 : *
291 : * @+ Interface Files
292 : * In this section we summarize the user interface to the GDK library.
293 : * It consists of a header file (gdk.h) and an object library
294 : * (gdklib.a), which implements the required functionality. The header
295 : * file must be included in any program that uses the library. The
296 : * library must be linked with such a program.
297 : *
298 : * @- Database Context
299 : *
300 : * The MonetDB environment settings are collected in a configuration
301 : * file. Amongst others it contains the location of the database
302 : * directory. First, the database directory is closed for other
303 : * servers running at the same time. Second, performance enhancements
304 : * may take effect, such as locking the code into memory (if the OS
305 : * permits) and preloading the data dictionary. An error at this
306 : * stage normally leads to an abort.
307 : */
308 :
309 : #ifndef _GDK_H_
310 : #define _GDK_H_
311 :
312 : /* standard includes upon which all configure tests depend */
313 : #ifdef HAVE_SYS_TYPES_H
314 : # include <sys/types.h>
315 : #endif
316 : #ifdef HAVE_SYS_STAT_H
317 : # include <sys/stat.h>
318 : #endif
319 : #include <stddef.h>
320 : #include <string.h>
321 : #ifdef HAVE_UNISTD_H
322 : # include <unistd.h>
323 : #endif
324 :
325 : #include <ctype.h> /* isspace etc. */
326 :
327 : #ifdef HAVE_SYS_FILE_H
328 : # include <sys/file.h>
329 : #endif
330 :
331 : #ifdef HAVE_DIRENT_H
332 : # include <dirent.h>
333 : #endif
334 :
335 : #include <limits.h> /* for *_MIN and *_MAX */
336 : #include <float.h> /* for FLT_MAX and DBL_MAX */
337 :
338 : #ifdef WIN32
339 : #ifndef LIBGDK
340 : #define gdk_export extern __declspec(dllimport)
341 : #else
342 : #define gdk_export extern __declspec(dllexport)
343 : #endif
344 : #else
345 : #define gdk_export extern
346 : #endif
347 :
348 : /* Only ever compare with GDK_SUCCEED, never with GDK_FAIL, and do not
349 : * use as a Boolean. */
350 : typedef enum { GDK_FAIL, GDK_SUCCEED } gdk_return; /* two-valued status code: GDK_FAIL == 0, GDK_SUCCEED == 1 */
351 :
352 : gdk_export _Noreturn void GDKfatal(_In_z_ _Printf_format_string_ const char *format, ...)
353 : __attribute__((__format__(__printf__, 1, 2)));
354 :
355 : #include "gdk_system.h"
356 : #include "gdk_posix.h"
357 : #include "stream.h"
358 : #include "mstring.h"
359 :
360 : #undef MIN
361 : #undef MAX
362 : #define MAX(A,B) ((A)<(B)?(B):(A))
363 : #define MIN(A,B) ((A)>(B)?(B):(A))
364 :
365 : /* defines from ctype with casts that allow passing char values */
366 : #define GDKisspace(c) isspace((unsigned char) (c))
367 : #define GDKisalnum(c) isalnum((unsigned char) (c))
368 : #define GDKisdigit(c) isdigit((unsigned char) (c))
369 : #define GDKisxdigit(c) isxdigit((unsigned char) (c))
370 :
371 : #define BATDIR "bat"
372 : #define TEMPDIR_NAME "TEMP_DATA"
373 :
374 : #define DELDIR BATDIR DIR_SEP_STR "DELETE_ME"
375 : #define BAKDIR BATDIR DIR_SEP_STR "BACKUP"
376 : #define SUBDIR BAKDIR DIR_SEP_STR "SUBCOMMIT" /* note K, not T */
377 : #define LEFTDIR BATDIR DIR_SEP_STR "LEFTOVERS"
378 : #define TEMPDIR BATDIR DIR_SEP_STR TEMPDIR_NAME
379 :
380 : /*
381 : See `man mserver5` or tools/mserver/mserver5.1
382 : for documentation of the following debug options.
383 : */
384 :
385 : #define THRDMASK (1U)
386 : #define CHECKMASK (1U<<1)
387 : #define CHECKDEBUG if (ATOMIC_GET(&GDKdebug) & CHECKMASK)
388 : #define PROPMASK (1U<<3) /* unused */
389 : #define PROPDEBUG if (ATOMIC_GET(&GDKdebug) & PROPMASK) /* unused */
390 : #define IOMASK (1U<<4)
391 : #define BATMASK (1U<<5)
392 : #define PARMASK (1U<<7)
393 : #define TESTINGMASK (1U<<8)
394 : #define TMMASK (1U<<9)
395 : #define TEMMASK (1U<<10)
396 : #define PERFMASK (1U<<12)
397 : #define DELTAMASK (1U<<13)
398 : #define LOADMASK (1U<<14)
399 : #define PUSHCANDMASK (1U<<15) /* used in opt_pushselect.c */
400 : #define TAILCHKMASK (1U<<16) /* check .tail file size during commit */
401 : #define ACCELMASK (1U<<20)
402 : #define ALGOMASK (1U<<21)
403 :
404 : #define NOSYNCMASK (1U<<24)
405 :
406 : #define DEADBEEFMASK (1U<<25)
407 : #define DEADBEEFCHK if (!(ATOMIC_GET(&GDKdebug) & DEADBEEFMASK))
408 :
409 : #define ALLOCMASK (1U<<26)
410 :
411 : /* M5, only; cf.,
412 : * monetdb5/mal/mal.h
413 : */
414 : #define OPTMASK (1U<<27)
415 :
416 : #define HEAPMASK (1U<<28)
417 :
418 : #define FORCEMITOMASK (1U<<29)
419 : #define FORCEMITODEBUG if (ATOMIC_GET(&GDKdebug) & FORCEMITOMASK)
420 :
421 : #ifndef TRUE
422 : #define TRUE true
423 : #define FALSE false
424 : #endif
425 :
426 : #define BATMARGIN 1.2 /* extra free margin for new heaps */
427 : #define BATTINY_BITS 8
428 : #define BATTINY ((BUN)1<<BATTINY_BITS) /* minimum allocation buncnt for a BAT */
429 :
430 : enum { /* type numbers of the built-in atom types */
431 : TYPE_void = 0, /* virtual oids: dense ascending sequence, takes no storage */
432 : TYPE_msk, /* bit mask */
433 : TYPE_bit, /* TRUE, FALSE, or nil */
434 : TYPE_bte, /* 1-byte integer (bte is int8_t) */
435 : TYPE_sht, /* 16-bit integer (sht is int16_t) */
436 : TYPE_int, /* the C int type */
437 : TYPE_oid, /* object identifier (oid is size_t) */
438 : TYPE_ptr, /* C pointer! */
439 : TYPE_flt, /* the C float type */
440 : TYPE_dbl, /* the C double type */
441 : TYPE_lng, /* 64-bit integer */
442 : #ifdef HAVE_HGE
443 : TYPE_hge, /* 128-bit integer; only exists when HAVE_HGE is defined */
444 : #endif
445 : TYPE_date,
446 : TYPE_daytime,
447 : TYPE_timestamp,
448 : TYPE_uuid, /* UUID of UUID_SIZE bytes (see the uuid union) */
449 : TYPE_str, /* zero-terminated UTF-8 string; position matters: ATOMextern() tests ATOMstorage(t) >= TYPE_str */
450 : TYPE_blob, /* byte array with an item count (see the blob struct) */
451 : TYPE_any = 255, /* limit types to <255! */
452 : };
453 :
454 : typedef bool msk;
455 : typedef int8_t bit;
456 : typedef int8_t bte;
457 : typedef int16_t sht;
458 : /* typedef int64_t lng; -- defined in gdk_system.h */
459 : typedef uint64_t ulng;
460 :
461 : #define SIZEOF_OID SIZEOF_SIZE_T
462 : typedef size_t oid;
463 : #define OIDFMT "%zu"
464 :
465 : typedef int bat; /* Index into BBP */
466 : typedef void *ptr; /* Internal coding of types */
467 :
468 : #define SIZEOF_PTR SIZEOF_VOID_P
469 : typedef float flt;
470 : typedef double dbl;
471 : typedef char *str;
472 :
473 : #define UUID_SIZE 16 /* size of a UUID */
474 : #define UUID_STRLEN 36 /* length of string representation */
475 :
476 : typedef union { /* UUID value: UUID_SIZE raw bytes in u; the other member exists only for alignment */
477 : #ifdef HAVE_HGE
478 : hge h; /* force alignment, not otherwise used */
479 : #else
480 : lng l[2]; /* force alignment, not otherwise used */
481 : #endif
482 : uint8_t u[UUID_SIZE] __attribute__((__nonstring__)); /* the UUID bytes; marked nonstring, i.e. not a NUL-terminated string */
483 : } uuid;
484 :
485 : typedef struct { /* variable-sized binary value: a count followed by the bytes */
486 : size_t nitems; /* presumably the number of bytes stored in data -- TODO confirm */
487 : uint8_t data[] __attribute__((__nonstring__)); /* flexible array member holding the payload; not a NUL-terminated string */
488 : } blob;
489 : gdk_export size_t blobsize(size_t nitems) __attribute__((__const__));
490 :
491 : #define SIZEOF_LNG 8
492 : #define LL_CONSTANT(val) INT64_C(val)
493 : #define LLFMT "%" PRId64
494 : #define ULLFMT "%" PRIu64
495 : #define LLSCN "%" SCNd64
496 : #define ULLSCN "%" SCNu64
497 :
498 : typedef oid var_t; /* type used for heap index of var-sized BAT */
499 : #define SIZEOF_VAR_T SIZEOF_OID
500 : #define VARFMT OIDFMT
501 :
502 : #if SIZEOF_VAR_T == SIZEOF_INT
503 : #define VAR_MAX ((var_t) INT_MAX)
504 : #else
505 : #define VAR_MAX ((var_t) INT64_MAX)
506 : #endif
507 :
508 : typedef oid BUN; /* BUN position */
509 : #define SIZEOF_BUN SIZEOF_OID
510 : #define BUNFMT OIDFMT
511 : /* alternatively:
512 : typedef size_t BUN;
513 : #define SIZEOF_BUN SIZEOF_SIZE_T
514 : #define BUNFMT "%zu"
515 : */
516 : #if SIZEOF_BUN == SIZEOF_INT
517 : #define BUN_NONE ((BUN) INT_MAX)
518 : #else
519 : #define BUN_NONE ((BUN) INT64_MAX)
520 : #endif
521 : #define BUN_MAX (BUN_NONE - 1) /* maximum allowed size of a BAT */
522 :
523 : /*
524 : * @- Checking and Error definitions:
525 : */
526 : #define ATOMextern(t) (ATOMstorage(t) >= TYPE_str)
527 :
528 : typedef enum { /* values for the batRole field of a BAT ("role of the bat") */
529 : PERSISTENT = 0, /* NOTE(review): presumably a bat that is kept on disk -- confirm */
530 : TRANSIENT, /* NOTE(review): presumably a temporary bat -- confirm */
531 : SYSTRANS, /* NOTE(review): meaning not visible in this file -- confirm */
532 : } role_t;
533 :
534 : /* Heap storage modes; used for Heap.storage (current mode) and Heap.newstorage (desired mode at re-allocation) */
535 : typedef enum {
536 : STORE_INVALID = 0, /* invalid value, used to indicate error */
537 : STORE_MEM, /* load into GDKmalloced memory */
538 : STORE_MMAP, /* mmap() into virtual memory */
539 : STORE_PRIV, /* BAT copy of copy-on-write mmap */
540 : STORE_CMEM, /* load into malloc (not GDKmalloc) memory */
541 : STORE_NOWN, /* memory not owned by the BAT */
542 : STORE_MMAPABS, /* mmap() into virtual memory from an
543 : * absolute path (not part of dbfarm) */
544 : } storage_t;
545 :
546 : typedef struct { /* Heap: descriptor of one linear area of memory on which BAT columns are built */
547 : size_t free; /* index where free area starts. */
548 : size_t size; /* size of the heap (bytes) */
549 : char *base; /* base pointer in memory. */
550 : #if SIZEOF_VOID_P == 4
551 : char filename[32]; /* file containing image of the heap (smaller buffer when pointers are 4 bytes) */
552 : #else
553 : char filename[40]; /* file containing image of the heap */
554 : #endif
555 :
556 : ATOMIC_TYPE refs; /* reference count for this heap */
557 : bte farmid; /* id of farm where heap is located */
558 : bool cleanhash; /* string heaps must clean hash */
559 : bool dirty; /* specific heap dirty marker */
560 : bool remove; /* remove storage file when freeing */
561 : bool wasempty; /* heap was empty when last saved/created */
562 : bool hasfile; /* .filename exists on disk */
563 : storage_t storage; /* storage mode (mmap/malloc); one of the STORE_* values */
564 : storage_t newstorage; /* new desired storage mode at re-allocation. */
565 : bat parentid; /* cache id of VIEW parent bat */
566 : } Heap;
567 :
568 : typedef struct Hash Hash;
569 : typedef struct Strimps Strimps;
570 :
571 : #ifdef HAVE_RTREE
572 : typedef struct RTree RTree;
573 : #endif
574 :
575 : /*
576 : * @+ Binary Association Tables
577 : * Having gone through the preliminary definitions above, we will now
578 : * introduce the structure of Binary Association Tables (BATs) in
579 : * detail. They are the basic storage unit on which GDK is modeled.
580 : *
581 : * The BAT holds an unlimited number of binary associations, called
582 : * BUNs (@strong{Binary UNits}). The two attributes of a BUN are
583 : * called @strong{head} (left) and @strong{tail} (right) in the
584 : * remainder of this document.
585 : *
586 : * @c image{http://monetdb.cwi.nl/projects/monetdb-mk/imgs/bat1,,,,feps}
587 : *
588 : * The above figure shows what a BAT looks like. It consists of two
589 : * columns, called head and tail, such that we have always binary
590 : * tuples (BUNs). The overlooking structure is the @strong{BAT
591 : * record}. It points to a heap structure called the @strong{BUN
592 : * heap}. This heap contains the atomic values inside the two
593 : * columns. If they are fixed-sized atoms, these atoms reside directly
594 : * in the BUN heap. If they are variable-sized atoms (such as string
595 : * or polygon), however, the column has an extra heap for storing
596 : * those (such @strong{variable-sized atom heaps} are then referred to
597 : * as @strong{Head Heap}s and @strong{Tail Heap}s). The BUN heap then
598 : * contains integer byte-offsets (fixed-sized, of course) into a head-
599 : * or tail-heap.
600 : *
601 : * The BUN heap contains a contiguous range of BUNs. It starts after
602 : * the @strong{first} pointer, and finishes at the end in the
603 : * @strong{free} area of the BUN. All BUNs after the @strong{inserted}
604 : * pointer have been added in the last transaction (and will be
605 : * deleted on a transaction abort). All BUNs between the
606 : * @strong{deleted} pointer and the @strong{first} have been deleted
607 : * in this transaction (and will be reinserted at a transaction
608 : * abort).
609 : *
610 : * The location of a certain BUN in a BAT may change between
611 : * successive library routine invocations. Therefore, one should
612 : * avoid keeping references into the BAT storage area for long
613 : * periods.
614 : *
615 : * Passing values between the library routines and the enclosing C
616 : * program is primarily through value pointers of type ptr. Pointers
617 : * into the BAT storage area should only be used for retrieval. Direct
618 : * updates of data stored in a BAT are forbidden. The user should
619 : * adhere to the interface conventions to guarantee the integrity
620 : * rules and to maintain the (hidden) auxiliary search structures.
621 : *
622 : * @- GDK variant record type
623 : * When manipulating values, MonetDB puts them into value records.
624 : * The built-in types have a direct entry in the union. Others should
625 : * be represented as a pointer to memory in pval or as a string, which
626 : * is basically the same. In such cases the len field indicates the
627 : * size of this piece of memory.
628 : */
629 : typedef struct { /* value record: one value of any type, plus its size and type tag */
630 : union { /* storage is first in the record */
631 : int ival;
632 : oid oval;
633 : sht shval;
634 : bte btval;
635 : msk mval;
636 : flt fval;
637 : ptr pval; /* pointer to memory, for types without a direct entry in this union */
638 : bat bval;
639 : str sval; /* string value; treated like pval */
640 : dbl dval;
641 : lng lval;
642 : #ifdef HAVE_HGE
643 : hge hval; /* only present when HAVE_HGE is defined */
644 : #endif
645 : uuid uval;
646 : } val;
647 : size_t len; /* size of the piece of memory referenced through pval or sval */
648 : short vtype; /* type of the stored value -- presumably one of the TYPE_* numbers; TODO confirm */
649 : bool bat; /* NOTE(review): presumably marks that the value refers to a BAT -- confirm against users */
650 : } *ValPtr, ValRecord;
651 :
652 : /* interface definitions */
653 : gdk_export void *VALconvert(int typ, ValPtr t);
654 : gdk_export char *VALformat(const ValRecord *res)
655 : __attribute__((__warn_unused_result__));
656 : gdk_export ValPtr VALcopy(ValPtr dst, const ValRecord *src)
657 : __attribute__((__access__(write_only, 1)));
658 : gdk_export ValPtr VALinit(ValPtr d, int tpe, const void *s)
659 : __attribute__((__access__(write_only, 1)));
660 : gdk_export void VALempty(ValPtr v)
661 : __attribute__((__access__(write_only, 1)));
662 : gdk_export void VALclear(ValPtr v);
663 : gdk_export ValPtr VALset(ValPtr v, int t, void *p);
664 : gdk_export void *VALget(ValPtr v);
665 : gdk_export int VALcmp(const ValRecord *p, const ValRecord *q);
666 : gdk_export bool VALisnil(const ValRecord *v);
667 :
668 : /*
669 : * @- The BAT record
670 : * The elements of the BAT structure are introduced in the remainder.
671 : * Instead of using the underlying types hidden beneath it, one should
672 : * use a @emph{BAT} type that is supposed to look like this:
673 : * @verbatim
674 : * typedef struct {
675 : * // static BAT properties
676 : * bat batCacheid; // bat id: index in BBPcache
677 : * bool batTransient; // persistence mode
678 : * bool batCopiedtodisk; // BAT is saved on disk?
679 : * // dynamic BAT properties
680 : * int batHeat; // heat of BAT in the BBP
681 : * Heap* batBuns; // Heap where the buns are stored
682 : * // DELTA status
683 : * BUN batInserted; // first inserted BUN
684 : * BUN batCount; // Tuple count
685 : * // Tail properties
686 : * int ttype; // Tail type number
687 : * bool tkey; // tail values are unique
688 : * bool tnonil; // tail has no nils
689 : * bool tsorted; // are tail values currently ordered?
690 : * // Tail storage
691 : * int tloc; // byte-offset in BUN for tail elements
692 : * Heap *theap; // heap for varsized tail values
693 : * Hash *thash; // linear chained hash table on tail
694 : * orderidx torderidx; // order oid index on tail
695 : * } BAT;
696 : * @end verbatim
697 : *
698 : * The internal structure of the @strong{BAT} record is in fact much
699 : * more complex, but GDK programmers should refrain from making use of
700 : * that.
701 : *
702 : * Since we don't want to pay cost to keep both views in line with
703 : * each other under BAT updates, we work with shared pieces of memory
704 : * between the two views. An update to one will thus automatically
705 : * update the other. In the same line, we allow @strong{synchronized
706 : * BATs} (BATs with identical head columns, and marked as such in the
707 : * @strong{BAT Alignment} interface) now to be clustered horizontally.
708 : *
709 : * @c image{http://monetdb.cwi.nl/projects/monetdb-mk/imgs/bat2,,,,feps}
710 : */
711 :
712 : typedef struct PROPrec PROPrec;
713 :
714 : /* see also comment near BATassertProps() for more information about
715 : * the properties */
716 : typedef struct { /* column record: type, properties and storage of a BAT column (embedded in BAT as field T) */
717 : uint16_t width; /* byte-width of the atom array */
718 : int8_t type; /* type id (the tail type number, reached as ttype) */
719 : uint8_t shift; /* log2 of bun width; assert_shift_width() checks width == 1<<shift, or both 0 */
720 : bool key:1, /* no duplicate values present */
721 : nonil:1, /* there are no nils in the column */
722 : nil:1, /* there is a nil in the column */
723 : sorted:1, /* column is sorted in ascending order */
724 : revsorted:1, /* column is sorted in descending order */
725 : ascii:1; /* string column is fully ASCII (7 bit) */
726 : BUN nokey[2]; /* positions that prove key==FALSE */
727 : BUN nosorted; /* position that proves sorted==FALSE */
728 : BUN norevsorted; /* position that proves revsorted==FALSE */
729 : BUN minpos, maxpos; /* location of min/max value */
730 : double unique_est; /* estimated number of unique values */
731 : oid seq; /* start of dense sequence */
732 :
733 : Heap *heap; /* space for the column. */
734 : BUN baseoff; /* offset in heap->base (in whole items) */
735 : Heap *vheap; /* space for the varsized data. */
736 : Hash *hash; /* hash table */
737 : #ifdef HAVE_RTREE
738 : RTree *rtree; /* rtree geometric index; only present when HAVE_RTREE is defined */
739 : #endif
740 : Heap *orderidx; /* order oid index */
741 : Strimps *strimps; /* string imprint index */
742 :
743 : PROPrec *props; /* list of dynamic properties stored in the bat descriptor */
744 : } COLrec;
745 :
746 : #define ORDERIDXOFF 3
747 :
748 : /* assert that atom width is power of 2, i.e., width == 1<<shift */
749 : #define assert_shift_width(shift,width) assert(((shift) == 0 && (width) == 0) || ((unsigned)1<<(shift)) == (unsigned)(width))
750 :
751 : #define GDKLIBRARY_HASHASH 061044U /* first in Jul2021: hashash bit in string heaps */
752 : #define GDKLIBRARY_HSIZE 061045U /* first in Jan2022: heap "size" values */
753 : #define GDKLIBRARY_JSON 061046U /* first in Sep2022: json storage changes*/
754 : #define GDKLIBRARY_STATUS 061047U /* first in Dec2023: no status/filename columns */
755 : #define GDKLIBRARY 061050U /* first in Aug2024 */
756 :
757 : /* The batRestricted field indicates whether a BAT is readonly.
758 : * We have modes: BAT_WRITE (0) = all permitted
759 : * BAT_READ (1) = read-only
760 : * BAT_APPEND (2) = append-only (reads and appends)
761 : * VIEW bats are always mapped read-only.
762 : */
763 : typedef enum { /* stored in the 2-bit bitfield BAT.batRestricted, so values must fit in 2 bits */
764 : BAT_WRITE, /* all kinds of access allowed */
765 : BAT_READ, /* only read-access allowed */
766 : BAT_APPEND, /* only reads and appends allowed */
767 : } restrict_t;
768 :
769 : /* theaplock: this lock should be held when reading or writing any of
770 : * the fields that are saved in the BBP.dir file (plus any, if any, that
771 : * share bitfields with any of the fields), i.e. hseqbase,
772 : * batRestricted, batTransient, batCount, and the theap properties tkey,
773 : * tseqbase, tsorted, trevsorted, twidth, tshift, tnonil, tnil, tnokey,
774 : * tnosorted, tnorevsorted, tminpos, tmaxpos, and tunique_est, also when
775 : * BBP_logical(bid) is changed, and also when reading or writing any of
776 : * the following fields: theap, tvheap, batInserted, batCapacity. There
777 : * is no need for the lock if the bat cannot possibly be modified
778 : * concurrently, e.g. when it is new and not yet returned to the
779 : * interpreter or during system initialization.
780 : * If multiple bats need to be locked at the same time by the same
781 : * thread, first lock the view, then the view's parent(s). */
781 : typedef struct BAT { /* BAT descriptor; see the locking rules for theaplock in the comment just above */
782 : /* static bat properties */
783 : oid hseqbase; /* head seq base */
784 : MT_Id creator_tid; /* which thread created it */
785 : bat batCacheid; /* index into BBP */
786 : role_t batRole; /* role of the bat */
787 :
788 : /* dynamic bat properties */
789 : restrict_t batRestricted:2; /* access privileges (a restrict_t value in 2 bits) */
790 : bool
791 : batTransient:1, /* should the BAT persist on disk? NOTE(review): by its name, true presumably means it should NOT persist -- confirm polarity */
792 : batCopiedtodisk:1; /* once written */
793 : uint16_t selcnt; /* how often used in equi select without hash */
794 : uint16_t unused; /* value=0 for now (sneakily used by mat.c) */
795 :
796 : /* delta status administration */
797 : BUN batInserted; /* start of inserted elements */
798 : BUN batCount; /* tuple count */
799 : BUN batCapacity; /* tuple capacity */
800 :
801 : /* dynamic column properties */
802 : COLrec T; /* column info; its members are reached through the t* macros (ttype, tkey, theap, ...) */
803 : MT_Lock theaplock; /* lock protecting heap reference changes */
804 : MT_RWLock thashlock; /* lock specifically for hash management */
805 : MT_Lock batIdxLock; /* lock to manipulate other indexes/properties */
806 : Heap *oldtail; /* old tail heap, to be destroyed after commit */
807 : } BAT;
809 :
810 : /* macros to hide complexity of the BAT structure: each tXXX name is
 * shorthand for the corresponding member of the embedded column record
 * T, so that b->tXXX reads/writes b->T.XXX */
811 : #define ttype T.type
812 : #define tkey T.key
813 : #define tseqbase T.seq
814 : #define tsorted T.sorted
815 : #define trevsorted T.revsorted
816 : #define tascii T.ascii
817 : #define torderidx T.orderidx
818 : #define twidth T.width
819 : #define tshift T.shift
820 : #define tnonil T.nonil
821 : #define tnil T.nil
822 : #define tnokey T.nokey
823 : #define tnosorted T.nosorted
824 : #define tnorevsorted T.norevsorted
825 : #define tminpos T.minpos
826 : #define tmaxpos T.maxpos
827 : #define tunique_est T.unique_est
828 : #define theap T.heap
829 : #define tbaseoff T.baseoff
830 : #define tvheap T.vheap
831 : #define thash T.hash
832 : #define tprops T.props
833 : #define tstrimps T.strimps
834 : #ifdef HAVE_RTREE /* the rtree shorthand exists only when HAVE_RTREE is defined */
835 : #define trtree T.rtree
836 : #endif
837 :
838 : /* some access functions for the bitmask type */
839 : static inline void
840 198 : mskSet(BAT *b, BUN p)
841 : {
842 198 : ((uint32_t *) b->theap->base)[p / 32] |= 1U << (p % 32);
843 198 : }
844 :
845 : static inline void
846 7929 : mskClr(BAT *b, BUN p)
847 : {
848 7929 : ((uint32_t *) b->theap->base)[p / 32] &= ~(1U << (p % 32));
849 7929 : }
850 :
851 : static inline void
852 8127 : mskSetVal(BAT *b, BUN p, msk v)
853 : {
854 8127 : if (v)
855 198 : mskSet(b, p);
856 : else
857 7929 : mskClr(b, p);
858 8127 : }
859 :
860 : static inline msk
861 0 : mskGetVal(BAT *b, BUN p)
862 : {
863 0 : return ((uint32_t *) b->theap->base)[p / 32] & (1U << (p % 32));
864 : }
865 :
866 : /*
867 : * @- Heap Management
868 : * Heaps are the low-level entities of mass storage in
869 : * BATs. Currently, they can either be stored on disk, loaded into
870 : * memory, or memory mapped.
871 : * @multitable @columnfractions 0.08 0.7
872 : * @item int
873 : * @tab
874 : * HEAPalloc (Heap *h, size_t nitems, size_t itemsize);
875 : * @item int
876 : * @tab
877 : * HEAPfree (Heap *h, bool remove);
878 : * @item int
879 : * @tab
880 : * HEAPextend (Heap *h, size_t size, bool mayshare);
881 : * @item int
882 : * @tab
883 : * HEAPload (Heap *h, str nme,ext, bool trunc);
884 : * @item int
885 : * @tab
886 : * HEAPsave (Heap *h, str nme,ext, bool dosync);
887 : * @item int
888 : * @tab
889 : * HEAPcopy (Heap *dst,*src);
890 : * @end multitable
891 : *
892 : *
893 : * These routines should be used to alloc free or extend heaps; they
894 : * isolate you from the different ways heaps can be accessed.
895 : */
896 : gdk_export gdk_return HEAPextend(Heap *h, size_t size, bool mayshare)
897 : __attribute__((__warn_unused_result__));
898 : gdk_export size_t HEAPvmsize(Heap *h);
899 : gdk_export size_t HEAPmemsize(Heap *h);
900 : gdk_export void HEAPdecref(Heap *h, bool remove);
901 : gdk_export void HEAPincref(Heap *h);
902 :
/* VIEWtparent(x): 0 if x has no tail heap or if its tail heap's
 * parentid is x's own batCacheid; otherwise the parentid of the tail
 * heap.  VIEWvtparent(x) is the same test on the vheap.  isVIEW(x) is
 * true when either macro yields a non-zero id.  All three evaluate x
 * more than once. */
903 : #define VIEWtparent(x) ((x)->theap == NULL || (x)->theap->parentid == (x)->batCacheid ? 0 : (x)->theap->parentid)
904 : #define VIEWvtparent(x) ((x)->tvheap == NULL || (x)->tvheap->parentid == (x)->batCacheid ? 0 : (x)->tvheap->parentid)
905 :
906 : #define isVIEW(x) (VIEWtparent(x) != 0 || VIEWvtparent(x) != 0)
907 :
908 : /*
909 : * @+ BAT Buffer Pool
910 : * @multitable @columnfractions 0.08 0.7
911 : * @item int
912 : * @tab BBPfix (bat bi)
913 : * @item int
914 : * @tab BBPunfix (bat bi)
915 : * @item int
916 : * @tab BBPretain (bat bi)
917 : * @item int
918 : * @tab BBPrelease (bat bi)
919 : * @item bat
920 : * @tab BBPindex (str nme)
921 : * @item BAT*
922 : * @tab BATdescriptor (bat bi)
923 : * @end multitable
924 : *
925 : * The BAT Buffer Pool module contains the code to manage the storage
926 : * location of BATs.
927 : *
928 : * The remaining BBP tables contain status information to load, swap
929 : * and migrate the BATs. The core table is BBPcache which contains a
930 : * pointer to the BAT descriptor with its heaps. A zero entry means
931 : * that the file resides on disk. Otherwise it has been read or mapped
932 : * into memory.
933 : *
934 : * BATs loaded into memory are retained in a BAT buffer pool. They
935 : * retain their position within the cache during their life cycle,
936 : * which make indexing BATs a stable operation.
937 : *
938 : * The BBPindex routine checks if a BAT with a certain name is
939 : * registered in the buffer pools. If so, it returns its BAT id. The
940 : * BATdescriptor routine has a BAT id parameter, and returns a pointer
941 : * to the corresponding BAT record (after incrementing the reference
942 : * count). The BAT will be loaded into memory, if necessary.
943 : *
944 : * The structure of the BBP file obeys the tuple format for GDK.
945 : *
946 : * The status and BAT persistency information is encoded in the status
947 : * field.
948 : */
/* One entry of the BAT Buffer Pool: names, the BAT descriptor itself,
 * reference counts and status for a single BAT id. */
949 : typedef struct {
950 : char *logical; /* logical name (may point at bak) */
951 : char bak[16]; /* logical name backup (tmp_%o) */
952 : BAT descr; /* the BAT descriptor */
953 : char *options; /* A string list of options */
954 : #if SIZEOF_VOID_P == 4 /* buffer size for the physical name depends on pointer size */
955 : char physical[20]; /* dir + basename for storage */
956 : #else
957 : char physical[24]; /* dir + basename for storage */
958 : #endif
959 : bat next; /* next BBP slot in linked list */
960 : int refs; /* in-memory references on which the loaded status of a BAT relies */
961 : int lrefs; /* logical references on which the existence of a BAT relies */
962 : ATOMIC_TYPE status; /* status mask used for spin locking */
963 : MT_Id pid; /* creator of this bat while "private" */
964 : } BBPrec;
965 :
966 : gdk_export bat BBPlimit;
967 : #if SIZEOF_VOID_P == 4 /* smaller limits on 32-bit architectures */
968 : #define N_BBPINIT 1000 /* number of slots in the BBP[] pointer array */
969 : #define BBPINITLOG 11 /* log2 of BBPINIT */
970 : #else
971 : #define N_BBPINIT 10000 /* number of slots in the BBP[] pointer array */
972 : #define BBPINITLOG 14 /* log2 of BBPINIT */
973 : #endif
974 : #define BBPINIT (1 << BBPINITLOG) /* BAT ids covered by one BBP[] slot: the low BBPINITLOG bits of an id index within a slot */
975 : /* absolute maximum number of BATs is N_BBPINIT * BBPINIT
976 : * this also gives the longest possible "physical" name and "bak" name
977 : * of a BAT: the "bak" name is "tmp_%o", so at most 14 + \0 bytes on 64
978 : * bit architecture and 11 + \0 on 32 bit architecture; the physical
979 : * name is a bit more complicated, but the longest possible name is 22 +
980 : * \0 bytes (16 + \0 on 32 bits), the longest possible extension adds
981 : * another 17 bytes (.thsh(grp|uni)(l|b)%08x) */
982 : gdk_export BBPrec *BBP[N_BBPINIT];
983 :
984 : /* fast defines without checks; internal use only.
 * BBP_record(i) selects BBP[] slot i >> BBPINITLOG and entry
 * i & (BBPINIT-1) within it; no bounds or NULL checks are done.  The
 * other BBP_* macros pick one member of that record; BBP_desc yields
 * a pointer to the embedded BAT descriptor and BBP_status does an
 * ATOMIC_GET of the status word. */
985 : #define BBP_record(i) BBP[(i)>>BBPINITLOG][(i)&(BBPINIT-1)]
986 : #define BBP_logical(i) BBP_record(i).logical
987 : #define BBP_bak(i) BBP_record(i).bak
988 : #define BBP_next(i) BBP_record(i).next
989 : #define BBP_physical(i) BBP_record(i).physical
990 : #define BBP_options(i) BBP_record(i).options
991 : #define BBP_desc(i) (&BBP_record(i).descr)
992 : #define BBP_refs(i) BBP_record(i).refs
993 : #define BBP_lrefs(i) BBP_record(i).lrefs
994 : #define BBP_status(i) ((unsigned) ATOMIC_GET(&BBP_record(i).status))
995 : #define BBP_pid(i) BBP_record(i).pid
996 : #define BATgetId(b) BBP_logical((b)->batCacheid) /* logical name of BAT b */
997 : #define BBPvalid(i) (BBP_logical(i) != NULL) /* true if slot i has a logical name */
998 :
/* Negative status codes; NOTE(review): presumably the failure results
 * of BBPrename (name already in use, illegal name, name too long, out
 * of memory) -- confirm against the BBP implementation. */
999 : #define BBPRENAME_ALREADY (-1)
1000 : #define BBPRENAME_ILLEGAL (-2)
1001 : #define BBPRENAME_LONG (-3)
1002 : #define BBPRENAME_MEMORY (-4)
1003 :
1004 : gdk_export void BBPlock(void);
1005 : gdk_export void BBPunlock(void);
1006 : gdk_export void BBPtmlock(void);
1007 : gdk_export void BBPtmunlock(void);
1008 :
1009 : gdk_export BAT *BBPquickdesc(bat b);
1010 :
1011 : /* BAT iterator, also protects use of BAT heaps with reference counts.
1012 : *
1013 : * A BAT iterator has to be used with caution, but it does have to be
1014 : * used in many places.
1015 : *
1016 : * An iterator is initialized by assigning it the result of a call to
1017 : * either bat_iterator or bat_iterator_nolock. The former must be
1018 : * accompanied by a call to bat_iterator_end to release resources.
1019 : *
1020 : * bat_iterator should be used for BATs that could possibly be modified
1021 : * in another thread while we're reading the contents of the BAT.
1022 : * Alternatively, but only for very quick access, the theaplock can be
1023 : * taken, the data read, and the lock released. For longer duration
1024 : * accesses, it is better to use the iterator, even without the BUNt*
1025 : * macros, since the theaplock is only held very briefly.
1026 : *
1027 : * Note, bat_iterator must only be used for read-only access.
1028 : *
1029 : * If BATs are to be modified, higher level code must assure that no
1030 : * other thread is going to modify the same BAT at the same time. A
1031 : * to-be-modified BAT should not use bat_iterator. It can use
1032 : * bat_iterator_nolock, but be aware that this creates a copy of the
1033 : * heap pointer(s) (i.e. theap and tvheap) and if the heaps get
1034 : * extended, the pointers in the BAT structure may be modified, but that
1035 : * does not modify the pointers in the iterator. This means that after
1036 : * operations that may grow a heap, the iterator should be
1037 : * reinitialized.
1038 : *
1039 : * The BAT iterator provides a number of fields that can (and often
1040 : * should) be used to access information about the BAT. For string
1041 : * BATs, if a parallel thread adds values, the offset heap (theap) may
1042 : * get replaced by one that is wider. This involves changing the twidth
1043 : * and tshift values in the BAT structure. These changed values should
1044 : * not be used to access the data in the iterator. Instead, use the
1045 : * width and shift values in the iterator itself.
1046 : */
1047 : typedef struct BATiter {
1048 : BAT *b; /* the BAT iterated over; NULL for an empty iterator */
1049 : Heap *h; /* b->theap at the time the iterator was created */
1050 : void *base; /* h->base advanced by baseoff << shift, or NULL if h has no base */
1051 : Heap *vh; /* b->tvheap at creation time; NULL if there is none */
1052 : BUN count; /* copy of b->batCount */
1053 : BUN baseoff; /* copy of b->tbaseoff */
1054 : oid tseq; /* copy of b->tseqbase */
1055 : BUN hfree, vhfree; /* hfree: tail bytes computed from count (not h->free); vhfree: vh->free or 0 */
1056 : BUN nokey[2]; /* copy of b->tnokey */
1057 : BUN nosorted, norevsorted; /* copies of b->tnosorted / b->tnorevsorted */
1058 : BUN minpos, maxpos; /* copies of b->tminpos / b->tmaxpos; BUN_NONE if b has a tail parent */
1059 : double unique_est; /* copy of b->tunique_est */
1060 : uint16_t width; /* copy of b->twidth */
1061 : uint8_t shift; /* copy of b->tshift */
1062 : int8_t type; /* copy of b->ttype */
1063 : bool key:1, /* copy of b->tkey */
1064 : nonil:1, /* copy of b->tnonil */
1065 : nil:1, /* copy of b->tnil */
1066 : sorted:1, /* copy of b->tsorted */
1067 : revsorted:1, /* copy of b->trevsorted */
1068 : hdirty:1, /* h->dirty, but only if b itself owns h */
1069 : vhdirty:1, /* vh->dirty, but only if vh exists and b itself owns it */
1070 : copiedtodisk:1, /* copy of b->batCopiedtodisk */
1071 : transient:1, /* copy of b->batTransient */
1072 : ascii:1; /* copy of b->tascii */
1073 : restrict_t restricted:2; /* copy of b->batRestricted */
1074 : #ifndef NDEBUG
1075 : bool locked:1; /* debug builds only: set by bat_iterator/bat_iterator_incref, checked by bat_iterator_end/_copy */
1076 : #endif
1077 : union { /* NOTE(review): presumably scratch space for values produced by Tpos/Tmsk -- confirm */
1078 : oid tvid;
1079 : bool tmsk;
1080 : };
1081 : } BATiter;
1082 :
1083 : static inline BATiter
1084 130560857 : bat_iterator_nolock(BAT *b)
1085 : {
1086 : /* does not get matched by bat_iterator_end */
1087 130560857 : if (b) {
1088 130560857 : const bool isview = VIEWtparent(b) != 0;
1089 261121714 : return (BATiter) {
1090 : .b = b,
1091 : .h = b->theap,
1092 130560857 : .base = b->theap->base ? b->theap->base + (b->tbaseoff << b->tshift) : NULL,
1093 130560857 : .baseoff = b->tbaseoff,
1094 130560857 : .vh = b->tvheap,
1095 130560857 : .count = b->batCount,
1096 130560857 : .width = b->twidth,
1097 130560857 : .shift = b->tshift,
1098 : .type = b->ttype,
1099 130560857 : .tseq = b->tseqbase,
1100 : /* don't use b->theap->free in case b is a slice */
1101 130560857 : .hfree = b->ttype ?
1102 : b->ttype == TYPE_msk ?
1103 127447389 : (((size_t) b->batCount + 31) / 32) * 4 :
1104 257365608 : (size_t) b->batCount << b->tshift :
1105 : 0,
1106 130560857 : .vhfree = b->tvheap ? b->tvheap->free : 0,
1107 130560857 : .nokey[0] = b->tnokey[0],
1108 130560857 : .nokey[1] = b->tnokey[1],
1109 130560857 : .nosorted = b->tnosorted,
1110 130560857 : .norevsorted = b->tnorevsorted,
1111 130560857 : .minpos = isview ? BUN_NONE : b->tminpos,
1112 110980238 : .maxpos = isview ? BUN_NONE : b->tmaxpos,
1113 130560857 : .unique_est = b->tunique_est,
1114 130560857 : .key = b->tkey,
1115 130560857 : .nonil = b->tnonil,
1116 130560857 : .nil = b->tnil,
1117 130560857 : .sorted = b->tsorted,
1118 130560857 : .revsorted = b->trevsorted,
1119 130560857 : .ascii = b->tascii,
1120 : /* only look at heap dirty flag if we own it */
1121 130560857 : .hdirty = b->theap->parentid == b->batCacheid && b->theap->dirty,
1122 : /* also, if there is no vheap, it's not dirty */
1123 130560857 : .vhdirty = b->tvheap && b->tvheap->parentid == b->batCacheid && b->tvheap->dirty,
1124 130560857 : .copiedtodisk = b->batCopiedtodisk,
1125 130560857 : .transient = b->batTransient,
1126 130560857 : .restricted = b->batRestricted,
1127 : #ifndef NDEBUG
1128 : .locked = false,
1129 : #endif
1130 : };
1131 : }
1132 0 : return (BATiter) {0};
1133 : }
1134 :
1135 : static inline void
1136 41678049 : bat_iterator_incref(BATiter *bi)
1137 : {
1138 : #ifndef NDEBUG
1139 41678049 : bi->locked = true;
1140 : #endif
1141 41678049 : HEAPincref(bi->h);
1142 41711174 : if (bi->vh)
1143 8740204 : HEAPincref(bi->vh);
1144 41710989 : }
1145 :
1146 : static inline BATiter
1147 41852086 : bat_iterator(BAT *b)
1148 : {
1149 : /* needs matching bat_iterator_end */
1150 41852086 : BATiter bi;
1151 41852086 : if (b) {
1152 38879475 : BAT *pb = NULL, *pvb = NULL;
1153 : /* for a view, always first lock the view and then the
1154 : * parent(s)
1155 : * note that a varsized bat can have two different
1156 : * parents and that the parent for the tail can itself
1157 : * have a parent for its vheap (which would have to be
1158 : * our own vheap parent), so lock the vheap after the
1159 : * tail */
1160 38879475 : MT_lock_set(&b->theaplock);
1161 38922363 : if (b->theap->parentid != b->batCacheid) {
1162 12293683 : pb = BBP_desc(b->theap->parentid);
1163 12293683 : MT_lock_set(&pb->theaplock);
1164 : }
1165 38924850 : if (b->tvheap &&
1166 8054159 : b->tvheap->parentid != b->batCacheid &&
1167 2859800 : b->tvheap->parentid != b->theap->parentid) {
1168 205066 : pvb = BBP_desc(b->tvheap->parentid);
1169 205066 : MT_lock_set(&pvb->theaplock);
1170 : }
1171 38924893 : bi = bat_iterator_nolock(b);
1172 38924893 : bat_iterator_incref(&bi);
1173 38951764 : if (pvb)
1174 205116 : MT_lock_unset(&pvb->theaplock);
1175 38964984 : if (pb)
1176 12303185 : MT_lock_unset(&pb->theaplock);
1177 38966720 : MT_lock_unset(&b->theaplock);
1178 : } else {
1179 2972611 : bi = (BATiter) {
1180 : .b = NULL,
1181 : #ifndef NDEBUG
1182 : .locked = true,
1183 : #endif
1184 : };
1185 : }
1186 41936702 : return bi;
1187 : }
1188 :
1189 : /* return a copy of a BATiter instance; needs to be released with
1190 : * bat_iterator_end */
1191 : static inline BATiter
1192 48693 : bat_iterator_copy(BATiter *bip)
1193 : {
1194 48693 : assert(bip);
1195 48693 : assert(bip->locked);
1196 48693 : if (bip->h)
1197 48693 : HEAPincref(bip->h);
1198 48693 : if (bip->vh)
1199 17263 : HEAPincref(bip->vh);
1200 48693 : return *bip;
1201 : }
1202 :
1203 : static inline void
1204 44650634 : bat_iterator_end(BATiter *bip)
1205 : {
1206 : /* matches bat_iterator */
1207 44650634 : assert(bip);
1208 44650634 : assert(bip->locked);
1209 44650634 : if (bip->h)
1210 41680899 : HEAPdecref(bip->h, false);
1211 44702778 : if (bip->vh)
1212 8755840 : HEAPdecref(bip->vh, false);
1213 44703531 : *bip = (BATiter) {0};
1214 44703531 : }
1215 :
1216 : /*
1217 : * @- Internal HEAP Chunk Management
1218 : * Heaps are used in BATs to store data for variable-size atoms. The
1219 : * implementer must manage malloc()/free() functionality for atoms in
1220 : * this heap. A standard implementation is provided here.
1221 : *
1222 : * @table @code
1223 : * @item gdk_return
1224 : * HEAP_initialize (Heap* heap, size_t nbytes, size_t nprivate, int alignment)
1225 : * @item void
1226 : * HEAP_destroy (Heap* h)
1227 : * @item var_t
1228 : * HEAP_malloc (BAT *b, size_t nbytes)
1229 : * @item void
1230 : * HEAP_free (Heap *heap, var_t block)
1231 : * @item int
1232 : * HEAP_private (Heap* h)
1233 : * @item void
1234 : * HEAP_printstatus (Heap* h)
1235 : * @end table
1236 : *
1237 : * The heap space starts with a private space that is left untouched
1238 : * by the normal chunk allocation. You can use this private space
1239 : * e.g. to store the root of an rtree. HEAP_malloc allocates a chunk of
1240 : * memory on the heap, and returns an index to it. HEAP_free frees a
1241 : * previously allocated chunk. HEAP_private returns an integer index to
1242 : * private space.
1243 : */
1244 :
1245 : gdk_export gdk_return HEAP_initialize(
1246 : Heap *heap, /* heap -- the heap to initialize */
1247 : size_t nbytes, /* nbytes -- Initial size of the heap. */
1248 : size_t nprivate, /* nprivate -- Size of private space */
1249 : int alignment /* alignment -- restriction for allocated chunks */
1250 : );
1251 :
1252 : gdk_export var_t HEAP_malloc(BAT *b, size_t nbytes);
1253 : gdk_export void HEAP_free(Heap *heap, var_t block);
1254 :
1255 : /*
1256 : * @- BAT construction
1257 : * @multitable @columnfractions 0.08 0.7
1258 : * @item @code{BAT* }
1259 : * @tab COLnew (oid headseq, int tailtype, BUN cap, role_t role)
1260 : * @item @code{BAT* }
1261 : * @tab BATextend (BAT *b, BUN newcap)
1262 : * @end multitable
1263 : *
1264 : * A temporary BAT is instantiated using COLnew with the type aliases
1265 : * of the required binary association. The aliases include the
1266 : * built-in types, such as TYPE_int....TYPE_ptr, and the atomic types
1267 : * introduced by the user. The initial capacity to be accommodated
1268 : * within a BAT is indicated by cap. Its extent is automatically
1269 : * incremented upon storage overflow. Failure to create the BAT
1270 : * results in a NULL pointer.
1271 : *
1272 : * The routine BATclone creates an empty BAT storage area with the
1273 : * properties inherited from its argument.
1274 : */
1275 : gdk_export BAT *COLnew(oid hseq, int tltype, BUN capacity, role_t role)
1276 : __attribute__((__warn_unused_result__));
1277 : gdk_export BAT *COLnew2(oid hseq, int tt, BUN cap, role_t role, uint16_t width)
1278 : __attribute__((__warn_unused_result__));
1279 : gdk_export BAT *BATdense(oid hseq, oid tseq, BUN cnt)
1280 : __attribute__((__warn_unused_result__));
1281 : gdk_export gdk_return BATextend(BAT *b, BUN newcap)
1282 : __attribute__((__warn_unused_result__));
1283 :
1284 : /* internal */
1285 : gdk_export uint8_t ATOMelmshift(int sz)
1286 : __attribute__((__const__));
1287 : gdk_export gdk_return ATOMheap(int id, Heap *hp, size_t cap)
1288 : __attribute__((__warn_unused_result__));
1289 : gdk_export const char *BATtailname(const BAT *b);
1290 :
1291 : gdk_export gdk_return GDKupgradevarheap(BAT *b, var_t v, BUN cap, BUN ncopy)
1292 : __attribute__((__warn_unused_result__));
1293 : gdk_export gdk_return BUNappend(BAT *b, const void *right, bool force)
1294 : __attribute__((__warn_unused_result__));
1295 : gdk_export gdk_return BUNappendmulti(BAT *b, const void *values, BUN count, bool force)
1296 : __attribute__((__warn_unused_result__));
1297 : gdk_export gdk_return BATappend(BAT *b, BAT *n, BAT *s, bool force)
1298 : __attribute__((__warn_unused_result__));
1299 :
1300 : gdk_export gdk_return BUNreplace(BAT *b, oid left, const void *right, bool force)
1301 : __attribute__((__warn_unused_result__));
1302 : gdk_export gdk_return BUNreplacemulti(BAT *b, const oid *positions, const void *values, BUN count, bool force)
1303 : __attribute__((__warn_unused_result__));
1304 : gdk_export gdk_return BUNreplacemultiincr(BAT *b, oid position, const void *values, BUN count, bool force)
1305 : __attribute__((__warn_unused_result__));
1306 :
1307 : gdk_export gdk_return BUNdelete(BAT *b, oid o)
1308 : __attribute__((__warn_unused_result__));
1309 : gdk_export gdk_return BATdel(BAT *b, BAT *d)
1310 : __attribute__((__warn_unused_result__));
1311 :
1312 : gdk_export gdk_return BATreplace(BAT *b, BAT *p, BAT *n, bool force)
1313 : __attribute__((__warn_unused_result__));
1314 : gdk_export gdk_return BATupdate(BAT *b, BAT *p, BAT *n, bool force)
1315 : __attribute__((__warn_unused_result__));
1316 : gdk_export gdk_return BATupdatepos(BAT *b, const oid *positions, BAT *n, bool autoincr, bool force)
1317 : __attribute__((__warn_unused_result__));
1318 :
1319 : /* Functions to perform a binary search on a sorted BAT.
1320 : * See gdk_search.c for details. */
1321 : gdk_export BUN SORTfnd(BAT *b, const void *v);
1322 : gdk_export BUN SORTfndfirst(BAT *b, const void *v);
1323 : gdk_export BUN SORTfndlast(BAT *b, const void *v);
1324 :
1325 : gdk_export BUN ORDERfnd(BAT *b, Heap *oidxh, const void *v);
1326 : gdk_export BUN ORDERfndfirst(BAT *b, Heap *oidxh, const void *v);
1327 : gdk_export BUN ORDERfndlast(BAT *b, Heap *oidxh, const void *v);
1328 :
1329 : gdk_export BUN BUNfnd(BAT *b, const void *right);
1330 :
/* BUNfndVOID(b, v): position of the oid pointed to by v in a BAT whose
 * tail is the sequence starting at b->tseqbase, computed as
 * *v - tseqbase.  Yields BUN_NONE when exactly one of *v and tseqbase
 * is nil, or when *v lies outside [tseqbase, tseqbase + batCount).
 * Non-short-circuit | is used, so all three tests are always
 * evaluated; both arguments are evaluated several times. */
1331 : #define BUNfndVOID(b, v) \
1332 : (((is_oid_nil(*(const oid*)(v)) ^ is_oid_nil((b)->tseqbase)) | \
1333 : (*(const oid*)(v) < (b)->tseqbase) | \
1334 : (*(const oid*)(v) >= (b)->tseqbase + (b)->batCount)) ? \
1335 : BUN_NONE : \
1336 : (BUN) (*(const oid*)(v) - (b)->tseqbase))
1337 :
/* BATttype(b): TYPE_oid when b is dense (see BATtdense), else b's
 * declared tail type. */
1338 : #define BATttype(b) (BATtdense(b) ? TYPE_oid : (b)->ttype)
1339 :
/* tailsize(b,p): number of bytes occupied by p tail values of b: 0 when
 * ttype is 0, one 4-byte word per 32 values (rounded up) when the
 * storage type is TYPE_msk, otherwise p << tshift. */
1340 : #define tailsize(b,p) ((b)->ttype ? \
1341 : (ATOMstorage((b)->ttype) == TYPE_msk ? \
1342 : (((size_t) (p) + 31) / 32) * 4 : \
1343 : ((size_t) (p)) << (b)->tshift) : \
1344 : 0)
1345 :
/* Tloc(b,p): address of tail value p of b in its tail heap, taking
 * tbaseoff into account; no bounds checking. */
1346 : #define Tloc(b,p) ((void *)((b)->theap->base+(((size_t)(p)+(b)->tbaseoff)<<(b)->tshift)))
1347 :
1348 : typedef var_t stridx_t;
1349 : #define SIZEOF_STRIDX_T SIZEOF_VAR_T
1350 : #define GDK_VARALIGN SIZEOF_STRIDX_T
1351 :
/* Accessors for value p through a BATiter bi (passed by value, not by
 * pointer; bi and p may be evaluated more than once).
 * BUNtvaroff: offset stored at position p of the tail, read via
 *             VarHeapVal using the iterator's width
 * BUNtmsk/BUNtpos: delegate to Tmsk/Tpos with the iterator's address
 * BUNtloc:   address of fixed-size value p (asserts type != TYPE_msk)
 * BUNtvar:   address in the vheap of variable-size value p (asserts
 *            that the type is non-zero and a vheap exists)
 * BUNtail:   picks BUNtpos when type is 0, else BUNtvar when there is
 *            a vheap, else BUNtmsk for TYPE_msk, else BUNtloc */
1352 : #define BUNtvaroff(bi,p) VarHeapVal((bi).base, (p), (bi).width)
1353 :
1354 : #define BUNtmsk(bi,p) Tmsk(&(bi), (p))
1355 : #define BUNtloc(bi,p) (assert((bi).type != TYPE_msk), ((void *) ((char *) (bi).base + ((p) << (bi).shift))))
1356 : #define BUNtpos(bi,p) Tpos(&(bi),p)
1357 : #define BUNtvar(bi,p) (assert((bi).type && (bi).vh), (void *) ((bi).vh->base+BUNtvaroff(bi,p)))
1358 : #define BUNtail(bi,p) ((bi).type?(bi).vh?BUNtvar(bi,p):(bi).type==TYPE_msk?BUNtmsk(bi,p):BUNtloc(bi,p):BUNtpos(bi,p))
1359 :
1360 : #define BATcount(b) ((b)->batCount) /* number of tuples in b */
1361 :
1362 : #include "gdk_atoms.h"
1363 :
1364 : #include "gdk_cand.h"
1365 :
1366 : /*
1367 : * @- BAT properties
1368 : * @multitable @columnfractions 0.08 0.7
1369 : * @item BUN
1370 : * @tab BATcount (BAT *b)
1371 : * @item void
1372 : * @tab BATsetcapacity (BAT *b, BUN cnt)
1373 : * @item void
1374 : * @tab BATsetcount (BAT *b, BUN cnt)
1375 : * @item BAT *
1376 : * @tab BATkey (BAT *b, bool onoff)
1377 : * @item BAT *
1378 : * @tab BATmode (BAT *b, bool transient)
1379 : * @item BAT *
1380 : * @tab BATsetaccess (BAT *b, restrict_t mode)
1381 : * @item int
1382 : * @tab BATdirty (BAT *b)
1383 : * @item restrict_t
1384 : * @tab BATgetaccess (BAT *b)
1385 : * @end multitable
1386 : *
1387 : * The function BATcount returns the number of associations stored in
1388 : * the BAT.
1389 : *
1390 : * The BAT is given a new logical name using BBPrename.
1391 : *
1392 : * The integrity properties to be maintained for the BAT are
1393 : * controlled separately. A key property indicates that duplicates in
1394 : * the association dimension are not permitted.
1395 : *
1396 : * The persistency indicator tells the retention period of BATs. The
1397 : * system supports two modes: PERSISTENT and TRANSIENT.
1398 : * The PERSISTENT BATs are automatically saved upon session boundary
1399 : * or transaction commit. TRANSIENT BATs are removed upon transaction
1400 : * boundary. All BATs are initially TRANSIENT unless their mode is
1401 : * changed using the routine BATmode.
1402 : *
1403 : * The BAT properties may be changed at any time using BATkey
1404 : * and BATmode.
1405 : *
1406 : * Valid BAT access properties can be set with BATsetaccess and
1407 : * BATgetaccess: BAT_READ, BAT_APPEND, and BAT_WRITE. BATs can be
1408 : * designated to be read-only. In this case some memory optimizations
1409 : * may be made (slice and fragment bats can point to stable subsets of
1410 : * a parent bat). A special mode is append-only. It is then allowed
1411 : * to insert BUNs at the end of the BAT, but not to modify anything
1412 : * that already was in there.
1413 : */
1414 : gdk_export BUN BATcount_no_nil(BAT *b, BAT *s);
1415 : gdk_export void BATsetcapacity(BAT *b, BUN cnt);
1416 : gdk_export void BATsetcount(BAT *b, BUN cnt);
1417 : gdk_export BUN BATgrows(BAT *b);
1418 : gdk_export gdk_return BATkey(BAT *b, bool onoff);
1419 : gdk_export gdk_return BATmode(BAT *b, bool transient);
1420 : gdk_export void BAThseqbase(BAT *b, oid o);
1421 : gdk_export void BATtseqbase(BAT *b, oid o);
1422 :
1423 : gdk_export BAT *BATsetaccess(BAT *b, restrict_t mode)
1424 : __attribute__((__warn_unused_result__));
1425 : gdk_export restrict_t BATgetaccess(BAT *b);
1426 :
1427 :
/* BATdirty(b): true when b has never been copied to disk or when its
 * tail heap or (if present) its vheap is marked dirty.  BATdirtybi(bi)
 * is the same test on the flags captured in a BATiter (passed by
 * value). */
1428 : #define BATdirty(b) (!(b)->batCopiedtodisk || \
1429 : (b)->theap->dirty || \
1430 : ((b)->tvheap != NULL && (b)->tvheap->dirty))
1431 : #define BATdirtybi(bi) (!(bi).copiedtodisk || (bi).hdirty || (bi).vhdirty)
1432 :
/* BATcapacity(b): tuple capacity of BAT b.  The replacement list is
 * fully parenthesized, like BATcount, so that the expansion is a single
 * primary expression in whatever context it is used. */
#define BATcapacity(b)	((b)->batCapacity)
1434 : /*
1435 : * @- BAT manipulation
1436 : * @multitable @columnfractions 0.08 0.7
1437 : * @item BAT *
1438 : * @tab BATclear (BAT *b, bool force)
1439 : * @item BAT *
1440 : * @tab COLcopy (BAT *b, int tt, bool writeable, role_t role)
1441 : * @end multitable
1442 : *
1443 : * The routine BATclear removes the binary associations, leading to an
1444 : * empty, but (re-)initialized BAT. Its properties are retained. A
1445 : * temporary copy is obtained with COLcopy. The new BAT has a unique
1446 : * name.
1447 : */
1448 : gdk_export gdk_return BATclear(BAT *b, bool force);
1449 : gdk_export BAT *COLcopy(BAT *b, int tt, bool writable, role_t role);
1450 :
1451 : gdk_export gdk_return BATgroup(BAT **groups, BAT **extents, BAT **histo, BAT *b, BAT *s, BAT *g, BAT *e, BAT *h)
1452 : __attribute__((__access__(write_only, 1)))
1453 : __attribute__((__access__(write_only, 2)))
1454 : __attribute__((__access__(write_only, 3)))
1455 : __attribute__((__warn_unused_result__));
1456 : /*
1457 : * @- BAT Input/Output
1458 : * @multitable @columnfractions 0.08 0.7
1459 : * @item BAT *
1460 : * @tab BATload (str name)
1461 : * @item BAT *
1462 : * @tab BATsave (BAT *b)
1463 : * @item int
1464 : * @tab BATdelete (BAT *b)
1465 : * @end multitable
1466 : *
1467 : * A BAT created by COLnew is considered temporary until one calls the
1468 : * routine BATsave or BATmode. This routine reserves disk space and
1469 : * checks for name clashes in the BAT directory. It also makes the BAT
1470 : * persistent. The empty BAT is initially marked as ordered on both
1471 : * columns.
1472 : *
1473 : * Failure to read or write the BAT results in a NULL, otherwise it
1474 : * returns the BAT pointer.
1475 : *
1476 : * @- Heap Storage Modes
1477 : * The discriminative storage modes are memory-mapped, compressed, or
1478 : * loaded in memory. As can be seen in the bat record, each BAT has
1479 : * one BUN-heap (@emph{bn}), and possibly two heaps (@emph{hh} and
1480 : * @emph{th}) for variable-sized atoms.
1481 : */
1482 :
1483 : gdk_export gdk_return BATsave(BAT *b)
1484 : __attribute__((__warn_unused_result__));
1485 :
1486 : #define NOFARM (-1) /* indicate to GDKfilepath to create relative path */
1487 : #define MAXPATH 1024 /* maximum supported file path */
1488 :
1489 : gdk_export gdk_return GDKfilepath(char *buf, size_t bufsize, int farmid, const char *dir, const char *nme, const char *ext)
1490 : __attribute__((__access__(write_only, 1, 2)));
1491 : gdk_export bool GDKinmemory(int farmid);
1492 : gdk_export bool GDKembedded(void);
1493 : gdk_export gdk_return GDKcreatedir(const char *nme);
1494 :
1495 : gdk_export void OIDXdestroy(BAT *b);
1496 :
1497 : /*
1498 : * @- Printing
1499 : * @multitable @columnfractions 0.08 0.7
1500 : * @item int
1501 : * @tab BATprintcolumns (stream *f, int argc, BAT *b[]);
1502 : * @end multitable
1503 : *
1504 : * These functions convert BATs into ASCII. They are primarily meant for ease of
1505 : * debugging and to a lesser extent for output processing. Printing a
1506 : * BAT is done essentially by looping through its components, printing
1507 : * each association.
1508 : *
1509 : */
1510 : gdk_export gdk_return BATprintcolumns(stream *s, int argc, BAT *argv[]);
1511 : gdk_export gdk_return BATprint(stream *s, BAT *b);
1512 :
1513 : /*
1514 : * @- BAT clustering
1515 : * @multitable @columnfractions 0.08 0.7
1516 : * @item bool
1517 : * @tab BATordered (BAT *b)
1518 : * @end multitable
1519 : *
1520 : * When working in a main-memory situation, clustering of data on
1521 : * disk-pages is not important. Whenever mmap()-ed data is used
1522 : * intensively, reducing the number of page faults is a hot issue.
1523 : *
1524 : * The above functions rearrange data in MonetDB heaps (used for
1525 : * storing BUNs var-sized atoms, or accelerators). Applying these
1526 : * clusterings will allow that MonetDB's main-memory oriented
1527 : * algorithms work efficiently also in a disk-oriented context.
1528 : *
1529 : * BATordered starts a check on the tail values to see if they are
1530 : * ordered. The result is returned and stored in the tsorted field of
1531 : * the BAT.
1532 : */
1533 : gdk_export bool BATordered(BAT *b);
1534 : gdk_export bool BATordered_rev(BAT *b);
1535 : gdk_export gdk_return BATsort(BAT **sorted, BAT **order, BAT **groups, BAT *b, BAT *o, BAT *g, bool reverse, bool nilslast, bool stable)
1536 : __attribute__((__access__(write_only, 1)))
1537 : __attribute__((__access__(write_only, 2)))
1538 : __attribute__((__access__(write_only, 3)))
1539 : __attribute__((__warn_unused_result__));
1540 :
1541 :
1542 : gdk_export void GDKqsort(void *restrict h, void *restrict t, const void *restrict base, size_t n, int hs, int ts, int tpe, bool reverse, bool nilslast);
1543 :
1544 : /* BAT is dense (i.e., BATtvoid() is true and tseqbase is not NIL):
 * tseqbase is not nil and there is no vheap or the vheap is empty.
 * BATtdensebi is the same test on a BATiter; note that it takes a
 * POINTER to the iterator, unlike BATdirtybi and the BUNt* macros. */
1545 : #define BATtdense(b) (!is_oid_nil((b)->tseqbase) && \
1546 : ((b)->tvheap == NULL || (b)->tvheap->free == 0))
1547 : #define BATtdensebi(bi) (!is_oid_nil((bi)->tseq) && \
1548 : ((bi)->vh == NULL || (bi)->vhfree == 0))
1549 : /* BATtvoid: BAT can be (or actually is) represented by TYPE_void */
1550 : #define BATtvoid(b) (BATtdense(b) || (b)->ttype==TYPE_void)
1551 : #define BATtkey(b) ((b)->tkey || BATtdense(b)) /* key property is set or implied by density */
1552 :
1553 : /* set some properties that are trivial to deduce; called with theaplock
1554 : * held */
1555 : static inline void
1556 8657031 : BATsettrivprop(BAT *b)
1557 : {
1558 8657031 : assert(!is_oid_nil(b->hseqbase));
1559 8657031 : assert(is_oid_nil(b->tseqbase) || ATOMtype(b->ttype) == TYPE_oid);
1560 8657031 : if (b->ttype == TYPE_void) {
1561 2632660 : if (is_oid_nil(b->tseqbase)) {
1562 153 : b->tnonil = b->batCount == 0;
1563 153 : b->tnil = !b->tnonil;
1564 153 : b->trevsorted = true;
1565 153 : b->tkey = b->batCount <= 1;
1566 : } else {
1567 2632507 : b->tnonil = true;
1568 2632507 : b->tnil = false;
1569 2632507 : b->tkey = true;
1570 2632507 : b->trevsorted = b->batCount <= 1;
1571 : }
1572 2632660 : b->tsorted = true;
1573 6024371 : } else if (b->batCount <= 1) {
1574 2782939 : b->tnosorted = b->tnorevsorted = 0;
1575 2782939 : b->tnokey[0] = b->tnokey[1] = 0;
1576 2782939 : b->tunique_est = (double) b->batCount;
1577 2782939 : b->tkey = true;
1578 2782939 : if (ATOMlinear(b->ttype)) {
1579 2782939 : b->tsorted = true;
1580 2782939 : b->trevsorted = true;
1581 2782939 : if (b->batCount == 0) {
1582 2095891 : b->tminpos = BUN_NONE;
1583 2095891 : b->tmaxpos = BUN_NONE;
1584 2095891 : b->tnonil = true;
1585 2095891 : b->tnil = false;
1586 2095891 : if (b->ttype == TYPE_oid) {
1587 26805 : b->tseqbase = 0;
1588 : }
1589 687048 : } else if (b->ttype == TYPE_oid) {
1590 73081 : oid sqbs = ((const oid *) b->theap->base)[b->tbaseoff];
1591 73081 : if (is_oid_nil(sqbs)) {
1592 397 : b->tnonil = false;
1593 397 : b->tnil = true;
1594 397 : b->tminpos = BUN_NONE;
1595 397 : b->tmaxpos = BUN_NONE;
1596 : } else {
1597 72684 : b->tnonil = true;
1598 72684 : b->tnil = false;
1599 72684 : b->tminpos = 0;
1600 72684 : b->tmaxpos = 0;
1601 : }
1602 73081 : b->tseqbase = sqbs;
1603 614010 : } else if ((b->tvheap
1604 165210 : ? ATOMcmp(b->ttype,
1605 : b->tvheap->base + VarHeapVal(Tloc(b, 0), 0, b->twidth),
1606 : ATOMnilptr(b->ttype))
1607 448757 : : ATOMcmp(b->ttype, Tloc(b, 0),
1608 1227977 : ATOMnilptr(b->ttype))) == 0) {
1609 : /* the only value is NIL */
1610 25874 : b->tminpos = BUN_NONE;
1611 25874 : b->tmaxpos = BUN_NONE;
1612 : } else {
1613 : /* the only value is both min and max */
1614 588136 : b->tminpos = 0;
1615 588136 : b->tmaxpos = 0;
1616 : }
1617 : } else {
1618 0 : b->tsorted = false;
1619 0 : b->trevsorted = false;
1620 0 : b->tminpos = BUN_NONE;
1621 0 : b->tmaxpos = BUN_NONE;
1622 : }
1623 3241432 : } else if (b->batCount == 2 && ATOMlinear(b->ttype)) {
1624 232873 : int c;
1625 232873 : if (b->tvheap)
1626 38459 : c = ATOMcmp(b->ttype,
1627 : b->tvheap->base + VarHeapVal(Tloc(b, 0), 0, b->twidth),
1628 : b->tvheap->base + VarHeapVal(Tloc(b, 0), 1, b->twidth));
1629 : else
1630 194414 : c = ATOMcmp(b->ttype, Tloc(b, 0), Tloc(b, 1));
1631 232319 : b->tsorted = c <= 0;
1632 232319 : b->tnosorted = !b->tsorted;
1633 232319 : b->trevsorted = c >= 0;
1634 232319 : b->tnorevsorted = !b->trevsorted;
1635 232319 : b->tkey = c != 0;
1636 232319 : b->tnokey[0] = 0;
1637 232319 : b->tnokey[1] = !b->tkey;
1638 232319 : b->tunique_est = (double) (1 + b->tkey);
1639 3008559 : } else if (!ATOMlinear(b->ttype)) {
1640 0 : b->tsorted = false;
1641 0 : b->trevsorted = false;
1642 0 : b->tminpos = BUN_NONE;
1643 0 : b->tmaxpos = BUN_NONE;
1644 : }
1645 8656520 : }
1646 :
1647 : static inline void
1648 482 : BATnegateprops(BAT *b)
1649 : {
1650 : /* disable all properties here */
1651 482 : b->tnonil = false;
1652 482 : b->tnil = false;
1653 482 : if (b->ttype) {
1654 482 : b->tsorted = false;
1655 482 : b->trevsorted = false;
1656 482 : b->tnosorted = 0;
1657 482 : b->tnorevsorted = 0;
1658 : }
1659 482 : b->tseqbase = oid_nil;
1660 482 : b->tkey = false;
1661 482 : b->tnokey[0] = 0;
1662 482 : b->tnokey[1] = 0;
1663 482 : b->tmaxpos = b->tminpos = BUN_NONE;
1664 482 : }
1665 :
1666 : /*
1667 : * @- GDK error handling
1668 : * @multitable @columnfractions 0.08 0.7
1669 : * @item str
1670 : * @tab
1671 : * GDKmessage
1672 : * @item bit
1673 : * @tab
1674 : * GDKfatal(str msg)
1675 : * @item int
1676 : * @tab
1677 : * GDKwarning(str msg)
1678 : * @item int
1679 : * @tab
1680 : * GDKerror (str msg)
1681 : * @item int
1682 : * @tab
1683 : * GDKgoterrors ()
1684 : * @item int
1685 : * @tab
1686 : * GDKsyserror (str msg)
1687 : * @item str
1688 : * @tab
1689 : * GDKerrbuf
1690 : * @item
1691 : * @tab GDKsetbuf (str buf)
1692 : * @end multitable
1693 : *
1694 : * The error handling mechanism is not sophisticated yet. Experience
1695 : * should show if this mechanism is sufficient. Most routines return
1696 : * a pointer with zero to indicate an error.
1697 : *
1698 : * The error messages are also copied to standard output. The last
1699 : * error message is kept around in a global variable.
1700 : *
1701 : * Error messages can also be collected in a user-provided buffer,
1702 : * instead of being echoed to a stream. This is a thread-specific
1703 : * issue; you want to decide on the error mechanism on a
1704 : * thread-specific basis. This effect is established with
1705 : * GDKsetbuf. The memory (de)allocation of this buffer, which must be
1706 : * at least 1024 chars long, is entirely up to the user. A pointer to
1707 : * this buffer is kept in the pseudo-variable GDKerrbuf. Normally,
1708 : * this is a NULL pointer.
1709 : */
1710 : #define GDKMAXERRLEN 10240
1711 : #define GDKWARNING "!WARNING: "
1712 : #define GDKERROR "!ERROR: "
1713 : #define GDKMESSAGE "!OS: "
1714 : #define GDKFATAL "!FATAL: "
1715 :
1716 : /* Data Distilleries uses ICU for internationalization of some MonetDB error messages */
1717 :
1718 : #include "gdk_tracer.h"
1719 :
1720 : gdk_export gdk_return GDKtracer_fill_comp_info(BAT *id, BAT *component, BAT *log_level);
1721 :
/* GDKerror: forward a printf-style message (format plus values) to
 * GDKtracer_log() at level M_ERROR for component GDK, tagged with the
 * file, function and line of the call site. */
1722 : #define GDKerror(...) \
1723 : GDKtracer_log(__FILE__, __func__, __LINE__, M_ERROR, \
1724 : GDK, NULL, __VA_ARGS__)
/* GDKsyserr: like GDKerror, but instead of NULL it passes the result
 * of GDKstrerror() for the given error number, rendered into a
 * temporary 64-byte buffer. */
1725 : #define GDKsyserr(errno, ...) \
1726 : GDKtracer_log(__FILE__, __func__, __LINE__, M_ERROR, \
1727 : GDK, GDKstrerror(errno, (char[64]){0}, 64), \
1728 : __VA_ARGS__)
/* GDKsyserror: GDKsyserr using the current value of errno. */
1729 : #define GDKsyserror(...) GDKsyserr(errno, __VA_ARGS__)
1730 :
1731 : gdk_export void GDKclrerr(void);
1732 :
1733 :
1734 : /* tfastins* family: update a value at a particular location in the bat
1735 : * bunfastapp* family: append a value to the bat
1736 : * *_nocheck: do not check whether the capacity is large enough
1737 : * * (without _nocheck): check bat capacity and possibly extend
1738 : *
1739 : * This means, for tfastins* it is the caller's responsibility to set
1740 : * the batCount and theap->free values correctly (e.g. by calling
1741 : * BATsetcount()), and for *_nocheck to make sure there is enough space
1742 : * allocated in the theap (tvheap for variable-sized types is still
1743 : * extended if needed, which means these functions can still fail).
1744 : */
1745 : __attribute__((__warn_unused_result__))
1746 : static inline gdk_return
1747 101711371 : tfastins_nocheckVAR(BAT *b, BUN p, const void *v)
1748 : {
1749 101711371 : var_t d;
1750 101711371 : gdk_return rc;
1751 101711371 : assert(b->tbaseoff == 0);
1752 101711371 : assert(b->theap->parentid == b->batCacheid);
1753 101711371 : MT_lock_set(&b->theaplock);
1754 102007312 : rc = ATOMputVAR(b, &d, v);
1755 101990485 : MT_lock_unset(&b->theaplock);
1756 104228804 : if (rc != GDK_SUCCEED)
1757 : return rc;
1758 104800242 : if (b->twidth < SIZEOF_VAR_T &&
1759 93183060 : (b->twidth <= 2 ? d - GDK_VAROFFSET : d) >= ((size_t) 1 << (8 << b->tshift))) {
1760 : /* doesn't fit in current heap, upgrade it */
1761 14138 : rc = GDKupgradevarheap(b, d, 0, MAX(p, b->batCount));
1762 14098 : if (rc != GDK_SUCCEED)
1763 : return rc;
1764 : }
1765 104800213 : switch (b->twidth) {
1766 30319191 : case 1:
1767 30319191 : ((uint8_t *) b->theap->base)[p] = (uint8_t) (d - GDK_VAROFFSET);
1768 30319191 : break;
1769 17651628 : case 2:
1770 17651628 : ((uint16_t *) b->theap->base)[p] = (uint16_t) (d - GDK_VAROFFSET);
1771 17651628 : break;
1772 45226165 : case 4:
1773 45226165 : ((uint32_t *) b->theap->base)[p] = (uint32_t) d;
1774 45226165 : break;
1775 : #if SIZEOF_VAR_T == 8
1776 11603229 : case 8:
1777 11603229 : ((uint64_t *) b->theap->base)[p] = (uint64_t) d;
1778 11603229 : break;
1779 : #endif
1780 : default:
1781 0 : MT_UNREACHABLE();
1782 : }
1783 : return GDK_SUCCEED;
1784 : }
1785 :
1786 : __attribute__((__warn_unused_result__))
1787 : static inline gdk_return
1788 317068356 : tfastins_nocheckFIX(BAT *b, BUN p, const void *v)
1789 : {
1790 317068356 : return ATOMputFIX(b->ttype, Tloc(b, p), v);
1791 : }
1792 :
1793 : __attribute__((__warn_unused_result__))
1794 : static inline gdk_return
1795 323967058 : tfastins_nocheck(BAT *b, BUN p, const void *v)
1796 : {
1797 323967058 : assert(b->theap->parentid == b->batCacheid);
1798 323967058 : assert(b->tbaseoff == 0);
1799 323967058 : if (b->ttype == TYPE_void) {
1800 : ;
1801 323967058 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
1802 0 : mskSetVal(b, p, * (msk *) v);
1803 323967058 : } else if (b->tvheap) {
1804 39454937 : return tfastins_nocheckVAR(b, p, v);
1805 : } else {
1806 284512121 : return tfastins_nocheckFIX(b, p, v);
1807 : }
1808 : return GDK_SUCCEED;
1809 : }
1810 :
1811 : __attribute__((__warn_unused_result__))
1812 : static inline gdk_return
1813 304705206 : tfastins(BAT *b, BUN p, const void *v)
1814 : {
1815 304705206 : if (p >= BATcapacity(b)) {
1816 0 : if (p >= BUN_MAX) {
1817 0 : GDKerror("tfastins: too many elements to accommodate (" BUNFMT ")\n", BUN_MAX);
1818 0 : return GDK_FAIL;
1819 : }
1820 0 : BUN sz = BATgrows(b);
1821 0 : if (sz <= p)
1822 0 : sz = p + BATTINY;
1823 0 : gdk_return rc = BATextend(b, sz);
1824 0 : if (rc != GDK_SUCCEED)
1825 : return rc;
1826 : }
1827 304705206 : return tfastins_nocheck(b, p, v);
1828 : }
1829 :
1830 : __attribute__((__warn_unused_result__))
1831 : static inline gdk_return
1832 8786736 : bunfastapp_nocheck(BAT *b, const void *v)
1833 : {
1834 8786736 : BUN p = b->batCount;
1835 8786736 : if (ATOMstorage(b->ttype) == TYPE_msk && p % 32 == 0)
1836 0 : ((uint32_t *) b->theap->base)[p / 32] = 0;
1837 8786736 : gdk_return rc = tfastins_nocheck(b, p, v);
1838 8794981 : if (rc == GDK_SUCCEED) {
1839 8810781 : b->batCount++;
1840 8810781 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1841 0 : if (p % 32 == 0)
1842 0 : b->theap->free += 4;
1843 : } else
1844 8810781 : b->theap->free += b->twidth;
1845 : }
1846 8794981 : return rc;
1847 : }
1848 :
1849 : __attribute__((__warn_unused_result__))
1850 : static inline gdk_return
1851 301787948 : bunfastapp(BAT *b, const void *v)
1852 : {
1853 301787948 : BUN p = b->batCount;
1854 301787948 : if (ATOMstorage(b->ttype) == TYPE_msk && p % 32 == 0)
1855 0 : ((uint32_t *) b->theap->base)[p / 32] = 0;
1856 301787948 : gdk_return rc = tfastins(b, p, v);
1857 299191130 : if (rc == GDK_SUCCEED) {
1858 296250823 : b->batCount++;
1859 296250823 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1860 0 : if (p % 32 == 0)
1861 0 : b->theap->free += 4;
1862 : } else
1863 296250823 : b->theap->free += b->twidth;
1864 : }
1865 299191130 : return rc;
1866 : }
1867 :
1868 : __attribute__((__warn_unused_result__))
1869 : static inline gdk_return
1870 106594 : bunfastappOID(BAT *b, oid o)
1871 : {
1872 106594 : BUN p = b->batCount;
1873 106594 : if (p >= BATcapacity(b)) {
1874 22 : if (p >= BUN_MAX) {
1875 0 : GDKerror("tfastins: too many elements to accommodate (" BUNFMT ")\n", BUN_MAX);
1876 0 : return GDK_FAIL;
1877 : }
1878 22 : gdk_return rc = BATextend(b, BATgrows(b));
1879 22 : if (rc != GDK_SUCCEED)
1880 : return rc;
1881 : }
1882 106594 : ((oid *) b->theap->base)[b->batCount++] = o;
1883 106594 : b->theap->free += sizeof(oid);
1884 106594 : return GDK_SUCCEED;
1885 : }
1886 :
/* Expression macro: append the TYPE value that v points to onto b.
 * When b is full it is extended by BATgrows(); the result is GDK_FAIL
 * when the count equals BUN_MAX (after logging an error) or when
 * BATextend() fails, and GDK_SUCCEED otherwise.  b and v are
 * evaluated more than once. */
#define bunfastappTYPE(TYPE, b, v)					\
	((BATcount(b) < BATcapacity(b) ||				\
	  ((BATcount(b) != BUN_MAX ||					\
	    (GDKerror("bunfastapp: too many elements to accommodate (" BUNFMT ")\n", BUN_MAX), \
	     false)) &&							\
	   BATextend((b), BATgrows(b)) == GDK_SUCCEED)) ?		\
	 (assert((b)->theap->parentid == (b)->batCacheid),		\
	  (b)->theap->free += sizeof(TYPE),				\
	  ((TYPE *) (b)->theap->base)[(b)->batCount++] = * (const TYPE *) (v), \
	  GDK_SUCCEED) :						\
	 GDK_FAIL)
1898 :
1899 : __attribute__((__warn_unused_result__))
1900 : static inline gdk_return
1901 342 : bunfastapp_nocheckVAR(BAT *b, const void *v)
1902 : {
1903 342 : gdk_return rc;
1904 342 : rc = tfastins_nocheckVAR(b, b->batCount, v);
1905 343 : if (rc == GDK_SUCCEED) {
1906 343 : b->batCount++;
1907 343 : b->theap->free += b->twidth;
1908 : }
1909 343 : return rc;
1910 : }
1911 :
1912 : /* Strimps exported functions */
1913 : gdk_export gdk_return STRMPcreate(BAT *b, BAT *s);
1914 : gdk_export BAT *STRMPfilter(BAT *b, BAT *s, const char *q, const bool keep_nils);
1915 : gdk_export void STRMPdestroy(BAT *b);
1916 : gdk_export bool BAThasstrimps(BAT *b);
1917 : gdk_export gdk_return BATsetstrimps(BAT *b);
1918 :
1919 : /* Rtree structure functions */
1920 : #ifdef HAVE_RTREE
1921 : gdk_export bool RTREEexists(BAT *b);
1922 : gdk_export bool RTREEexists_bid(bat bid);
1923 : gdk_export gdk_return BATrtree(BAT *wkb, BAT* mbr);
1924 : /* inMBR is really a struct mbr * from geom module, but that is not
1925 : * available here */
1926 : gdk_export BUN* RTREEsearch(BAT *b, const void *inMBR, int result_limit);
1927 : #endif
1928 :
1929 : gdk_export void RTREEdestroy(BAT *b);
1930 : gdk_export void RTREEfree(BAT *b);
1931 :
1932 : /* The ordered index structure */
1933 :
1934 : gdk_export gdk_return BATorderidx(BAT *b, bool stable);
1935 : gdk_export gdk_return GDKmergeidx(BAT *b, BAT**a, int n_ar);
1936 : gdk_export bool BATcheckorderidx(BAT *b);
1937 :
1938 : #include "gdk_delta.h"
1939 : #include "gdk_hash.h"
1940 : #include "gdk_bbp.h"
1941 : #include "gdk_utils.h"
1942 :
1943 : /* functions defined in gdk_bat.c */
1944 : gdk_export gdk_return void_inplace(BAT *b, oid id, const void *val, bool force)
1945 : __attribute__((__warn_unused_result__));
1946 : gdk_export BAT *BATattach(int tt, const char *heapfile, role_t role);
1947 :
1948 : #ifdef NATIVE_WIN32
1949 : #ifdef _MSC_VER
1950 : #define fileno _fileno
1951 : #endif
1952 : #define fdopen _fdopen
1953 : #define putenv _putenv
1954 : #endif
1955 :
1956 : /* Return a pointer to the value contained in V. Also see VALget
1957 : * which returns a void *. */
1958 : __attribute__((__pure__))
1959 : static inline const void *
1960 418910191 : VALptr(const ValRecord *v)
1961 : {
1962 418910191 : switch (ATOMstorage(v->vtype)) {
1963 709865 : case TYPE_void: return (const void *) &v->val.oval;
1964 0 : case TYPE_msk: return (const void *) &v->val.mval;
1965 22558023 : case TYPE_bte: return (const void *) &v->val.btval;
1966 1278164 : case TYPE_sht: return (const void *) &v->val.shval;
1967 239848284 : case TYPE_int: return (const void *) &v->val.ival;
1968 13111 : case TYPE_flt: return (const void *) &v->val.fval;
1969 570966 : case TYPE_dbl: return (const void *) &v->val.dval;
1970 52945623 : case TYPE_lng: return (const void *) &v->val.lval;
1971 : #ifdef HAVE_HGE
1972 18028 : case TYPE_hge: return (const void *) &v->val.hval;
1973 : #endif
1974 775 : case TYPE_uuid: return (const void *) &v->val.uval;
1975 271599 : case TYPE_ptr: return (const void *) &v->val.pval;
1976 100693067 : case TYPE_str: return (const void *) v->val.sval;
1977 2686 : default: return (const void *) v->val.pval;
1978 : }
1979 : }
1980 :
1981 : #define THREADS 1024 /* maximum value for gdk_nr_threads */
1982 :
1983 : gdk_export stream *GDKstdout;
1984 : gdk_export stream *GDKstdin;
1985 :
1986 : #define GDKerrbuf (GDKgetbuf())
1987 :
1988 : static inline bat
1989 445636314 : BBPcheck(bat x)
1990 : {
1991 445636314 : if (!is_bat_nil(x)) {
1992 445128638 : assert(x > 0);
1993 :
1994 445128638 : if (x < 0 || x >= getBBPsize() || BBP_logical(x) == NULL) {
1995 0 : TRC_DEBUG(CHECK_, "range error %d\n", (int) x);
1996 : } else {
1997 446258714 : assert(BBP_pid(x) == 0 || BBP_pid(x) == MT_getpid());
1998 446283167 : return x;
1999 : }
2000 : }
2001 : return 0;
2002 : }
2003 :
2004 : gdk_export BAT *BATdescriptor(bat i);
2005 :
2006 : static inline void *
2007 9664690 : Tpos(BATiter *bi, BUN p)
2008 : {
2009 9664690 : assert(bi->base == NULL);
2010 9664690 : if (bi->vh) {
2011 3430191 : oid o;
2012 3430191 : assert(!is_oid_nil(bi->tseq));
2013 3430191 : if (((ccand_t *) bi->vh)->type == CAND_NEGOID) {
2014 3430191 : BUN nexc = (bi->vhfree - sizeof(ccand_t)) / SIZEOF_OID;
2015 3430191 : o = bi->tseq + p;
2016 3430191 : if (nexc > 0) {
2017 3430607 : const oid *exc = (const oid *) (bi->vh->base + sizeof(ccand_t));
2018 3430607 : if (o >= exc[0]) {
2019 17454 : if (o + nexc > exc[nexc - 1]) {
2020 : o += nexc;
2021 : } else {
2022 4001 : BUN lo = 0;
2023 4001 : BUN hi = nexc - 1;
2024 27638 : while (hi - lo > 1) {
2025 19636 : BUN mid = (hi + lo) / 2;
2026 19636 : if (exc[mid] - mid > o)
2027 : hi = mid;
2028 : else
2029 11237 : lo = mid;
2030 : }
2031 4001 : o += hi;
2032 : }
2033 : }
2034 : }
2035 : } else {
2036 0 : const uint32_t *msk = (const uint32_t *) (bi->vh->base + sizeof(ccand_t));
2037 0 : BUN nmsk = (bi->vhfree - sizeof(ccand_t)) / sizeof(uint32_t);
2038 0 : o = 0;
2039 0 : for (BUN i = 0; i < nmsk; i++) {
2040 0 : uint32_t m = candmask_pop(msk[i]);
2041 0 : if (o + m > p) {
2042 0 : m = msk[i];
2043 0 : for (i = 0; i < 32; i++) {
2044 0 : if (m & (1U << i) && ++o == p)
2045 : break;
2046 : }
2047 : break;
2048 : }
2049 0 : o += m;
2050 : }
2051 : }
2052 3430191 : bi->tvid = o;
2053 6234499 : } else if (is_oid_nil(bi->tseq)) {
2054 0 : bi->tvid = oid_nil;
2055 : } else {
2056 6234499 : bi->tvid = bi->tseq + p;
2057 : }
2058 9664690 : return (void *) &bi->tvid;
2059 : }
2060 :
2061 : __attribute__((__pure__))
2062 : static inline bool
2063 501 : Tmskval(BATiter *bi, BUN p)
2064 : {
2065 501 : assert(ATOMstorage(bi->type) == TYPE_msk);
2066 501 : return ((uint32_t *) bi->base)[p / 32] & (1U << (p % 32));
2067 : }
2068 :
2069 : static inline void *
2070 501 : Tmsk(BATiter *bi, BUN p)
2071 : {
2072 501 : bi->tmsk = Tmskval(bi, p);
2073 501 : return &bi->tmsk;
2074 : }
2075 :
2076 : /* return the oid value at BUN position p from the (v)oid bat b
2077 : * works with any TYPE_void or TYPE_oid bat */
2078 : __attribute__((__pure__))
2079 : static inline oid
2080 23103013 : BUNtoid(BAT *b, BUN p)
2081 : {
2082 23103013 : assert(ATOMtype(b->ttype) == TYPE_oid);
2083 : /* BATcount is the number of valid entries, so with
2084 : * exceptions, the last value can well be larger than
2085 : * b->tseqbase + BATcount(b) */
2086 23103013 : assert(p < BATcount(b));
2087 23103013 : assert(b->ttype == TYPE_void || b->tvheap == NULL);
2088 23103013 : if (is_oid_nil(b->tseqbase)) {
2089 22552912 : if (b->ttype == TYPE_void)
2090 0 : return oid_nil;
2091 22552912 : MT_lock_set(&b->theaplock);
2092 22077136 : oid o = ((const oid *) b->theap->base)[p + b->tbaseoff];
2093 22077136 : MT_lock_unset(&b->theaplock);
2094 21985757 : return o;
2095 : }
2096 550101 : if (b->ttype == TYPE_oid || b->tvheap == NULL) {
2097 536869 : return b->tseqbase + p;
2098 : }
2099 : /* b->tvheap != NULL, so we know there will be no parallel
2100 : * modifications (so no locking) */
2101 13232 : BATiter bi = bat_iterator_nolock(b);
2102 13232 : return * (oid *) Tpos(&bi, p);
2103 : }
2104 :
2105 : /*
2106 : * @+ Transaction Management
2107 : */
2108 : gdk_export gdk_return TMsubcommit_list(bat *restrict subcommit, BUN *restrict sizes, int cnt, lng logno)
2109 : __attribute__((__warn_unused_result__));
2110 :
2111 : /*
2112 : * @- Delta Management
2113 : * @multitable @columnfractions 0.08 0.6
2114 : * @item BAT *
2115 : * @tab BATcommit (BAT *b)
2116 : * @end multitable
2117 : *
2118 : * The BAT keeps track of updates with respect to a 'previous state'.
2119 : * Do not confuse 'previous state' with 'stable' or 'committed-on-disk',
2120 : * because these concepts are not always the same. In particular, they
2121 : * diverge when BATcommit and BATfakecommit are called explicitly,
2122 : * bypassing the normal global TMcommit protocol (some applications need
2123 : * that flexibility).
2124 : *
2125 : * BATcommit makes the current BAT state the new 'stable state'. This
2126 : * happens inside the global TMcommit on all persistent BATs previous
2127 : * to writing all bats to persistent storage using a BBPsync.
2128 : */
2129 : gdk_export void BATcommit(BAT *b, BUN size);
2130 :
2131 : /*
2132 : * @+ BAT Alignment and BAT views
2133 : * @multitable @columnfractions 0.08 0.7
2134 : * @item int
2135 : * @tab ALIGNsynced (BAT* b1, BAT* b2)
2136 : * @item int
2137 : * @tab ALIGNsync (BAT *b1, BAT *b2)
2138 : * @item int
2139 : * @tab ALIGNrelated (BAT *b1, BAT *b2)
2140 : *
2141 : * @item BAT*
2142 : * @tab VIEWcreate (oid seq, BAT *b, BUN lo, BUN hi)
2143 : * @item int
2144 : * @tab isVIEW (BAT *b)
2145 : * @item bat
2146 : * @tab VIEWhparent (BAT *b)
2147 : * @item bat
2148 : * @tab VIEWtparent (BAT *b)
2149 : * @end multitable
2150 : *
2151 : * Alignments of two columns of a BAT means that the system knows
2152 : * whether these two columns are exactly equal. Relatedness of two
2153 : * BATs means that one pair of columns (either head or tail) of both
2154 : * BATs is aligned. The first property is checked by ALIGNsynced, the
2155 : * latter by ALIGNrelated.
2156 : *
2157 : * All algebraic BAT commands propagate the properties - including
2158 : * alignment properly on their results.
2159 : *
2160 : * VIEW BATs are BATs that lend their storage from a parent BAT. They
2161 : * are just a descriptor that points to the data in this parent BAT. A
2162 : * view is created with VIEWcreate. The cache id of the parent (if
2163 : * any) is returned by VIEWtparent (otherwise it returns 0).
2164 : *
2165 : * VIEW bats are read-only!!
2166 : */
2167 : gdk_export int ALIGNsynced(BAT *b1, BAT *b2);
2168 :
2169 : gdk_export void BATassertProps(BAT *b);
2170 :
2171 : gdk_export BAT *VIEWcreate(oid seq, BAT *b, BUN l, BUN h);
2172 : gdk_export void VIEWbounds(BAT *b, BAT *view, BUN l, BUN h);
2173 :
/* Unless f is true, verify under theaplock that x may be updated: when
 * x is restricted to BAT_READ or its theap has more than one reference
 * (refs masked with HEAPREFS), log "access denied" and make the
 * calling function return e. */
#define ALIGNapp(x, f, e)						\
	do {								\
		if (f)							\
			break;						\
		MT_lock_set(&(x)->theaplock);				\
		if ((x)->batRestricted == BAT_READ ||			\
		    ((ATOMIC_GET(&(x)->theap->refs) & HEAPREFS) > 1)) { \
			GDKerror("access denied to %s, aborting.\n", BATgetId(x)); \
			MT_lock_unset(&(x)->theaplock);			\
			return (e);					\
		}							\
		MT_lock_unset(&(x)->theaplock);				\
	} while (false)
2187 :
2188 : /*
2189 : * @+ BAT Iterators
2190 : * @multitable @columnfractions 0.15 0.7
2191 : * @item BATloop
2192 : * @tab
2193 : * (BAT *b; BUN p, BUN q)
2194 : * @item BATloopDEL
2195 : * @tab
2196 : * (BAT *b; BUN p; BUN q; int dummy)
2197 : * @item HASHloop
2198 : * @tab
2199 : * (BAT *b; Hash *h, size_t dummy; ptr value)
2200 : * @item HASHloop_bte
2201 : * @tab
2202 : * (BAT *b; Hash *h, size_t idx; bte *value, BUN w)
2203 : * @item HASHloop_sht
2204 : * @tab
2205 : * (BAT *b; Hash *h, size_t idx; sht *value, BUN w)
2206 : * @item HASHloop_int
2207 : * @tab
2208 : * (BAT *b; Hash *h, size_t idx; int *value, BUN w)
2209 : * @item HASHloop_flt
2210 : * @tab
2211 : * (BAT *b; Hash *h, size_t idx; flt *value, BUN w)
2212 : * @item HASHloop_lng
2213 : * @tab
2214 : * (BAT *b; Hash *h, size_t idx; lng *value, BUN w)
2215 : * @item HASHloop_hge
2216 : * @tab
2217 : * (BAT *b; Hash *h, size_t idx; hge *value, BUN w)
2218 : * @item HASHloop_dbl
2219 : * @tab
2220 : * (BAT *b; Hash *h, size_t idx; dbl *value, BUN w)
2221 : * @item HASHloop_str
2222 : * @tab
2223 : * (BAT *b; Hash *h, size_t idx; str value, BUN w)
2224 : * @item HASHlooploc
2225 : * @tab
2226 : * (BAT *b; Hash *h, size_t idx; ptr value, BUN w)
2227 : * @item HASHloopvar
2228 : * @tab
2229 : * (BAT *b; Hash *h, size_t idx; ptr value, BUN w)
2230 : * @end multitable
2231 : *
2232 : * The @emph{BATloop()} looks like a function call, but is actually a
2233 : * macro.
2234 : *
2235 : * @- simple sequential scan
2236 : * The first parameter is a BAT; p and q are BUN variables, where p
2237 : * is the iteration variable and q receives the BAT count.
2238 : */
/* Loop header that iterates p over all BUN positions of r: q is set to
 * BATcount(r) once and p runs from 0 up to (excluding) q.  p and q
 * must be lvalues; they are parenthesized so that expressions such as
 * *ptr expand correctly. */
#define BATloop(r, p, q)						\
	for ((q) = BATcount(r), (p) = 0; (p) < (q); (p)++)
2241 :
2242 : /*
2243 : * @+ Common BAT Operations
2244 : * Much used, but not necessarily kernel-operations on BATs.
2245 : *
2246 : * For each BAT we maintain its dimensions as separately accessible
2247 : * properties. They can be used to improve query processing at higher
2248 : * levels.
2249 : */
/* identifiers of the properties used with BATgetprop/BATsetprop/BATrmprop */
enum prop_t {
	/* MINimum allowed value for range partitions [min, max> */
	GDK_MIN_BOUND = 0,
	/* MAXimum of the range partitions [min, max>, i.e. this max
	 * value itself is excluded */
	GDK_MAX_BOUND = 1,
	/* bat bound to be not null */
	GDK_NOT_NULL = 2,
	/* CURRENTLY_NO_PROPERTIES_DEFINED, */
};
2256 :
2257 : gdk_export ValPtr BATgetprop(BAT *b, enum prop_t idx);
2258 : gdk_export ValPtr BATgetprop_nolock(BAT *b, enum prop_t idx);
2259 : gdk_export void BATrmprop(BAT *b, enum prop_t idx);
2260 : gdk_export void BATrmprop_nolock(BAT *b, enum prop_t idx);
2261 : gdk_export ValPtr BATsetprop(BAT *b, enum prop_t idx, int type, const void *v);
2262 : gdk_export ValPtr BATsetprop_nolock(BAT *b, enum prop_t idx, int type, const void *v);
2263 :
2264 : /*
2265 : * @- BAT relational operators
2266 : *
2267 : * The full-materialization policy for intermediate results in MonetDB
2268 : * means that a join can produce an arbitrarily large result and choke
2269 : * the system. The Data Distilleries tool therefore first computes the
2270 : * join result size before the actual join (better waste time than
2271 : * crash the server). To exploit that perfect result size knowledge,
2272 : * a result-size estimate parameter was added to all equi-join
2273 : * implementations. TODO: add this for
2274 : * semijoin/select/unique/diff/intersect
2275 : *
2276 : * @- modes for thetajoin
2277 : */
2278 : #define JOIN_EQ 0
2279 : #define JOIN_LT (-1)
2280 : #define JOIN_LE (-2)
2281 : #define JOIN_GT 1
2282 : #define JOIN_GE 2
2283 : #define JOIN_BAND 3
2284 : #define JOIN_NE (-3)
2285 :
2286 : gdk_export BAT *BATselect(BAT *b, BAT *s, const void *tl, const void *th, bool li, bool hi, bool anti, bool nil_matches);
2287 : gdk_export BAT *BATthetaselect(BAT *b, BAT *s, const void *val, const char *op);
2288 :
2289 : gdk_export BAT *BATconstant(oid hseq, int tt, const void *val, BUN cnt, role_t role);
2290 : gdk_export gdk_return BATsubcross(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool max_one)
2291 : __attribute__((__access__(write_only, 1)))
2292 : __attribute__((__access__(write_only, 2)))
2293 : __attribute__((__warn_unused_result__));
2294 : gdk_export gdk_return BAToutercross(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool max_one)
2295 : __attribute__((__access__(write_only, 1)))
2296 : __attribute__((__access__(write_only, 2)))
2297 : __attribute__((__warn_unused_result__));
2298 :
2299 : gdk_export gdk_return BATleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate)
2300 : __attribute__((__access__(write_only, 1)))
2301 : __attribute__((__access__(write_only, 2)))
2302 : __attribute__((__warn_unused_result__));
2303 : gdk_export gdk_return BATmarkjoin(BAT **r1p, BAT **r2p, BAT **r3p, BAT *l, BAT *r, BAT *sl, BAT *sr, BUN estimate)
2304 : __attribute__((__access__(write_only, 1)))
2305 : __attribute__((__access__(write_only, 2)))
2306 : __attribute__((__access__(write_only, 3)))
2307 : __attribute__((__warn_unused_result__));
2308 : gdk_export gdk_return BATouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool match_one, BUN estimate)
2309 : __attribute__((__access__(write_only, 1)))
2310 : __attribute__((__access__(write_only, 2)))
2311 : __attribute__((__warn_unused_result__));
2312 : gdk_export gdk_return BATthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int op, bool nil_matches, BUN estimate)
2313 : __attribute__((__access__(write_only, 1)))
2314 : __attribute__((__access__(write_only, 2)))
2315 : __attribute__((__warn_unused_result__));
2316 : gdk_export gdk_return BATsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool max_one, BUN estimate)
2317 : __attribute__((__access__(write_only, 1)))
2318 : __attribute__((__access__(write_only, 2)))
2319 : __attribute__((__warn_unused_result__));
2320 : gdk_export BAT *BATintersect(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool max_one, BUN estimate);
2321 : gdk_export BAT *BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool not_in, BUN estimate);
2322 : gdk_export gdk_return BATjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate)
2323 : __attribute__((__access__(write_only, 1)))
2324 : __attribute__((__access__(write_only, 2)))
2325 : __attribute__((__warn_unused_result__));
2326 : gdk_export BUN BATguess_uniques(BAT *b, struct canditer *ci);
2327 : gdk_export gdk_return BATbandjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, const void *c1, const void *c2, bool li, bool hi, BUN estimate)
2328 : __attribute__((__access__(write_only, 1)))
2329 : __attribute__((__access__(write_only, 2)))
2330 : __attribute__((__warn_unused_result__));
2331 : gdk_export gdk_return BATrangejoin(BAT **r1p, BAT **r2p, BAT *l, BAT *rl, BAT *rh, BAT *sl, BAT *sr, bool li, bool hi, bool anti, bool symmetric, BUN estimate)
2332 : __attribute__((__warn_unused_result__));
2333 : gdk_export BAT *BATproject(BAT *restrict l, BAT *restrict r);
2334 : gdk_export BAT *BATproject2(BAT *restrict l, BAT *restrict r1, BAT *restrict r2);
2335 : gdk_export BAT *BATprojectchain(BAT **bats);
2336 :
2337 : gdk_export BAT *BATslice(BAT *b, BUN low, BUN high);
2338 :
2339 : gdk_export BAT *BATunique(BAT *b, BAT *s);
2340 :
2341 : gdk_export gdk_return BATfirstn(BAT **topn, BAT **gids, BAT *b, BAT *cands, BAT *grps, BUN n, bool asc, bool nilslast, bool distinct)
2342 : __attribute__((__access__(write_only, 1)))
2343 : __attribute__((__access__(write_only, 2)))
2344 : __attribute__((__warn_unused_result__));
2345 : gdk_export BAT *BATgroupedfirstn(BUN n, BAT *s, BAT *g, int nbats, BAT **bats, bool *asc, bool *nilslast)
2346 : __attribute__((__warn_unused_result__));
2347 :
2348 : #include "gdk_calc.h"
2349 :
2350 : gdk_export gdk_return GDKtoupper(char **restrict buf, size_t *restrict buflen, const char *restrict s)
2351 : __attribute__((__access__(read_write, 1)))
2352 : __attribute__((__access__(read_write, 2)));
2353 : gdk_export gdk_return GDKtolower(char **restrict buf, size_t *restrict buflen, const char *restrict s)
2354 : __attribute__((__access__(read_write, 1)))
2355 : __attribute__((__access__(read_write, 2)));
2356 : gdk_export gdk_return GDKcasefold(char **restrict buf, size_t *restrict buflen, const char *restrict s)
2357 : __attribute__((__access__(read_write, 1)))
2358 : __attribute__((__access__(read_write, 2)));
2359 : gdk_export int GDKstrncasecmp(const char *str1, const char *str2, size_t l1, size_t l2);
2360 : gdk_export int GDKstrcasecmp(const char *s1, const char *s2);
2361 : gdk_export char *GDKstrcasestr(const char *haystack, const char *needle);
2362 : gdk_export BAT *BATtoupper(BAT *b, BAT *s);
2363 : gdk_export BAT *BATtolower(BAT *b, BAT *s);
2364 : gdk_export BAT *BATcasefold(BAT *b, BAT *s);
2365 : gdk_export gdk_return GDKasciify(char **restrict buf, size_t *restrict buflen, const char *restrict s);
2366 : gdk_export BAT *BATasciify(BAT *b, BAT *s);
2367 :
2368 : /*
2369 : * @- BAT sample operators
2370 : *
2371 : * @multitable @columnfractions 0.08 0.7
2372 : * @item BAT *
2373 : * @tab BATsample (BAT *b, n)
2374 : * @end multitable
2375 : *
2376 : * The routine BATsample returns a random sample on n BUNs of a BAT.
2377 : *
2378 : */
2379 : gdk_export BAT *BATsample(BAT *b, BUN n);
2380 : gdk_export BAT *BATsample_with_seed(BAT *b, BUN n, uint64_t seed);
2381 :
2382 : /*
2383 : *
2384 : */
2385 : #define MAXPARAMS 32
2386 :
2387 : #define CHECK_QRY_TIMEOUT_SHIFT 14
2388 : #define CHECK_QRY_TIMEOUT_STEP (1 << CHECK_QRY_TIMEOUT_SHIFT)
2389 : #define CHECK_QRY_TIMEOUT_MASK (CHECK_QRY_TIMEOUT_STEP - 1)
2390 :
2391 : #define TIMEOUT_MSG "Timeout was reached!"
2392 : #define INTERRUPT_MSG "Query interrupted!"
2393 : #define DISCONNECT_MSG "Client is disconnected!"
2394 : #define EXITING_MSG "Server is exiting!"
2395 :
2396 : #define QRY_TIMEOUT (-1) /* query timed out */
2397 : #define QRY_INTERRUPT (-2) /* client indicated interrupt */
2398 : #define QRY_DISCONNECT (-3) /* client disconnected */
2399 :
2400 : static const char *
2401 11 : TIMEOUT_MESSAGE(const QryCtx *qc)
2402 : {
2403 11 : if (GDKexiting())
2404 : return EXITING_MSG;
2405 11 : if (qc) {
2406 11 : switch (qc->endtime) {
2407 : case QRY_TIMEOUT:
2408 : return TIMEOUT_MSG;
2409 0 : case QRY_INTERRUPT:
2410 0 : return INTERRUPT_MSG;
2411 0 : case QRY_DISCONNECT:
2412 0 : return DISCONNECT_MSG;
2413 : default:
2414 0 : MT_UNREACHABLE();
2415 : }
2416 : }
2417 : return NULL;
2418 : }
2419 :
2420 : static inline void
2421 11 : TIMEOUT_ERROR(const QryCtx *qc, const char *file, const char *func, int lineno)
2422 : {
2423 11 : const char *e = TIMEOUT_MESSAGE(qc);
2424 11 : if (e) {
2425 11 : GDKtracer_log(file, func, lineno, M_ERROR, GDK, NULL,
2426 : "%s\n", e);
2427 : }
2428 11 : }
2429 :
/* log the reason for stopping through TIMEOUT_ERROR(), attributed to
 * the call site, and make the calling function return rtpe */
#define TIMEOUT_HANDLER(rtpe, qc)					\
	do {								\
		TIMEOUT_ERROR((qc), __FILE__, __func__, __LINE__);	\
		return rtpe;						\
	} while (0)
2435 :
2436 : static inline bool
2437 15608164 : TIMEOUT_TEST(QryCtx *qc)
2438 : {
2439 15608164 : if (qc == NULL)
2440 : return false;
2441 15597670 : if (qc->endtime < 0)
2442 : return true;
2443 15597663 : if (qc->endtime && GDKusec() > qc->endtime) {
2444 3 : qc->endtime = QRY_TIMEOUT;
2445 3 : return true;
2446 : }
2447 15597662 : switch (bstream_getoob(qc->bs)) {
2448 0 : case -1:
2449 0 : qc->endtime = QRY_DISCONNECT;
2450 0 : return true;
2451 : case 0:
2452 : return false;
2453 0 : default:
2454 0 : qc->endtime = QRY_INTERRUPT;
2455 0 : return true;
2456 : }
2457 : }
2458 :
/* Callback for the timeout checks below: log the stop reason for QC
 * at the place of use and jump to LABEL in the enclosing function. */
#define GOTO_LABEL_TIMEOUT_HANDLER(label, qc)				\
	do {								\
		TIMEOUT_ERROR((qc), __FILE__, __func__, __LINE__);	\
		goto label;						\
	} while (0)
2464 :
/* Unconditional (not rate-limited) check: execute CALLBACK when the
 * server is exiting or when TIMEOUT_TEST reports that the query of QC
 * must stop.  GDKexiting() is tested first, so TIMEOUT_TEST is not
 * called while the server is exiting. */
#define GDK_CHECK_TIMEOUT_BODY(qc, callback)			\
	do {							\
		if (GDKexiting() || TIMEOUT_TEST((qc))) {	\
			callback;				\
		}						\
	} while (0)
2471 :
/* Rate-limited timeout check for hand-written loops.
 *
 * COUNTER is a modifiable integer lvalue owned by the caller.  Each
 * expansion either increments it, or, once it exceeds
 * CHECK_QRY_TIMEOUT_STEP, performs GDK_CHECK_TIMEOUT_BODY(qc, callback)
 * and resets it to 0.  COUNTER is evaluated more than once, so it must
 * not have side effects; it is parenthesized so that lvalue
 * expressions such as *p are incremented as a whole (without the
 * parentheses, *p++ would advance the pointer instead). */
#define GDK_CHECK_TIMEOUT(qc, counter, callback)		\
	do {							\
		if ((counter) > CHECK_QRY_TIMEOUT_STEP) {	\
			GDK_CHECK_TIMEOUT_BODY(qc, callback);	\
			(counter) = 0;				\
		} else {					\
			(counter)++;				\
		}						\
	} while (0)
2481 :
2482 : /* here are some useful constructs to iterate a number of times (the
2483 : * REPEATS argument--only evaluated once) and checking for a timeout
2484 : * every once in a while; the QC->endtime value is a variable of type lng
2485 : * which is either 0 or the GDKusec() compatible time after which the
2486 : * loop should terminate; check for this condition after the loop using
2487 : * the TIMEOUT_CHECK macro; in order to break out of any of these loops,
2488 : * use TIMEOUT_LOOP_BREAK since plain break won't do it; it is perfectly
2489 : * ok to use continue inside the body */
2490 :
/* use IDX as a loop variable (already declared), initializing it to 0
 * and incrementing it on each iteration
 *
 * The expansion consists of three nested for statements:
 *  - the first evaluates REPEATS once into REPS and runs at most once;
 *  - the second runs once per chunk of CHECK_QRY_TIMEOUT_STEP
 *    iterations; it stops when GDKexiting() is true or QC->endtime is
 *    negative, and calls TIMEOUT_TEST(QC) before every chunk except
 *    the first;
 *  - the third executes the user's loop body, a full chunk at a time
 *    except for the last chunk which gets the remainder.
 * IDX is parenthesized so that any modifiable lvalue expression can be
 * used as the index (e.g. *p, where an unparenthesized *p++ would
 * advance the pointer instead of the index). */
#define TIMEOUT_LOOP_IDX(IDX, REPEATS, QC)				\
	for (BUN REPS = ((IDX) = 0, (REPEATS)); REPS > 0; REPS = 0) /* "loops" at most once */ \
		for (BUN CTR1 = 0, END1 = (REPS + CHECK_QRY_TIMEOUT_STEP) >> CHECK_QRY_TIMEOUT_SHIFT; \
		     CTR1 < END1 && !GDKexiting() && ((QC) == NULL || (QC)->endtime >= 0); \
		     CTR1++)						\
			if (CTR1 > 0 && TIMEOUT_TEST(QC)) {		\
				break;					\
			} else						\
				for (BUN CTR2 = 0, END2 = CTR1 == END1 - 1 ? REPS & CHECK_QRY_TIMEOUT_MASK : CHECK_QRY_TIMEOUT_STEP; \
				     CTR2 < END2;			\
				     CTR2++, (IDX)++)
2500 :
/* declare and use IDX as a loop variable, initializing it to 0 and
 * incrementing it on each iteration
 *
 * IDX is declared with type BUN in the outermost for statement (which
 * runs at most once) and is therefore only visible inside the loop.
 * The middle for statement iterates over chunks of
 * CHECK_QRY_TIMEOUT_STEP iterations and tests for a timeout before
 * every chunk but the first; the innermost one runs the loop body. */
#define TIMEOUT_LOOP_IDX_DECL(IDX, REPEATS, QC)				\
	for (BUN IDX = 0, REPS = (REPEATS); REPS > 0; REPS = 0) /* "loops" at most once */ \
		for (BUN CTR1 = 0, END1 = (REPS + CHECK_QRY_TIMEOUT_STEP) >> CHECK_QRY_TIMEOUT_SHIFT; \
		     CTR1 < END1 && !GDKexiting() && ((QC) == NULL || (QC)->endtime >= 0); \
		     CTR1++)						\
			if (CTR1 > 0 && TIMEOUT_TEST(QC)) {		\
				break;					\
			} else						\
				for (BUN CTR2 = 0, END2 = (CTR1 == END1 - 1) ? (REPS & CHECK_QRY_TIMEOUT_MASK) : CHECK_QRY_TIMEOUT_STEP; \
				     CTR2 < END2;			\
				     CTR2++, IDX++)
2510 :
/* there is no user-visible loop variable
 *
 * The loop body is executed REPEATS times (REPEATS is evaluated once)
 * in chunks of CHECK_QRY_TIMEOUT_STEP iterations.  The outer for
 * statement stops when GDKexiting() is true or QC->endtime is
 * negative, and TIMEOUT_TEST(QC) is called before every chunk except
 * the first. */
#define TIMEOUT_LOOP(REPEATS, QC)					\
	for (BUN CTR1 = 0, REPS = (REPEATS), END1 = (REPS + CHECK_QRY_TIMEOUT_STEP) >> CHECK_QRY_TIMEOUT_SHIFT; \
	     CTR1 < END1 && !GDKexiting() && ((QC) == NULL || (QC)->endtime >= 0); \
	     CTR1++)							\
		if (CTR1 > 0 && TIMEOUT_TEST(QC)) {			\
			break;						\
		} else							\
			for (BUN CTR2 = 0, END2 = (CTR1 == END1 - 1) ? (REPS & CHECK_QRY_TIMEOUT_MASK) : CHECK_QRY_TIMEOUT_STEP; \
			     CTR2 < END2;				\
			     CTR2++)
2518 :
/* break out of the loop (cannot use do/while trick here)
 *
 * Both loop bounds of the enclosing TIMEOUT_LOOP* construct are set
 * to zero so that, after the continue, the conditions of the inner
 * and of the outer for statement fail.  Note that the continue still
 * executes the increment expression of the inner for statement once. */
#define TIMEOUT_LOOP_BREAK			\
	{					\
		END2 = 0;			\
		END1 = 0;			\
		continue;			\
	}
2525 :
/* check whether a timeout occurred, and execute the CALLBACK argument
 * if it did
 *
 * "Timeout" here means that GDKexiting() is true or that QC is not
 * NULL and QC->endtime holds a negative (QRY_*) value; QC->endtime is
 * only inspected, TIMEOUT_TEST is not called.  CALLBACK is enclosed in
 * braces so that a callback consisting of several statements is
 * executed as a whole only when the condition holds. */
#define TIMEOUT_CHECK(QC, CALLBACK)					\
	do {								\
		if (GDKexiting() || ((QC) && (QC)->endtime < 0)) {	\
			CALLBACK;					\
		}							\
	} while (0)
2533 :
2534 : typedef struct gdk_callback {
2535 : const char *name;
2536 : int argc;
2537 : int interval; // units sec
2538 : lng last_called; // timestamp GDKusec
2539 : gdk_return (*func)(int argc, void *argv[]);
2540 : struct gdk_callback *next;
2541 : void *argv[];
2542 : } gdk_callback;
2543 :
2544 : typedef gdk_return gdk_callback_func(int argc, void *argv[]);
2545 :
2546 : gdk_export gdk_return gdk_add_callback(const char *name, gdk_callback_func *f,
2547 : int argc, void *argv[], int interval);
2548 : gdk_export gdk_return gdk_remove_callback(const char *, gdk_callback_func *f);
2549 :
2550 :
/* SQLSTATE(code) turns the bare token CODE into the string literal
 * "code!", meant to be concatenated with the message text that
 * follows it, e.g. SQLSTATE(HY013) MAL_MALLOC_FAIL. */
#define SQLSTATE(code)		#code "!"
/* generic text for a failed memory allocation */
#define MAL_MALLOC_FAIL		"Could not allocate space"
2553 :
2554 : #include <setjmp.h>
2555 :
/* State needed to jump back to a savepoint established with
 * eb_savepoint.  The jump buffer is a sigjmp_buf when HAVE_SIGLONGJMP
 * is defined and a plain jmp_buf otherwise. */
struct exception_buffer {
#ifndef HAVE_SIGLONGJMP
	jmp_buf state;
#else
	sigjmp_buf state;
#endif
	int code;		/* presumably the value given to eb_error -- TODO confirm */
	const char *msg;	/* presumably the message given to eb_error -- TODO confirm */
	int enabled;		/* set to 1 by eb_savepoint */
};
typedef struct exception_buffer exception_buffer;
2566 :
2567 : gdk_export exception_buffer *eb_init(exception_buffer *eb)
2568 : __attribute__((__access__(write_only, 1)));
2569 :
/* Mark the exception buffer EB as enabled and establish a savepoint
 * in it.  The expression evaluates to 0 when the savepoint is first
 * established, and to a value != 0 when control returns to the
 * savepoint through a later (sig)longjmp on EB->state. */
#ifndef HAVE_SIGLONGJMP
#define eb_savepoint(eb)	((eb)->enabled = 1, setjmp((eb)->state))
#else
#define eb_savepoint(eb)	((eb)->enabled = 1, sigsetjmp((eb)->state, 0))
#endif
2576 : gdk_export _Noreturn void eb_error(exception_buffer *eb, const char *msg, int val);
2577 :
2578 : typedef struct allocator {
2579 : struct allocator *pa;
2580 : size_t size;
2581 : size_t nr;
2582 : char **blks;
2583 : size_t used; /* memory used in last block */
2584 : size_t usedmem; /* used memory */
2585 : void *freelist; /* list of freed blocks */
2586 : exception_buffer eb;
2587 : } allocator;
2588 :
2589 : gdk_export allocator *sa_create( allocator *pa );
2590 : gdk_export allocator *sa_reset( allocator *sa );
2591 : gdk_export void *sa_alloc( allocator *sa, size_t sz );
2592 : gdk_export void *sa_zalloc( allocator *sa, size_t sz );
2593 : gdk_export void *sa_realloc( allocator *sa, void *ptr, size_t sz, size_t osz );
2594 : gdk_export void sa_destroy( allocator *sa );
2595 : gdk_export char *sa_strndup( allocator *sa, const char *s, size_t l);
2596 : gdk_export char *sa_strdup( allocator *sa, const char *s);
2597 : gdk_export char *sa_strconcat( allocator *sa, const char *s1, const char *s2);
2598 : gdk_export size_t sa_size( allocator *sa );
2599 :
/* Debug-build tracing wrappers around the sa_* allocation functions.
 *
 * In builds without NDEBUG, compiled with a GNU C compatible compiler
 * and not under Coverity, each of the function-like macros below
 * shadows the function of the same name: it evaluates every argument
 * exactly once, calls the real function (a macro name is not expanded
 * again inside its own expansion), reports the arguments and the
 * result through TRC_DEBUG(ALLOC, ...) and yields the result.
 *
 * The "%p" conversion requires an argument of type void *, so the
 * allocator and string pointers are cast explicitly. */
#if !defined(NDEBUG) && !defined(__COVERITY__) && defined(__GNUC__)
#define sa_alloc(sa, sz)					\
	({							\
		allocator *_sa = (sa);				\
		size_t _sz = (sz);				\
		void *_res = sa_alloc(_sa, _sz);		\
		TRC_DEBUG(ALLOC,				\
			  "sa_alloc(%p,%zu) -> %p\n",		\
			  (void *) _sa, _sz, _res);		\
		_res;						\
	})
#define sa_zalloc(sa, sz)					\
	({							\
		allocator *_sa = (sa);				\
		size_t _sz = (sz);				\
		void *_res = sa_zalloc(_sa, _sz);		\
		TRC_DEBUG(ALLOC,				\
			  "sa_zalloc(%p,%zu) -> %p\n",		\
			  (void *) _sa, _sz, _res);		\
		_res;						\
	})
#define sa_realloc(sa, ptr, sz, osz)					\
	({								\
		allocator *_sa = (sa);					\
		void *_ptr = (ptr);					\
		size_t _sz = (sz);					\
		size_t _osz = (osz);					\
		void *_res = sa_realloc(_sa, _ptr, _sz, _osz);		\
		TRC_DEBUG(ALLOC,					\
			  "sa_realloc(%p,%p,%zu,%zu) -> %p\n",		\
			  (void *) _sa, _ptr, _sz, _osz, _res);		\
		_res;							\
	})
/* only the length of the string is logged, not its contents */
#define sa_strdup(sa, s)						\
	({								\
		allocator *_sa = (sa);					\
		const char *_s = (s);					\
		char *_res = sa_strdup(_sa, _s);			\
		TRC_DEBUG(ALLOC,					\
			  "sa_strdup(%p,len=%zu) -> %p\n",		\
			  (void *) _sa, strlen(_s), (void *) _res);	\
		_res;							\
	})
#define sa_strndup(sa, s, l)						\
	({								\
		allocator *_sa = (sa);					\
		const char *_s = (s);					\
		size_t _l = (l);					\
		char *_res = sa_strndup(_sa, _s, _l);			\
		TRC_DEBUG(ALLOC,					\
			  "sa_strndup(%p,len=%zu) -> %p\n",		\
			  (void *) _sa, _l, (void *) _res);		\
		_res;							\
	})
#endif
2655 :
2656 : #endif /* _GDK_H_ */
|