Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "mal.h"
15 : #include "mal_stack.h"
16 : #include "mal_linker.h"
17 : #include "gdk.h"
18 : #include "sql_catalog.h"
19 : #include "sql_scenario.h"
20 : #include "sql_cast.h"
21 : #include "sql_execute.h"
22 : #include "sql_storage.h"
23 : #include "cheader.h"
24 : #include "cheader.text.h"
25 :
26 : #include "gdk_time.h"
27 : #include "mutils.h"
28 :
29 : #include <setjmp.h>
30 : #include <signal.h>
31 : #include <sys/mman.h>
32 : #include <unistd.h>
33 : #include <string.h>
34 :
35 : #if defined(__GNUC__) && !defined(__clang__)
36 : #pragma GCC diagnostic ignored "-Wclobbered"
37 : #endif
38 :
/* Setting names and the cached booleans derived from them. The booleans are
 * filled in by CUDFprelude() via GDKgetenv_istrue()/GDKgetenv_isyes(); both
 * features are off until then. */
39 : static const char mprotect_enableflag[] = "enable_mprotect";
40 : static bool option_enable_mprotect = false;
41 : static const char longjmp_enableflag[] = "enable_longjmp";
42 : static bool option_enable_longjmp = false;
43 :
/* Header placed in front of every allocation handed out by
 * wrapped_GDK_malloc(); add_allocated_region() links it into the list whose
 * head is capi_tls_s.ar. */
44 : typedef struct _allocated_region {
45 : struct _allocated_region *next;
46 : } allocated_region;
47 :
/* One entry in the singly linked list built by mprotect_region(): the start
 * address and byte length of a registered memory range. */
48 : struct _mprotected_region;
49 : typedef struct _mprotected_region {
50 : void *addr;
51 : size_t len;
52 :
53 : struct _mprotected_region *next;
54 : } mprotected_region;
55 :
56 : static char *mprotect_region(void *addr, size_t len,
57 : mprotected_region **regions);
/* Per-call state reachable through thread-local storage (capi_tls_key):
 *   ar - head of the list of allocations made via wrapped_GDK_malloc()
 *   jb - jump target used by handler() (value 1) and by the allocation
 *        helpers on out-of-memory when longjmp is enabled (value 2) */
58 : struct capi_tls_s {
59 : allocated_region *ar;
60 : jmp_buf jb;
61 : };
62 : static MT_TLS_t capi_tls_key;
63 :
/* Signature of a compiled UDF: receives the input and output descriptor
 * arrays plus the allocator/free callbacks, and returns a char* (the
 * generated wrapper ends with "return 0;"). */
64 : typedef char *(*jitted_function)(void **inputs, void **outputs,
65 : malloc_function_ptr malloc, free_function_ptr free);
66 :
/* Entry of the compiled-function cache. A lookup matches on expression_hash
 * and on a strcmp() of parameters; entries that share a bucket are chained
 * through next. dll_handle is the dlopen() handle the function came from. */
67 : struct _cached_functions;
68 : typedef struct _cached_functions {
69 : jitted_function function;
70 : BUN expression_hash;
71 : char *parameters;
72 : void *dll_handle;
73 : struct _cached_functions *next;
74 : } cached_functions;
75 :
/* Number of buckets in function_cache; the bucket index is the hash of the
 * function name modulo this value. */
76 : #define FUNCTION_CACHE_SIZE 128
77 :
/* Bucket array of compiled functions; reads and insertions take cache_lock.
 * cudf_initialized is the one-shot guard used by CUDFprelude(). */
78 : static cached_functions *function_cache[FUNCTION_CACHE_SIZE];
79 : static MT_Lock cache_lock = MT_LOCK_INITIALIZER(cache_lock);
80 : static int cudf_initialized = 0;
81 :
82 : static str CUDFeval(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci,
83 : bool grouped);
84 :
85 36 : static str CUDFevalStd(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
86 : {
87 36 : return CUDFeval(cntxt, mb, stk, pci, false);
88 : }
89 :
90 12 : static str CUDFevalAggr(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
91 : {
92 12 : return CUDFeval(cntxt, mb, stk, pci, true);
93 : }
94 :
95 6 : static str CUDFprelude(void)
96 : {
97 6 : if (!cudf_initialized) {
98 6 : cudf_initialized = true;
99 6 : option_enable_mprotect = GDKgetenv_istrue(mprotect_enableflag) || GDKgetenv_isyes(mprotect_enableflag);
100 6 : option_enable_longjmp = GDKgetenv_istrue(longjmp_enableflag) || GDKgetenv_isyes(longjmp_enableflag);
101 6 : MT_alloc_tls(&capi_tls_key);
102 : }
103 6 : return MAL_SUCCEED;
104 : }
105 :
/* Writes data_size bytes from data to f as a single item.
 * Returns true when the stream's error indicator is clear afterwards,
 * false otherwise. */
static bool WriteDataToFile(FILE *f, const void *data, size_t data_size)
{
	(void) fwrite(data, data_size, 1, f);

	/* success is judged solely by the stream's error indicator */
	if (ferror(f))
		return false;
	return true;
}
111 :
/* Writes the NUL-terminated string data (without its terminator) to f.
 * Returns whatever WriteDataToFile() reports. */
static bool WriteTextToFile(FILE *f, const char *data)
{
	const size_t text_len = strlen(data);

	return WriteDataToFile(f, data, text_len);
}
116 :
117 0 : static _Noreturn void handler(int sig, siginfo_t *si, void *unused)
118 : {
119 0 : (void)sig;
120 0 : (void)si;
121 0 : (void)unused;
122 :
123 0 : struct capi_tls_s *tls = MT_tls_get(capi_tls_key);
124 0 : longjmp(tls->jb, 1);
125 : }
126 :
127 48 : static bool can_mprotect_region(void* addr) {
128 48 : if (!option_enable_mprotect) return false;
129 0 : int pagesize = MT_pagesize();
130 0 : void* page_begin = (void *)((size_t)addr - (size_t)addr % pagesize);
131 0 : return page_begin == addr;
132 : }
133 :
134 0 : static char *mprotect_region(void *addr, size_t len,
135 : mprotected_region **regions)
136 : {
137 0 : mprotected_region *region;
138 0 : if (len == 0)
139 : return NULL;
140 :
141 0 : assert(can_mprotect_region(addr));
142 :
143 0 : region = GDKmalloc(sizeof(mprotected_region));
144 0 : if (!region) {
145 : return MAL_MALLOC_FAIL;
146 : }
147 0 : region->addr = addr;
148 0 : region->len = len;
149 0 : region->next = *regions;
150 0 : *regions = region;
151 0 : return NULL;
152 : }
153 :
/* Makes the range [addr, addr+len) readable and writable again.
 * A NULL addr is ignored; the result of mprotect() is not checked. */
static void clear_mprotect(void *addr, size_t len)
{
	if (addr == NULL)
		return;
	mprotect(addr, len, PROT_READ | PROT_WRITE);
}
159 :
/* Writes the NUL-terminated string `data` to `f` with WriteTextToFile().
 * On failure it clears errno, stores a "Write error." exception in the
 * caller's `msg` variable and jumps to the caller's `wrapup` label, so it
 * can only be used where both exist. The expansion is a bare `if` statement
 * (not wrapped in do/while), so avoid using it as the body of an unbraced
 * if/else. */
160 : #define ATTEMPT_TO_WRITE_TO_FILE(f, data) \
161 : if (!WriteTextToFile(f, data)) { \
162 : errno = 0; \
163 : msg = createException(MAL, "cudf.eval", "Write error."); \
164 : goto wrapup; \
165 : }
166 :
/* Same contract as ATTEMPT_TO_WRITE_TO_FILE, but writes exactly `size` bytes
 * starting at `data` with WriteDataToFile() (no NUL terminator needed). */
167 : #define ATTEMPT_TO_WRITE_DATA_TO_FILE(f, data, size) \
168 : if (!WriteDataToFile(f, data, size)) { \
169 : errno = 0; \
170 : msg = createException(MAL, "cudf.eval", "Write error."); \
171 : goto wrapup; \
172 : }
173 :
174 312 : static void *jump_GDK_malloc(size_t size)
175 : {
176 312 : if (size == 0)
177 : return NULL;
178 312 : void *ptr = GDKmalloc(size);
179 312 : if (!ptr && option_enable_longjmp) {
180 0 : struct capi_tls_s *tls = MT_tls_get(capi_tls_key);
181 0 : longjmp(tls->jb, 2);
182 : }
183 : return ptr;
184 : }
185 :
186 296 : static inline void *add_allocated_region(void *ptr)
187 : {
188 296 : allocated_region *region = (allocated_region *)ptr;
189 296 : struct capi_tls_s *tls = MT_tls_get(capi_tls_key);
190 296 : region->next = tls->ar;
191 296 : tls->ar = region;
192 296 : return (char *)ptr + sizeof(allocated_region);
193 : }
194 :
195 297 : static void *wrapped_GDK_malloc(size_t size)
196 : {
197 297 : if (size == 0)
198 : return NULL;
199 296 : void *ptr = jump_GDK_malloc(size + sizeof(allocated_region));
200 296 : return add_allocated_region(ptr);
201 : }
202 :
/* Deliberate no-op counterpart of wrapped_GDK_malloc(): the argument is
 * ignored and nothing is released here.
 * NOTE(review): allocations are tracked on capi_tls_s.ar, presumably so they
 * can be released in bulk elsewhere; that code is not visible here. */
static void wrapped_GDK_free(void* ptr) {
	(void) ptr;
}
207 :
/* Defines <tpename>_is_null(value): non-zero when value is the nil of that
 * type, as decided by is_<tpename>_nil(). */
#define GENERATE_NUMERIC_IS_NULL(type, tpename)                                \
	static int tpename##_is_null(type value) { return is_##tpename##_nil(value); }

/* Defines <tpename>_initialize(self, count): (re)backs the descriptor with a
 * fresh transient BAT of `count` elements and points self->data at its heap.
 * A BAT already held by the descriptor is released first; because
 * self->data pointed into that BAT, data and count are reset at the same
 * time so a failed re-initialization never leaves a stale pointer behind.
 * If COLnew() fails, control leaves through the jump buffer (value 2) when
 * longjmp is enabled, otherwise the function returns with self->bat NULL. */
#define GENERATE_NUMERIC_INITIALIZE(type, tpename)                             \
	static void tpename##_initialize(struct cudf_data_struct_##tpename *self,  \
									 size_t count)                             \
	{                                                                          \
		BAT* b;                                                                \
		if (self->bat) {                                                       \
			BBPunfix(((BAT*)self->bat)->batCacheid);                           \
			self->bat = NULL;                                                  \
			self->data = NULL;                                                 \
			self->count = 0;                                                   \
		}                                                                      \
		b = COLnew(0, TYPE_##tpename, count, TRANSIENT);                       \
		if (!b) {                                                              \
			if (option_enable_longjmp) {                                       \
				struct capi_tls_s *tls = MT_tls_get(capi_tls_key);             \
				longjmp(tls->jb, 2);                                           \
			}                                                                  \
			else return;                                                       \
		}                                                                      \
		self->bat = (void*) b;                                                 \
		self->count = count;                                                   \
		self->data = (type*) b->theap->base;                                   \
		BATsetcount(b, count);                                                 \
	}

/* Emits both helpers above for one numeric type. */
#define GENERATE_NUMERIC_ALL(type, tpename)                                    \
	GENERATE_NUMERIC_INITIALIZE(type, tpename)                                 \
	GENERATE_NUMERIC_IS_NULL(type, tpename)
237 :
238 :
/* For non-numeric types: forward-declares <tpename>_is_null() and defines
 * <tpename>_initialize(self, count), which sets self->count and allocates
 * count elements (each sizeof(self->null_value)) via jump_GDK_malloc().
 * self->data is NULL when count is 0 or when the allocation fails with
 * longjmp disabled; any previous self->data is overwritten, not freed. */
239 : #define GENERATE_BASE_HEADERS(type, tpename) \
240 : static int tpename##_is_null(type value); \
241 : static void tpename##_initialize(struct cudf_data_struct_##tpename *self, \
242 : size_t count) \
243 : { \
244 : self->count = count; \
245 : self->data = jump_GDK_malloc(count * sizeof(self->null_value)); \
246 : }
247 :
/* Numeric types: BAT-backed <type>_initialize() plus <type>_is_null(). */
248 0 : GENERATE_NUMERIC_ALL(bit, bit);
249 0 : GENERATE_NUMERIC_ALL(bte, bte);
250 0 : GENERATE_NUMERIC_ALL(sht, sht);
251 19 : GENERATE_NUMERIC_ALL(int, int);
252 10 : GENERATE_NUMERIC_ALL(lng, lng);
253 5 : GENERATE_NUMERIC_ALL(flt, flt);
254 10 : GENERATE_NUMERIC_ALL(dbl, dbl);
255 0 : GENERATE_NUMERIC_ALL(oid, oid);
256 :
/* Non-numeric types: malloc-backed <type>_initialize() is defined here,
 * <type>_is_null() is only declared. For blob both are only declared. */
257 8 : GENERATE_BASE_HEADERS(char *, str);
258 2 : GENERATE_BASE_HEADERS(cudf_data_date, date);
259 2 : GENERATE_BASE_HEADERS(cudf_data_time, time);
260 2 : GENERATE_BASE_HEADERS(cudf_data_timestamp, timestamp);
261 : static int blob_is_null(cudf_data_blob value);
262 : static void blob_initialize(struct cudf_data_struct_blob *self,
263 : size_t count);
264 :
/* Statement fragment that allocates a zeroed input descriptor of type
 * struct cudf_data_struct_<tpe>, stores it in inputs[index] and fills in the
 * is_null/initialize callbacks and the scale (10^scale of the SQL argument
 * when `argnode` is set, else 1).
 * Expands in the caller's scope: it declares `bat_data` and relies on the
 * caller's `inputs`, `index`, `argnode`, `msg` and the `wrapup` label. */
265 : #define GENERATE_BAT_INPUT_BASE(tpe) \
266 : struct cudf_data_struct_##tpe *bat_data = \
267 : GDKzalloc(sizeof(struct cudf_data_struct_##tpe)); \
268 : if (!bat_data) { \
269 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL); \
270 : goto wrapup; \
271 : } \
272 : inputs[index] = bat_data; \
273 : bat_data->is_null = tpe##_is_null; \
274 : bat_data->scale = \
275 : argnode ? pow(10, ((sql_arg *)argnode->data)->type.scale) : 1; \
276 : bat_data->bat = NULL; \
277 : bat_data->initialize = (void (*)(void *, size_t))tpe##_initialize;
278 :
/* Builds the input descriptor for BAT `b` of element type `tpe`.
 * After GENERATE_BAT_INPUT_BASE the data pointer is set up in one of three
 * ways:
 *   - dense BAT: the sequence starting at b->tseqbase is materialized into a
 *     freshly allocated array (alloced = true);
 *   - page-aligned tail with mprotect enabled: the descriptor points directly
 *     at the BAT's tail and the range is registered in the caller's
 *     `regions` list;
 *   - otherwise: the tail is copied into a freshly allocated array
 *     (alloced = true).
 * Expands in the caller's scope and relies on `regions`, `msg` and the
 * `wrapup` label in addition to what GENERATE_BAT_INPUT_BASE needs. */
#define GENERATE_BAT_INPUT(b, tpe)                                             \
	{                                                                          \
		char *mprotect_retval;                                                 \
		GENERATE_BAT_INPUT_BASE(tpe);                                          \
		bat_data->count = BATcount(b);                                         \
		bat_data->null_value = tpe##_nil;                                      \
		if (BATtdense(b)) {                                                    \
			size_t it = 0;                                                     \
			tpe val = b->tseqbase;                                             \
			/* bat is dense, materialize it */                                 \
			bat_data->data = GDKmalloc(                                        \
				bat_data->count * sizeof(bat_data->null_value));               \
			if (!bat_data->data) {                                             \
				msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);      \
				goto wrapup;                                                   \
			}                                                                  \
			bat_data->alloced = true;                                          \
			for (it = 0; it < bat_data->count; it++) {                         \
				bat_data->data[it] = val++;                                    \
			}                                                                  \
		} else if (can_mprotect_region(Tloc(b, 0))) {                          \
			bat_data->data = (tpe *)Tloc(b, 0);                                \
			mprotect_retval = mprotect_region(                                 \
				bat_data->data,                                                \
				bat_data->count * sizeof(bat_data->null_value), &regions);     \
			if (mprotect_retval) {                                             \
				msg = createException(MAL, "cudf.eval",                        \
									  "Failed to mprotect region: %s",         \
									  mprotect_retval);                        \
				goto wrapup;                                                   \
			}                                                                  \
		} else {                                                               \
			/* cannot mprotect bat region, copy data */                        \
			bat_data->data = GDKmalloc(                                        \
				bat_data->count * sizeof(bat_data->null_value));               \
			if (bat_data->count > 0 && !bat_data->data) {                      \
				msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);      \
				goto wrapup;                                                   \
			}                                                                  \
			bat_data->alloced = true;                                          \
			/* skip the copy for empty input: data may be NULL here */         \
			if (bat_data->count > 0) {                                         \
				memcpy(bat_data->data, Tloc(b, 0),                             \
					   bat_data->count * sizeof(bat_data->null_value));        \
			}                                                                  \
		}                                                                      \
	}
323 :
/* Statement fragment that allocates a zeroed output descriptor of type
 * struct cudf_data_struct_<tpe>, stores it in outputs[index] and sets it up
 * empty (count 0, data NULL) with the is_null/initialize callbacks and the
 * scale (10^scale of the SQL argument when `argnode` is set, else 1).
 * Expands in the caller's scope: it declares `bat_data` and relies on the
 * caller's `outputs`, `index`, `argnode`, `msg` and the `wrapup` label. */
324 : #define GENERATE_BAT_OUTPUT_BASE(tpe) \
325 : struct cudf_data_struct_##tpe *bat_data = \
326 : GDKzalloc(sizeof(struct cudf_data_struct_##tpe)); \
327 : if (!bat_data) { \
328 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL); \
329 : goto wrapup; \
330 : } \
331 : outputs[index] = bat_data; \
332 : bat_data->count = 0; \
333 : bat_data->data = NULL; \
334 : bat_data->is_null = tpe##_is_null; \
335 : bat_data->scale = \
336 : argnode ? pow(10, ((sql_arg *)argnode->data)->type.scale) : 1; \
337 : bat_data->initialize = (void (*)(void *, size_t))tpe##_initialize;
338 :
/* Block form of GENERATE_BAT_OUTPUT_BASE for types that have a <tpe>_nil
 * constant: additionally records that constant as the descriptor's
 * null_value. */
339 : #define GENERATE_BAT_OUTPUT(tpe) \
340 : { \
341 : GENERATE_BAT_OUTPUT_BASE(tpe); \
342 : bat_data->null_value = tpe##_nil; \
343 : }
344 :
/* Setting consulted only in NDEBUG builds to request a debug (-g -O0)
 * compile of the UDF; non-NDEBUG builds always compile UDFs that way. */
345 : #ifdef NDEBUG
346 : static const char debug_flag[] = "capi_use_debug";
347 : #endif
/* Settings that override the C and C++ compiler commands. */
348 : static const char cc_flag[] = "capi_cc";
349 : static const char cpp_flag[] = "capi_cpp";
350 :
/* Line prefixes in the UDF source whose remainder is passed to the compile
 * step (CFLAGS) or the link step (LDFLAGS) instead of being written to the
 * generated file. Note the trailing space is part of the prefix. */
351 : static const char cflags_pragma[] = "#pragma CFLAGS ";
352 : static const char ldflags_pragma[] = "#pragma LDFLAGS ";
353 :
/* Compiler commands used when the corresponding setting is absent. */
354 : #define JIT_COMPILER_NAME "cc"
355 : #define JIT_CPP_COMPILER_NAME "c++"
356 :
357 : static bool isAlloced(int type, void *struct_ptr);
358 : static bool isValloced(int type, void *struct_ptr);
359 : static size_t GetTypeCount(int type, void *struct_ptr);
360 : static void *GetTypeData(int type, void *struct_ptr);
361 : static void *GetTypeBat(int type, void *struct_ptr);
362 : static const char *GetTypeName(int type);
363 :
364 : static void data_from_date(date d, cudf_data_date *ptr);
365 : static date date_from_data(cudf_data_date *ptr);
366 : static void data_from_time(daytime d, cudf_data_time *ptr);
367 : static daytime time_from_data(cudf_data_time *ptr);
368 : static void data_from_timestamp(timestamp d, cudf_data_timestamp *ptr);
369 : static timestamp timestamp_from_data(cudf_data_timestamp *ptr);
370 :
/* Alphabet from which the random temporary file names are drawn. */
371 : static const char valid_path_characters[] = "abcdefghijklmnopqrstuvwxyz";
372 :
373 : static str
374 3 : empty_return(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, size_t retcols, oid seqbase)
375 : {
376 3 : str msg = MAL_SUCCEED;
377 3 : void **res = GDKzalloc(retcols * sizeof(void*));
378 :
379 3 : if (!res) {
380 0 : msg = createException(MAL, "capi.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
381 0 : goto bailout;
382 : }
383 :
384 6 : for (size_t i = 0; i < retcols; i++) {
385 3 : if (isaBatType(getArgType(mb, pci, i))) {
386 2 : BAT *b = COLnew(seqbase, getBatType(getArgType(mb, pci, i)), 0, TRANSIENT);
387 2 : if (!b) {
388 0 : msg = createException(MAL, "capi.eval", GDK_EXCEPTION);
389 0 : goto bailout;
390 : }
391 2 : ((BAT**)res)[i] = b;
392 : } else { // single value return, only for non-grouped aggregations
393 : // return NULL to conform to SQL aggregates
394 1 : int tpe = getArgType(mb, pci, i);
395 1 : if (!VALinit(&stk->stk[pci->argv[i]], tpe, ATOMnilptr(tpe))) {
396 0 : msg = createException(MAL, "capi.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
397 0 : goto bailout;
398 : }
399 1 : ((ValPtr*)res)[i] = &stk->stk[pci->argv[i]];
400 : }
401 : }
402 :
403 3 : bailout:
404 3 : if (res) {
405 6 : for (size_t i = 0; i < retcols; i++) {
406 3 : if (isaBatType(getArgType(mb, pci, i))) {
407 2 : BAT *b = ((BAT**)res)[i];
408 :
409 2 : if (b && msg) {
410 0 : BBPreclaim(b);
411 2 : } else if (b) {
412 2 : *getArgReference_bat(stk, pci, i) = b->batCacheid;
413 2 : BBPkeepref(b);
414 : }
415 1 : } else if (msg) {
416 0 : ValPtr pt = ((ValPtr*)res)[i];
417 :
418 0 : if (pt)
419 0 : VALclear(pt);
420 : }
421 : }
422 3 : GDKfree(res);
423 : }
424 3 : return msg;
425 : }
426 :
427 48 : static str CUDFeval(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci,
428 : bool grouped)
429 : {
430 48 : sql_func *sqlfun = NULL;
431 48 : bit use_cpp = *getArgReference_bit(stk, pci, pci->retc + 1);
432 48 : str exprStr = *getArgReference_str(stk, pci, pci->retc + 2);
433 :
434 48 : const int ARG_OFFSET = 3;
435 :
436 48 : size_t i = 0, j = 0;
437 48 : char argbuf[64];
438 48 : char buf[8192];
439 48 : char *oname = NULL;
440 48 : char error_buf[BUFSIZ];
441 48 : char total_error_buf[8192];
442 48 : size_t error_buffer_position = 0;
443 48 : str *args = NULL;
444 48 : str *output_names = NULL;
445 48 : char *msg = MAL_SUCCEED;
446 48 : node *argnode;
447 48 : int seengrp = 0;
448 48 : FILE *f = NULL;
449 48 : void *handle = NULL;
450 48 : jitted_function func = NULL;
451 48 : int ret, limit_argc = 0;
452 :
453 48 : FILE *compiler = NULL;
454 48 : int compiler_return_code;
455 :
456 48 : void **inputs = NULL;
457 48 : size_t input_count = 0;
458 48 : void **outputs = NULL;
459 48 : size_t output_count = 0;
460 48 : BAT **input_bats = NULL;
461 48 : mprotected_region *regions = NULL, *region_iter = NULL;
462 :
463 48 : lng initial_output_count = -1;
464 :
465 48 : struct sigaction sa = (struct sigaction) {.sa_flags = 0}, oldsa, oldsb;
466 48 : sigset_t signal_set;
467 :
468 : #ifdef NDEBUG
469 : bool debug_build =
470 : GDKgetenv_istrue(debug_flag) || GDKgetenv_isyes(debug_flag);
471 : #else
472 48 : bool debug_build = true;
473 : #endif
474 48 : char* extra_cflags = NULL;
475 48 : char* extra_ldflags = NULL;
476 :
477 :
478 48 : const char *compilation_flags = debug_build ? "-g -O0" : "-O2";
479 96 : const char *c_compiler =
480 1 : use_cpp ? (GDKgetenv(cpp_flag) ? GDKgetenv(cpp_flag)
481 1 : : JIT_CPP_COMPILER_NAME)
482 48 : : (GDKgetenv(cc_flag) ? GDKgetenv(cc_flag) : JIT_COMPILER_NAME);
483 :
484 48 : const char struct_prefix[] = "struct cudf_data_struct_";
485 48 : const char *funcname;
486 :
487 48 : BUN expression_hash = 0, funcname_hash = 0;
488 48 : cached_functions *cached_function;
489 48 : char *function_parameters = NULL;
490 48 : size_t input_size = 0;
491 48 : bit non_grouped_aggregate = 0;
492 :
493 48 : size_t index = 0;
494 48 : int bat_type = 0;
495 48 : const char* tpe = NULL;
496 :
497 48 : size_t extra_inputs = 0;
498 :
499 48 : struct capi_tls_s tls;
500 :
501 48 : tls.ar = NULL;
502 48 : MT_tls_set(capi_tls_key, &tls);
503 :
504 48 : (void)cntxt;
505 :
506 48 : if (!GDKgetenv_istrue("embedded_c") && !GDKgetenv_isyes("embedded_c"))
507 0 : throw(MAL, "cudf.eval", "Embedded C has not been enabled. "
508 : "Start server with --set embedded_c=true");
509 :
510 : // we need to be able to catch segfaults and bus errors
511 : // so we can work with mprotect to prevent UDFs from changing
512 : // the input data
513 :
514 : // we remove them from the pthread_sigmask
515 48 : if (option_enable_mprotect) {
516 0 : (void)sigemptyset(&signal_set);
517 0 : (void)sigaddset(&signal_set, SIGSEGV);
518 0 : (void)sigaddset(&signal_set, SIGBUS);
519 0 : (void)pthread_sigmask(SIG_UNBLOCK, &signal_set, NULL);
520 : }
521 :
522 48 : sqlfun = (sql_func *)*getArgReference_ptr(stk, pci, pci->retc);
523 48 : funcname = sqlfun ? sqlfun->base.name : "yet_another_c_function";
524 :
525 48 : args = (str *)GDKzalloc(sizeof(str) * pci->argc);
526 48 : output_names = (str *)GDKzalloc(sizeof(str) * pci->argc);
527 48 : if (!args || !output_names) {
528 0 : throw(MAL, "cudf.eval", MAL_MALLOC_FAIL);
529 : }
530 :
531 : // retrieve the argument names from the sqlfun structure
532 : // first argument after the return contains the pointer to the sql_func
533 : // structure
534 48 : if (sqlfun != NULL) {
535 : // retrieve the argument names (inputs)
536 48 : if (sqlfun->ops->cnt > 0) {
537 46 : int carg = pci->retc + ARG_OFFSET;
538 46 : argnode = sqlfun->ops->h;
539 103 : while (argnode) {
540 57 : char *argname = ((sql_arg *)argnode->data)->name;
541 57 : args[carg] = GDKstrdup(argname);
542 57 : if (!args[carg]) {
543 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
544 0 : goto wrapup;
545 : }
546 57 : carg++;
547 57 : argnode = argnode->next;
548 : }
549 : }
550 : // retrieve the output names
551 48 : argnode = sqlfun->res->h;
552 98 : for (i = 0; i < (size_t)sqlfun->res->cnt; i++) {
553 50 : output_names[i] = GDKstrdup(((sql_arg *)argnode->data)->name);
554 50 : argnode = argnode->next;
555 : }
556 : }
557 :
558 : // name unnamed outputs
559 98 : for (i = 0; i < (size_t)pci->retc; i++) {
560 50 : if (!output_names[i]) {
561 0 : if (pci->retc > 1) {
562 0 : snprintf(argbuf, sizeof(argbuf), "output%zu", i);
563 : } else {
564 : // just call it "output" if there is only one
565 0 : snprintf(argbuf, sizeof(argbuf), "output");
566 : }
567 0 : output_names[i] = GDKstrdup(argbuf);
568 : }
569 : }
570 : // the first unknown argument is the group, we don't really care for the
571 : // rest.
572 106 : for (i = pci->retc + ARG_OFFSET; i < (size_t)pci->argc; i++) {
573 64 : if (args[i] == NULL) {
574 7 : if (grouped && (i+2) == (size_t)pci->argc) {
575 6 : args[i] = GDKstrdup("aggr_group");
576 6 : if (!args[i]) {
577 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
578 0 : goto wrapup;
579 : }
580 6 : seengrp = i++; /* Don't be interested in the extents BAT */
581 6 : break;
582 : } else {
583 1 : snprintf(argbuf, sizeof(argbuf), "arg%zu", i - pci->retc - 1);
584 1 : args[i] = GDKstrdup(argbuf);
585 1 : if (!args[i]) {
586 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
587 0 : goto wrapup;
588 : }
589 : }
590 : }
591 : }
592 : // the first index where input arguments are not relevant for the C UDF
593 48 : limit_argc = i;
594 : // non-grouped aggregates don't have the group list
595 : // to allow users to write code for both grouped and non-grouped aggregates
596 : // we create an "aggr_group" BAT for non-grouped aggregates
597 48 : non_grouped_aggregate = grouped && !seengrp;
598 :
599 48 : input_count = limit_argc - (pci->retc + ARG_OFFSET);
600 48 : output_count = pci->retc;
601 :
602 : // begin the compilation phase
603 : // first look up if we have already compiled this function
604 48 : expression_hash = 0;
605 48 : expression_hash = strHash(exprStr);
606 48 : funcname_hash = strHash(funcname);
607 48 : funcname_hash = funcname_hash % FUNCTION_CACHE_SIZE;
608 48 : j = 0;
609 306 : for (i = 0; i < (size_t)limit_argc; i++) {
610 258 : if (args[i]) {
611 64 : j += strlen(args[i]);
612 : }
613 258 : if (output_names[i]) {
614 50 : j += strlen(output_names[i]);
615 : }
616 : }
617 :
618 96 : function_parameters =
619 48 : GDKzalloc((j + input_count + output_count + 1) * sizeof(char));
620 48 : if (!function_parameters) {
621 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
622 0 : goto wrapup;
623 : }
624 112 : for (i = 0; i < input_count; i++) {
625 64 : if (!isaBatType(getArgType(mb, pci, i))) {
626 31 : function_parameters[i] = getArgType(mb, pci, i);
627 : } else {
628 33 : function_parameters[i] = getBatType(getArgType(mb, pci, i));
629 : }
630 : }
631 98 : for (i = 0; i < output_count; i++) {
632 50 : if (!isaBatType(getArgType(mb, pci, i))) {
633 16 : function_parameters[input_count + i] = getArgType(mb, pci, i);
634 : } else {
635 34 : function_parameters[input_count + i] =
636 : getBatType(getArgType(mb, pci, i));
637 : }
638 : }
639 48 : j = input_count + output_count;
640 306 : for (i = 0; i < (size_t)limit_argc; i++) {
641 258 : if (args[i]) {
642 64 : size_t len = strlen(args[i]);
643 64 : memcpy(function_parameters + j, args[i], len);
644 64 : j += len;
645 : }
646 258 : if (output_names[i]) {
647 50 : size_t len = strlen(output_names[i]);
648 50 : memcpy(function_parameters + j, output_names[i], len);
649 50 : j += len;
650 : }
651 : }
652 :
653 48 : MT_lock_set(&cache_lock);
654 48 : cached_function = function_cache[funcname_hash];
655 72 : while (cached_function) {
656 36 : if (cached_function->expression_hash == expression_hash &&
657 18 : strcmp(cached_function->parameters, function_parameters) == 0) {
658 : // this function matches our compiled function
659 : // in both source code and parameters
660 : // use the already compiled function instead of recompiling
661 12 : func = cached_function->function;
662 12 : break;
663 : }
664 24 : cached_function = cached_function->next;
665 : }
666 48 : MT_lock_unset(&cache_lock);
667 :
668 48 : if (!func) {
669 36 : char fname[MAXPATH];
670 : // function was not found in the cache
671 : // we have to compile it
672 :
673 : // first generate the names of the files
674 : // we place the temporary files in the DELDIR directory
675 : // because this will be removed again upon server startup
676 36 : const int RANDOM_NAME_SIZE = 32;
677 36 : const char prefix[] = TEMPDIR_NAME DIR_SEP_STR;
678 36 : size_t prefix_size = strlen(prefix);
679 36 : char deldirpath[MAXPATH];
680 :
681 36 : memcpy(buf, prefix, sizeof(char) * strlen(prefix));
682 : // generate a random 32-character name for the temporary files
683 1188 : for (i = prefix_size; i < prefix_size + RANDOM_NAME_SIZE; i++) {
684 1152 : buf[i] = valid_path_characters[rand() %
685 : (sizeof(valid_path_characters) - 1)];
686 : }
687 36 : buf[i] = '\0';
688 36 : if (GDKfilepath(fname, sizeof(fname), 0, BATDIR, buf, "c") != GDK_SUCCEED) {
689 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
690 0 : goto wrapup;
691 : }
692 36 : oname = GDKstrdup(fname);
693 36 : if (oname == NULL) {
694 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
695 0 : goto wrapup;
696 : }
697 36 : oname[strlen(oname) - 1] = 'o';
698 :
699 36 : memmove(buf + strlen(SO_PREFIX) + prefix_size, buf + prefix_size,
700 : i + 1 - prefix_size);
701 36 : memcpy(buf + prefix_size, SO_PREFIX, sizeof(char) * strlen(SO_PREFIX));
702 36 : char libname[MAXPATH];
703 36 : if (GDKfilepath(libname, sizeof(libname), 0, BATDIR, buf, SO_EXT[0] == '.' ? &SO_EXT[1] : SO_EXT) != GDK_SUCCEED) {
704 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
705 0 : goto wrapup;
706 : }
707 :
708 : // if DELDIR directory does not exist, create it
709 36 : if (GDKfilepath(deldirpath, sizeof(deldirpath), 0, NULL, TEMPDIR, NULL) != GDK_SUCCEED) {
710 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
711 0 : goto wrapup;
712 : }
713 36 : if (MT_mkdir(deldirpath) < 0 && errno != EEXIST) {
714 0 : msg = createException(MAL, "cudf.eval",
715 : "cannot create directory %s\n", deldirpath);
716 0 : goto wrapup;
717 : }
718 :
719 : // now generate the source file
720 36 : f = MT_fopen(fname, "w+");
721 36 : if (!f) {
722 0 : msg = createException(MAL, "cudf.eval",
723 : "Failed to open file for JIT compilation: %s",
724 0 : GDKstrerror(errno, (char[128]){0}, 128));
725 0 : errno = 0;
726 0 : goto wrapup;
727 : }
728 :
729 : // include some standard C headers first
730 36 : ATTEMPT_TO_WRITE_TO_FILE(f, "#include <stdio.h>\n");
731 36 : ATTEMPT_TO_WRITE_TO_FILE(f, "#include <stdlib.h>\n");
732 36 : ATTEMPT_TO_WRITE_TO_FILE(f, "#include <string.h>\n");
733 : // we include "cheader.h", but not directly to avoid having to deal with
734 : // headers, etc...
735 : // Instead it is embedded in a string (loaded from "cheader.text.h")
736 : // this file contains the structures used for input/output arguments
737 36 : ATTEMPT_TO_WRITE_TO_FILE(f, cheader_header_text);
738 : // some monetdb-style typedefs to make it easier
739 36 : ATTEMPT_TO_WRITE_TO_FILE(f, "typedef int8_t bte;\n");
740 36 : ATTEMPT_TO_WRITE_TO_FILE(f, "typedef int16_t sht;\n");
741 36 : ATTEMPT_TO_WRITE_TO_FILE(f, "typedef int64_t lng;\n");
742 36 : ATTEMPT_TO_WRITE_TO_FILE(f, "typedef float flt;\n");
743 36 : ATTEMPT_TO_WRITE_TO_FILE(f, "typedef double dbl;\n");
744 36 : ATTEMPT_TO_WRITE_TO_FILE(f, "typedef char* str;\n");
745 36 : ATTEMPT_TO_WRITE_TO_FILE(f, "typedef size_t oid;\n");
746 : // now we search exprStr for any preprocessor directives (#)
747 : // we move these to the top of the file
748 : // this allows the user to normally #include files
749 : {
750 : int preprocessor_start = 0;
751 : bool is_preprocessor_directive = false;
752 : bool new_line = false;
753 9853 : for (i = 0; i < strlen(exprStr); i++) {
754 9817 : if (exprStr[i] == '\n') {
755 359 : if (is_preprocessor_directive) {
756 : // the previous line was a preprocessor directive
757 : // first check if it is one of our special preprocessor directives
758 8 : if (i - preprocessor_start >= strlen(cflags_pragma) &&
759 8 : memcmp(exprStr + preprocessor_start, cflags_pragma, strlen(cflags_pragma)) == 0) {
760 0 : size_t cflags_characters = (i - preprocessor_start) - strlen(cflags_pragma);
761 0 : if (cflags_characters > 0 && !extra_cflags) {
762 0 : extra_cflags = GDKzalloc(cflags_characters + 1);
763 0 : if (extra_cflags) {
764 0 : memcpy(extra_cflags, exprStr + preprocessor_start + strlen(cflags_pragma), cflags_characters);
765 : }
766 : }
767 8 : } else if (i - preprocessor_start >= strlen(ldflags_pragma) &&
768 8 : memcmp(exprStr + preprocessor_start, ldflags_pragma, strlen(ldflags_pragma)) == 0) {
769 0 : size_t ldflags_characters = (i - preprocessor_start) - strlen(ldflags_pragma);
770 0 : if (ldflags_characters > 0 && !extra_ldflags) {
771 0 : extra_ldflags = GDKzalloc(ldflags_characters + 1);
772 0 : if (extra_ldflags) {
773 0 : memcpy(extra_ldflags, exprStr + preprocessor_start + strlen(ldflags_pragma), ldflags_characters);
774 : }
775 : }
776 : } else {
777 : // regular preprocessor directive: write it to the file
778 8 : ATTEMPT_TO_WRITE_DATA_TO_FILE(f, exprStr +
779 : preprocessor_start,
780 8 : i - preprocessor_start);
781 8 : ATTEMPT_TO_WRITE_TO_FILE(f, "\n");
782 : }
783 : // now overwrite the preprocessor directive in the
784 : // expression string with spaces
785 157 : for (j = preprocessor_start; j < i; j++) {
786 149 : exprStr[j] = ' ';
787 : }
788 : }
789 : is_preprocessor_directive = false;
790 : new_line = true;
791 9458 : } else if (exprStr[i] == ' ' || exprStr[i] == '\t') {
792 : // skip any spaces
793 2531 : continue;
794 6927 : } else if (new_line) {
795 359 : if (exprStr[i] == '#') {
796 8 : preprocessor_start = i;
797 8 : is_preprocessor_directive = true;
798 : }
799 : new_line = false;
800 : }
801 : }
802 : }
803 :
804 : // create the actual function
805 36 : if (use_cpp) {
806 : // avoid name mangling if we are compiling C++ code
807 1 : ATTEMPT_TO_WRITE_TO_FILE(f, "\nextern \"C\"");
808 : }
809 36 : ATTEMPT_TO_WRITE_TO_FILE(f, "\nchar* ");
810 36 : ATTEMPT_TO_WRITE_TO_FILE(f, funcname);
811 36 : ATTEMPT_TO_WRITE_TO_FILE(f, "(void** __inputs, void** __outputs, "
812 36 : "malloc_function_ptr malloc, free_function_ptr free) {\n");
813 :
814 : // now we convert the input arguments from void** to the proper
815 : // input/output
816 : // of the function
817 : // first convert the input
818 87 : for (i = pci->retc + ARG_OFFSET; i < (size_t)limit_argc; i++) {
819 102 : bat_type = !isaBatType(getArgType(mb, pci, i))
820 : ? getArgType(mb, pci, i)
821 51 : : getBatType(getArgType(mb, pci, i));
822 51 : tpe = GetTypeName(bat_type);
823 51 : assert(tpe);
824 51 : if (tpe) {
825 51 : snprintf(buf, sizeof(buf),
826 : "\t%s%s %s = *((%s%s*)__inputs[%zu]);\n", struct_prefix,
827 51 : tpe, args[i], struct_prefix, tpe,
828 51 : i - (pci->retc + ARG_OFFSET));
829 51 : ATTEMPT_TO_WRITE_TO_FILE(f, buf);
830 : }
831 : }
832 36 : if (non_grouped_aggregate) {
833 : // manually add "aggr_group" for non-grouped aggregates
834 4 : bat_type = TYPE_oid;
835 4 : tpe = GetTypeName(bat_type);
836 4 : assert(tpe);
837 4 : if (tpe) {
838 4 : snprintf(buf, sizeof(buf),
839 : "\t%s%s %s = *((%s%s*)__inputs[%zu]);\n", struct_prefix,
840 : tpe, "aggr_group", struct_prefix, tpe, input_count);
841 4 : ATTEMPT_TO_WRITE_TO_FILE(f, buf);
842 : }
843 : }
844 : // output types
845 74 : for (i = 0; i < (size_t)pci->retc; i++) {
846 38 : bat_type = getBatType(getArgType(mb, pci, i));
847 38 : tpe = GetTypeName(bat_type);
848 38 : assert(tpe);
849 38 : if (tpe) {
850 38 : snprintf(buf, sizeof(buf),
851 : "\t%s%s* %s = ((%s%s*)__outputs[%zu]);\n", struct_prefix,
852 38 : tpe, output_names[i], struct_prefix, tpe, i);
853 38 : ATTEMPT_TO_WRITE_TO_FILE(f, buf);
854 : }
855 : }
856 :
857 36 : ATTEMPT_TO_WRITE_TO_FILE(f, "\n");
858 : // write the actual user defined code into the file
859 36 : ATTEMPT_TO_WRITE_TO_FILE(f, exprStr);
860 :
861 36 : ATTEMPT_TO_WRITE_TO_FILE(f, "\nreturn 0;\n}\n");
862 :
863 36 : fclose(f);
864 36 : f = NULL;
865 :
866 : // now it's time to try to compile the code
867 : // we use popen to capture any error output
868 36 : snprintf(buf, sizeof(buf), "%s %s -c -fPIC %s %s -o %s 2>&1 >/dev/null",
869 : c_compiler, extra_cflags ? extra_cflags : "", compilation_flags, fname, oname);
870 36 : compiler = popen(buf, "r");
871 36 : if (!compiler) {
872 0 : msg = createException(MAL, "cudf.eval", "Failed popen");
873 0 : goto wrapup;
874 : }
875 : // read the error stream into the error buffer until the compiler is
876 : // done
877 36 : while (fgets(error_buf, sizeof(error_buf), compiler)) {
878 0 : size_t error_size = strlen(error_buf);
879 0 : snprintf(total_error_buf + error_buffer_position,
880 : sizeof(total_error_buf) - error_buffer_position, "%s",
881 : error_buf);
882 0 : error_buffer_position += error_size;
883 0 : if (error_buffer_position >= sizeof(total_error_buf)) break;
884 : }
885 :
886 36 : compiler_return_code = pclose(compiler);
887 36 : compiler = NULL;
888 :
889 36 : if (compiler_return_code != 0) {
890 : // failure in compiling the code
891 : // report the failure to the user
892 0 : msg = createException(MAL, "cudf.eval",
893 : "Failed to compile C UDF:\n%s",
894 : total_error_buf);
895 0 : goto wrapup;
896 : }
897 :
898 36 : error_buffer_position = 0;
899 36 : error_buf[0] = '\0';
900 :
901 36 : snprintf(buf, sizeof(buf), "%s %s %s -shared -o %s 2>&1 >/dev/null", c_compiler,
902 : extra_ldflags ? extra_ldflags : "", oname, libname);
903 36 : GDKfree(oname);
904 36 : oname = NULL;
905 36 : compiler = popen(buf, "r");
906 36 : if (!compiler) {
907 0 : msg = createException(MAL, "cudf.eval", "Failed popen");
908 0 : goto wrapup;
909 : }
910 36 : while (fgets(error_buf, sizeof(error_buf), compiler)) {
911 0 : size_t error_size = strlen(error_buf);
912 0 : snprintf(total_error_buf + error_buffer_position,
913 : sizeof(total_error_buf) - error_buffer_position, "%s",
914 : error_buf);
915 0 : error_buffer_position += error_size;
916 0 : if (error_buffer_position >= sizeof(total_error_buf)) break;
917 : }
918 :
919 36 : compiler_return_code = pclose(compiler);
920 36 : compiler = NULL;
921 :
922 36 : if (compiler_return_code != 0) {
923 : // failure in compiler
924 0 : msg = createException(MAL, "cudf.eval", "Failed to link C UDF.\n%s",
925 : total_error_buf);
926 0 : goto wrapup;
927 : }
928 :
929 36 : handle = dlopen(libname, RTLD_LAZY);
930 36 : if (!handle) {
931 0 : msg = createException(MAL, "cudf.eval",
932 : "Failed to open shared library: %s.",
933 : dlerror());
934 0 : goto wrapup;
935 : }
936 36 : func = (jitted_function)dlsym(handle, funcname);
937 36 : if (!func) {
938 0 : msg = createException(MAL, "cudf.eval",
939 : "Failed to load function from library: %s.",
940 : dlerror());
941 0 : goto wrapup;
942 : }
943 : // now that we have compiled this function
944 : // store it in our function cache
945 : {
946 36 : cached_functions *new_entry = GDKmalloc(sizeof(cached_functions));
947 36 : if (!new_entry) {
948 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
949 0 : goto wrapup;
950 : }
951 36 : new_entry->function = func;
952 36 : new_entry->expression_hash = expression_hash;
953 36 : new_entry->parameters = function_parameters;
954 36 : new_entry->dll_handle = handle;
955 36 : function_parameters = NULL;
956 36 : handle = NULL;
957 36 : MT_lock_set(&cache_lock);
958 36 : new_entry->next = function_cache[funcname_hash];
959 36 : function_cache[funcname_hash] = new_entry;
960 36 : MT_lock_unset(&cache_lock);
961 : }
962 : }
963 48 : if (input_count > 0) {
964 : // add "aggr_group" for non-grouped aggregates
965 47 : extra_inputs = non_grouped_aggregate ? 1 : 0;
966 47 : input_bats = GDKzalloc(sizeof(BAT *) * (input_count + extra_inputs));
967 47 : inputs = GDKzalloc(sizeof(void *) * (input_count + extra_inputs));
968 47 : if (!inputs || !input_bats) {
969 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
970 0 : goto wrapup;
971 : }
972 : }
973 48 : if (output_count > 0) {
974 48 : outputs = GDKzalloc(sizeof(void *) * output_count);
975 48 : if (!outputs) {
976 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
977 0 : goto wrapup;
978 : }
979 : }
980 : // create the inputs
981 48 : argnode = sqlfun ? sqlfun->ops->h : NULL;
982 103 : for (i = pci->retc + ARG_OFFSET; i < (size_t)limit_argc; i++) {
983 58 : index = i - (pci->retc + ARG_OFFSET);
984 58 : bat_type = getArgType(mb, pci, i);
985 58 : if (!isaBatType(bat_type)) {
986 13 : void* input = NULL;
987 13 : if (bat_type == TYPE_str) {
988 2 : input = *getArgReference_str(stk, pci, i);
989 11 : } else if (bat_type == TYPE_blob) {
990 1 : input = *(blob**)getArgReference(stk, pci, i);
991 : } else {
992 10 : input = getArgReference(stk, pci, i);
993 : }
994 : // scalar input
995 : // create a temporary BAT
996 13 : input_bats[index] = COLnew(0, bat_type, 1, TRANSIENT);
997 13 : if (!input_bats[index]) {
998 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
999 0 : goto wrapup;
1000 : }
1001 13 : if (BUNappend(input_bats[index], input,
1002 : false) != GDK_SUCCEED) {
1003 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1004 0 : goto wrapup;
1005 : }
1006 : } else {
1007 : // deal with BAT input
1008 45 : bat_type = getBatType(getArgType(mb, pci, i));
1009 45 : if (!(input_bats[index] =
1010 45 : BATdescriptor(*getArgReference_bat(stk, pci, i)))) {
1011 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1012 0 : goto wrapup;
1013 : }
1014 45 : if (BATcount(input_bats[index]) == 0) {
1015 : /* empty input, generate trivial return */
1016 : /* I expect all inputs to have the same size,
1017 : so this should be safe */
1018 3 : msg = empty_return(mb, stk, pci, output_count,
1019 : input_bats[index]->hseqbase);
1020 3 : goto wrapup;
1021 : }
1022 : }
1023 :
1024 55 : if (bat_type == TYPE_bit) {
1025 0 : GENERATE_BAT_INPUT(input_bats[index], bit);
1026 : } else if (bat_type == TYPE_bte) {
1027 0 : GENERATE_BAT_INPUT(input_bats[index], bte);
1028 : } else if (bat_type == TYPE_sht) {
1029 0 : GENERATE_BAT_INPUT(input_bats[index], sht);
1030 : } else if (bat_type == TYPE_int) {
1031 28 : GENERATE_BAT_INPUT(input_bats[index], int);
1032 : } else if (bat_type == TYPE_oid) {
1033 5 : GENERATE_BAT_INPUT(input_bats[index], oid);
1034 : // Hack for groups BAT, the count should reflect on the number of groups and not the number
1035 : // of rows, so use extents BAT
1036 5 : if (i == (size_t)seengrp) {
1037 5 : struct cudf_data_struct_oid *t = inputs[index];
1038 5 : BAT *ex = BBPquickdesc(*getArgReference_bat(stk, pci, i + 1));
1039 5 : if (!ex) {
1040 0 : msg = createException(MAL, "cudf.eval", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
1041 0 : goto wrapup;
1042 : }
1043 5 : t->count = BATcount(ex);
1044 : }
1045 : } else if (bat_type == TYPE_lng) {
1046 2 : GENERATE_BAT_INPUT(input_bats[index], lng);
1047 : } else if (bat_type == TYPE_flt) {
1048 1 : GENERATE_BAT_INPUT(input_bats[index], flt);
1049 : } else if (bat_type == TYPE_dbl) {
1050 4 : GENERATE_BAT_INPUT(input_bats[index], dbl);
1051 : } else if (bat_type == TYPE_str) {
1052 6 : BATiter li;
1053 6 : BUN p = 0, q = 0;
1054 6 : bool can_mprotect_varheap = false;
1055 6 : str mprotect_retval;
1056 6 : GENERATE_BAT_INPUT_BASE(str);
1057 6 : bat_data->count = BATcount(input_bats[index]);
1058 6 : bat_data->data = bat_data->count == 0 ? NULL : GDKmalloc(sizeof(char *) * bat_data->count);
1059 6 : bat_data->null_value = NULL;
1060 6 : if (bat_data->count > 0 && !bat_data->data) {
1061 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1062 0 : goto wrapup;
1063 : }
1064 6 : bat_data->alloced = true;
1065 6 : j = 0;
1066 :
1067 : // check if we can mprotect the varheap
1068 : // if we can't mprotect, copy the strings instead
1069 6 : assert(input_bats[index]->tvheap);
1070 6 : can_mprotect_varheap = can_mprotect_region(input_bats[index]->tvheap->base);
1071 6 : bat_data->valloced = !can_mprotect_varheap;
1072 :
1073 6 : li = bat_iterator(input_bats[index]);
1074 304 : BATloop(input_bats[index], p, q)
1075 : {
1076 298 : char *t = (char *)BUNtvar(li, p);
1077 298 : if (strNil(t)) {
1078 2 : bat_data->data[j] = NULL;
1079 : } else {
1080 296 : if (can_mprotect_varheap) {
1081 0 : bat_data->data[j] = t;
1082 : } else {
1083 296 : bat_data->data[j] = GDKmalloc(strlen(t) + 1);
1084 296 : if (!bat_data->data[j]) {
1085 0 : bat_iterator_end(&li);
1086 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1087 0 : goto wrapup;
1088 : }
1089 296 : strcpy(bat_data->data[j], t);
1090 : }
1091 : }
1092 298 : j++;
1093 : }
1094 6 : bat_iterator_end(&li);
1095 6 : if (can_mprotect_varheap) {
1096 : // mprotect the varheap of the BAT to prevent modification of input strings
1097 0 : mprotect_retval =
1098 0 : mprotect_region(input_bats[index]->tvheap->base,
1099 0 : input_bats[index]->tvheap->size, ®ions);
1100 0 : if (mprotect_retval) {
1101 0 : msg = createException(MAL, "cudf.eval",
1102 : "Failed to mprotect region: %s",
1103 : mprotect_retval);
1104 0 : goto wrapup;
1105 : }
1106 : }
1107 : } else if (bat_type == TYPE_date) {
1108 2 : date *baseptr;
1109 2 : GENERATE_BAT_INPUT_BASE(date);
1110 2 : bat_data->count = BATcount(input_bats[index]);
1111 2 : bat_data->data = bat_data->count == 0 ? NULL :
1112 2 : GDKmalloc(sizeof(bat_data->null_value) * bat_data->count);
1113 2 : if (bat_data->count > 0 && !bat_data->data) {
1114 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1115 0 : goto wrapup;
1116 : }
1117 2 : bat_data->alloced = true;
1118 :
1119 2 : baseptr = (date *)Tloc(input_bats[index], 0);
1120 7 : for (j = 0; j < bat_data->count; j++) {
1121 5 : data_from_date(baseptr[j], bat_data->data + j);
1122 : }
1123 2 : data_from_date(date_nil, &bat_data->null_value);
1124 : } else if (bat_type == TYPE_daytime) {
1125 2 : daytime *baseptr;
1126 2 : GENERATE_BAT_INPUT_BASE(time);
1127 2 : bat_data->count = BATcount(input_bats[index]);
1128 2 : bat_data->data = bat_data->count == 0 ? NULL :
1129 2 : GDKmalloc(sizeof(bat_data->null_value) * bat_data->count);
1130 2 : if (bat_data->count > 0 && !bat_data->data) {
1131 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1132 0 : goto wrapup;
1133 : }
1134 2 : bat_data->alloced = true;
1135 :
1136 2 : baseptr = (daytime *)Tloc(input_bats[index], 0);
1137 6 : for (j = 0; j < bat_data->count; j++) {
1138 4 : data_from_time(baseptr[j], bat_data->data + j);
1139 : }
1140 2 : data_from_time(daytime_nil, &bat_data->null_value);
1141 : } else if (bat_type == TYPE_timestamp) {
1142 2 : timestamp *baseptr;
1143 2 : GENERATE_BAT_INPUT_BASE(timestamp);
1144 2 : bat_data->count = BATcount(input_bats[index]);
1145 2 : bat_data->data = bat_data->count == 0 ? NULL :
1146 2 : GDKmalloc(sizeof(bat_data->null_value) * bat_data->count);
1147 2 : if (bat_data->count > 0 && !bat_data->data) {
1148 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1149 0 : goto wrapup;
1150 : }
1151 2 : bat_data->alloced = true;
1152 :
1153 2 : baseptr = (timestamp *)Tloc(input_bats[index], 0);
1154 6 : for (j = 0; j < bat_data->count; j++) {
1155 4 : data_from_timestamp(baseptr[j], bat_data->data + j);
1156 : }
1157 2 : data_from_timestamp(timestamp_nil, &bat_data->null_value);
1158 : } else if (bat_type == TYPE_blob) {
1159 2 : BATiter li;
1160 2 : BUN p = 0, q = 0;
1161 2 : str mprotect_retval;
1162 2 : bool can_mprotect_varheap = false;
1163 2 : GENERATE_BAT_INPUT_BASE(blob);
1164 2 : bat_data->count = BATcount(input_bats[index]);
1165 2 : bat_data->data = bat_data->count == 0 ? NULL :
1166 2 : GDKmalloc(sizeof(cudf_data_blob) * bat_data->count);
1167 2 : if (bat_data->count > 0 && !bat_data->data) {
1168 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1169 0 : goto wrapup;
1170 : }
1171 2 : bat_data->alloced = true;
1172 2 : j = 0;
1173 :
1174 : // check if we can mprotect the varheap
1175 : // if we can't mprotect, copy the strings instead
1176 2 : assert(input_bats[index]->tvheap);
1177 2 : can_mprotect_varheap = can_mprotect_region(input_bats[index]->tvheap->base);
1178 2 : bat_data->valloced = !can_mprotect_varheap;
1179 :
1180 2 : li = bat_iterator(input_bats[index]);
1181 6 : BATloop(input_bats[index], p, q)
1182 : {
1183 4 : blob *t = (blob *)BUNtvar(li, p);
1184 4 : if (t->nitems == ~(size_t)0) {
1185 1 : bat_data->data[j].size = ~(size_t) 0;
1186 1 : bat_data->data[j].data = NULL;
1187 : } else {
1188 3 : bat_data->data[j].size = t->nitems;
1189 3 : if (can_mprotect_varheap) {
1190 0 : bat_data->data[j].data = &t->data[0];
1191 3 : } else if (t->nitems > 0) {
1192 2 : bat_data->data[j].data = GDKmalloc(t->nitems);
1193 2 : if (!bat_data->data[j].data) {
1194 0 : bat_iterator_end(&li);
1195 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1196 0 : goto wrapup;
1197 : }
1198 2 : memcpy(bat_data->data[j].data, &t->data[0], t->nitems);
1199 : } else {
1200 1 : bat_data->data[j].data = NULL;
1201 : }
1202 : }
1203 4 : j++;
1204 : }
1205 2 : bat_iterator_end(&li);
1206 2 : bat_data->null_value.size = ~(size_t) 0;
1207 2 : bat_data->null_value.data = NULL;
1208 2 : if (can_mprotect_varheap) {
1209 : // for blob columns, mprotect the varheap of the BAT
1210 0 : mprotect_retval =
1211 0 : mprotect_region(input_bats[index]->tvheap->base,
1212 0 : input_bats[index]->tvheap->size, ®ions);
1213 0 : if (mprotect_retval) {
1214 0 : msg = createException(MAL, "cudf.eval",
1215 : "Failed to mprotect region: %s",
1216 : mprotect_retval);
1217 0 : goto wrapup;
1218 : }
1219 : }
1220 : } else {
1221 : // unsupported type: convert to string
1222 1 : BATiter li;
1223 1 : BUN p = 0, q = 0;
1224 1 : GENERATE_BAT_INPUT_BASE(str);
1225 1 : bat_data->count = BATcount(input_bats[index]);
1226 1 : bat_data->null_value = NULL;
1227 1 : bat_data->data = bat_data->count == 0 ? NULL :
1228 1 : GDKzalloc(sizeof(char *) * bat_data->count);
1229 1 : if (bat_data->count > 0 && !bat_data->data) {
1230 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1231 0 : goto wrapup;
1232 : }
1233 1 : bat_data->alloced = true;
1234 1 : j = 0;
1235 :
1236 1 : li = bat_iterator(input_bats[index]);
1237 3 : BATloop(input_bats[index], p, q)
1238 : {
1239 2 : void *t = BUNtail(li, p);
1240 4 : if (BATatoms[bat_type].atomNull &&
1241 2 : BATatoms[bat_type].atomCmp(
1242 : t, BATatoms[bat_type].atomNull) == 0) {
1243 1 : bat_data->data[j] = NULL;
1244 : } else {
1245 1 : char *result = NULL;
1246 1 : size_t length = 0;
1247 1 : if (BATatoms[bat_type].atomToStr(&result, &length, t, false) ==
1248 : 0) {
1249 0 : bat_iterator_end(&li);
1250 0 : msg = createException(
1251 : MAL, "cudf.eval",
1252 : "Failed to convert element to string");
1253 0 : goto wrapup;
1254 : }
1255 1 : bat_data->data[j] = result;
1256 : }
1257 2 : j++;
1258 : }
1259 1 : bat_iterator_end(&li);
1260 1 : bat_data->valloced = true;
1261 : }
1262 55 : input_size = BATcount(input_bats[index]) > input_size
1263 : ? BATcount(input_bats[index])
1264 : : input_size;
1265 55 : argnode = argnode ? argnode->next : NULL;
1266 : }
1267 :
1268 45 : index = input_count;
1269 45 : if (non_grouped_aggregate) {
1270 5 : GENERATE_BAT_INPUT_BASE(oid);
1271 5 : bat_data->count = input_size;
1272 5 : bat_data->null_value = oid_nil;
1273 10 : bat_data->data =
1274 5 : GDKzalloc(bat_data->count * sizeof(bat_data->null_value));
1275 5 : bat_data->alloced = true;
1276 5 : if (!bat_data->data) {
1277 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1278 0 : goto wrapup;
1279 : }
1280 : }
1281 :
1282 45 : argnode = sqlfun ? sqlfun->res->h : NULL;
1283 : // output types
1284 92 : for (i = 0; i < output_count; i++) {
1285 47 : index = i;
1286 47 : bat_type = getBatType(getArgType(mb, pci, i));
1287 47 : if (bat_type == TYPE_bit) {
1288 0 : GENERATE_BAT_OUTPUT(bit);
1289 : } else if (bat_type == TYPE_bte) {
1290 0 : GENERATE_BAT_OUTPUT(bte);
1291 : } else if (bat_type == TYPE_sht) {
1292 0 : GENERATE_BAT_OUTPUT(sht);
1293 : } else if (bat_type == TYPE_int) {
1294 16 : GENERATE_BAT_OUTPUT(int);
1295 : } else if (bat_type == TYPE_oid) {
1296 0 : GENERATE_BAT_OUTPUT(oid);
1297 : } else if (bat_type == TYPE_lng) {
1298 10 : GENERATE_BAT_OUTPUT(lng);
1299 : } else if (bat_type == TYPE_flt) {
1300 0 : GENERATE_BAT_OUTPUT(flt);
1301 : } else if (bat_type == TYPE_dbl) {
1302 5 : GENERATE_BAT_OUTPUT(dbl);
1303 : } else if (bat_type == TYPE_str) {
1304 7 : GENERATE_BAT_OUTPUT_BASE(str);
1305 7 : bat_data->null_value = NULL;
1306 : } else if (bat_type == TYPE_date) {
1307 2 : GENERATE_BAT_OUTPUT_BASE(date);
1308 2 : data_from_date(date_nil, &bat_data->null_value);
1309 : } else if (bat_type == TYPE_daytime) {
1310 2 : GENERATE_BAT_OUTPUT_BASE(time);
1311 2 : data_from_time(daytime_nil, &bat_data->null_value);
1312 : } else if (bat_type == TYPE_timestamp) {
1313 2 : GENERATE_BAT_OUTPUT_BASE(timestamp);
1314 2 : data_from_timestamp(timestamp_nil, &bat_data->null_value);
1315 : } else if (bat_type == TYPE_blob) {
1316 2 : GENERATE_BAT_OUTPUT_BASE(blob);
1317 2 : bat_data->null_value.size = ~(size_t) 0;
1318 2 : bat_data->null_value.data = NULL;
1319 : } else {
1320 : // unsupported type, convert from string output
1321 1 : GENERATE_BAT_OUTPUT_BASE(str);
1322 1 : bat_data->null_value = NULL;
1323 : }
1324 47 : argnode = argnode ? argnode->next : NULL;
1325 : }
1326 :
1327 : // set up a longjmp point
1328 : // this longjmp point is used for some error handling in the C function
1329 : // such as failed mallocs
1330 45 : if (option_enable_longjmp) {
1331 0 : struct capi_tls_s *tls = MT_tls_get(capi_tls_key);
1332 0 : ret = setjmp(tls->jb);
1333 0 : if (ret < 0) {
1334 : // error value
1335 0 : msg = createException(MAL, "cudf.eval", "Failed setjmp: %s",
1336 0 : GDKstrerror(errno, (char[128]){0}, 128));
1337 0 : errno = 0;
1338 0 : goto wrapup;
1339 0 : } else if (ret > 0) {
1340 0 : if (ret == 1) {
1341 0 : msg = createException(MAL, "cudf.eval", "Attempting to write to "
1342 : "the input or triggered a "
1343 : "segfault/bus error");
1344 0 : } else if (ret == 2) {
1345 0 : msg = createException(MAL, "cudf.eval",
1346 : "Malloc failure in internal function!");
1347 : } else {
1348 : // we jumped here
1349 0 : msg = createException(MAL, "cudf.eval", "We longjumped here "
1350 : "because of an error, but "
1351 : "we don't know which!");
1352 : }
1353 0 : goto wrapup;
1354 : }
1355 : }
1356 :
1357 : // set up the signal handler for catching segfaults
1358 45 : if (option_enable_mprotect) {
1359 0 : sa = (struct sigaction) {
1360 : .sa_flags = SA_SIGINFO,
1361 : .sa_sigaction = handler,
1362 : };
1363 0 : (void) sigfillset(&sa.sa_mask);
1364 0 : if (sigaction(SIGSEGV, &sa, &oldsa) == -1 ||
1365 0 : sigaction(SIGBUS, &sa, &oldsb) == -1) {
1366 0 : msg = createException(MAL, "cudf.eval",
1367 : "Failed to set signal handler: %s",
1368 0 : GDKstrerror(errno, (char[128]){0}, 128));
1369 0 : errno = 0;
1370 0 : goto wrapup;
1371 : }
1372 : // actually mprotect the regions now that the signal handlers are set
1373 0 : region_iter = regions;
1374 0 : while (region_iter) {
1375 0 : if (mprotect(region_iter->addr, region_iter->len, PROT_READ) < 0) {
1376 0 : goto wrapup;
1377 : }
1378 0 : region_iter = region_iter->next;
1379 : }
1380 : }
1381 : // call the actual jitted function
1382 45 : msg = func(inputs, outputs, wrapped_GDK_malloc, wrapped_GDK_free);
1383 :
1384 :
1385 45 : if (option_enable_mprotect) {
1386 : // clear any mprotected regions
1387 0 : while (regions) {
1388 0 : mprotected_region *next = regions->next;
1389 0 : clear_mprotect(regions->addr, regions->len);
1390 0 : GDKfree(regions);
1391 0 : regions = next;
1392 : }
1393 : // clear the signal handlers
1394 0 : if (sigaction(SIGSEGV, &oldsa, NULL) == -1 ||
1395 0 : sigaction(SIGBUS, &oldsb, NULL) == -1) {
1396 0 : msg = createException(MAL, "cudf.eval",
1397 : "Failed to unset signal handler: %s",
1398 0 : GDKstrerror(errno, (char[128]){0}, 128));
1399 0 : errno = 0;
1400 0 : goto wrapup;
1401 : }
1402 0 : sa = (struct sigaction) {.sa_flags = 0};
1403 : }
1404 :
1405 45 : if (msg) {
1406 : // failure in function
1407 1 : msg = createException(MAL, "cudf.eval", "%s", msg);
1408 1 : goto wrapup;
1409 : }
1410 :
1411 : // create the output bats from the returned results
1412 88 : for (i = 0; i < (size_t)pci->retc; i++) {
1413 46 : size_t count;
1414 46 : void *data;
1415 46 : BAT *b;
1416 46 : bat_type = getBatType(getArgType(mb, pci, i));
1417 :
1418 46 : if (!outputs[i]) {
1419 0 : msg = createException(MAL, "cudf.eval", "No data returned.");
1420 0 : goto wrapup;
1421 : }
1422 46 : count = GetTypeCount(bat_type, outputs[i]);
1423 46 : data = GetTypeData(bat_type, outputs[i]);
1424 46 : if (!data) {
1425 1 : msg = createException(MAL, "cudf.eval", "No data returned.");
1426 1 : goto wrapup;
1427 : }
1428 45 : if (initial_output_count < 0) {
1429 43 : initial_output_count = count;
1430 2 : } else if ((size_t)initial_output_count != count) {
1431 1 : msg = createException(MAL, "cudf.eval",
1432 : "Data has different cardinalities.");
1433 1 : goto wrapup;
1434 : }
1435 44 : if (bat_type == TYPE_bit || bat_type == TYPE_bte ||
1436 44 : bat_type == TYPE_sht || bat_type == TYPE_int ||
1437 44 : bat_type == TYPE_oid || bat_type == TYPE_lng ||
1438 21 : bat_type == TYPE_flt || bat_type == TYPE_dbl) {
1439 28 : b = GetTypeBat(bat_type, outputs[i]);
1440 28 : if (!b) {
1441 0 : msg = createException(MAL, "cudf.eval", "Output column was not properly initialized.");
1442 0 : goto wrapup;
1443 : }
1444 : } else {
1445 16 : assert(GetTypeBat(bat_type, outputs[i]) == NULL);
1446 16 : b = COLnew(0, bat_type, count, TRANSIENT);
1447 16 : if (!b) {
1448 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1449 0 : goto wrapup;
1450 : }
1451 16 : if (bat_type == TYPE_date) {
1452 2 : date *baseptr = (date *)Tloc(b, 0);
1453 2 : cudf_data_date *source_base = (cudf_data_date *)data;
1454 7 : for (j = 0; j < count; j++) {
1455 5 : baseptr[j] = date_from_data(source_base + j);
1456 : }
1457 2 : BATsetcount(b, count);
1458 2 : GDKfree(data);
1459 : } else if (bat_type == TYPE_daytime) {
1460 2 : daytime *baseptr = (daytime *)Tloc(b, 0);
1461 2 : cudf_data_time *source_base = (cudf_data_time *)data;
1462 6 : for (j = 0; j < count; j++) {
1463 4 : baseptr[j] = time_from_data(source_base + j);
1464 : }
1465 2 : BATsetcount(b, count);
1466 2 : GDKfree(data);
1467 : } else if (bat_type == TYPE_timestamp) {
1468 2 : timestamp *baseptr = (timestamp *)Tloc(b, 0);
1469 2 : cudf_data_timestamp *source_base = (cudf_data_timestamp *)data;
1470 6 : for (j = 0; j < count; j++) {
1471 4 : baseptr[j] = timestamp_from_data(source_base + j);
1472 : }
1473 2 : BATsetcount(b, count);
1474 2 : GDKfree(data);
1475 : } else if (bat_type == TYPE_str) {
1476 : char **source_base = (char **)data;
1477 306 : for (j = 0; j < count; j++) {
1478 299 : const char *ptr = source_base[j];
1479 299 : if (!ptr) {
1480 2 : ptr = str_nil;
1481 : }
1482 299 : if (BUNappend(b, ptr, false) != GDK_SUCCEED) {
1483 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1484 0 : goto wrapup;
1485 : }
1486 : }
1487 7 : GDKfree(data);
1488 : } else if (bat_type == TYPE_blob) {
1489 : cudf_data_blob *source_base = (cudf_data_blob *)data;
1490 : blob *current_blob = NULL;
1491 : size_t current_blob_maxsize = 0;
1492 6 : for (j = 0; j < count; j++) {
1493 4 : const cudf_data_blob blob = source_base[j];
1494 :
1495 4 : if (blob.size == ~(size_t) 0) {
1496 1 : current_blob->nitems = ~(size_t)0;
1497 : } else {
1498 3 : if (!current_blob || current_blob_maxsize < blob.size) {
1499 0 : if (current_blob) {
1500 0 : GDKfree(current_blob);
1501 : }
1502 2 : current_blob_maxsize = blob.size;
1503 2 : current_blob = GDKmalloc(sizeof(size_t) + blob.size);
1504 2 : if (!current_blob) {
1505 0 : msg =
1506 0 : createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1507 0 : goto wrapup;
1508 : }
1509 : }
1510 :
1511 3 : current_blob->nitems = blob.size;
1512 3 : if (blob.size > 0)
1513 2 : memcpy(¤t_blob->data[0], blob.data, blob.size);
1514 : }
1515 :
1516 4 : if (BUNappend(b, current_blob, false) != GDK_SUCCEED) {
1517 0 : if (current_blob) {
1518 0 : GDKfree(current_blob);
1519 : }
1520 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1521 0 : goto wrapup;
1522 : }
1523 : }
1524 2 : if (current_blob) {
1525 2 : GDKfree(current_blob);
1526 : }
1527 2 : GDKfree(data);
1528 : } else {
1529 1 : char **source_base = (char **)data;
1530 1 : size_t len = 0;
1531 1 : void *element = NULL;
1532 3 : for (j = 0; j < count; j++) {
1533 2 : const char *ptr = source_base[j];
1534 2 : const void *appended_element;
1535 2 : if (strNil(ptr)) {
1536 1 : appended_element = (void *)BATatoms[bat_type].atomNull;
1537 : } else {
1538 1 : if (BATatoms[bat_type].atomFromStr(ptr, &len, &element, false) ==
1539 : 0) {
1540 0 : msg = createException(MAL, "cudf.eval",
1541 : "Failed to convert output "
1542 : "element from string: %s",
1543 : ptr);
1544 0 : goto wrapup;
1545 : }
1546 1 : appended_element = element;
1547 : }
1548 2 : if (BUNappend(b, appended_element, false) != GDK_SUCCEED) {
1549 0 : if (element) {
1550 0 : GDKfree(element);
1551 : }
1552 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1553 0 : goto wrapup;
1554 : }
1555 : }
1556 1 : if (element) {
1557 1 : GDKfree(element);
1558 : }
1559 1 : GDKfree(data);
1560 : }
1561 : }
1562 44 : b->tnil = false;
1563 44 : b->tnonil = false;
1564 44 : b->tkey = false;
1565 44 : b->tsorted = false;
1566 44 : b->trevsorted = false;
1567 :
1568 : // free the output value right now to prevent the internal data from
1569 : // being freed later
1570 : // as the internal data is now part of the bat we just created
1571 44 : GDKfree(outputs[i]);
1572 44 : outputs[i] = NULL;
1573 :
1574 : // return the BAT from the function
1575 44 : if (isaBatType(getArgType(mb, pci, i))) {
1576 29 : *getArgReference_bat(stk, pci, i) = b->batCacheid;
1577 29 : BBPkeepref(b);
1578 : } else {
1579 15 : BATiter li = bat_iterator(b);
1580 15 : if (VALinit(&stk->stk[pci->argv[i]], bat_type,
1581 15 : BUNtail(li, 0)) == NULL) {
1582 0 : msg = createException(MAL, "cudf.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
1583 : }
1584 15 : bat_iterator_end(&li);
1585 15 : BBPunfix(b->batCacheid);
1586 : }
1587 : }
1588 :
1589 42 : wrapup:
1590 : // cleanup
1591 : // remove the signal handler, if any was set
1592 48 : GDKfree(oname);
1593 48 : MT_tls_set(capi_tls_key, NULL);
1594 48 : if (option_enable_mprotect) {
1595 0 : if (sa.sa_sigaction) {
1596 0 : (void) sigaction(SIGSEGV, &oldsa, NULL);
1597 0 : (void) sigaction(SIGBUS, &oldsb, NULL);
1598 :
1599 0 : sa = (struct sigaction) {.sa_flags = 0,};
1600 : }
1601 : // clear any mprotected regions
1602 0 : while (regions) {
1603 0 : mprotected_region *next = regions->next;
1604 0 : clear_mprotect(regions->addr, regions->len);
1605 0 : GDKfree(regions);
1606 0 : regions = next;
1607 : }
1608 : }
1609 344 : while (tls.ar != NULL) {
1610 296 : allocated_region *next = tls.ar->next;
1611 296 : GDKfree(tls.ar);
1612 296 : tls.ar = next;
1613 : }
1614 48 : if (option_enable_mprotect) {
1615 : // block segfaults and bus errors again after we exit
1616 0 : (void)pthread_sigmask(SIG_BLOCK, &signal_set, NULL);
1617 : }
1618 : // argument names (input)
1619 48 : if (args) {
1620 306 : for (i = 0; i < (size_t)limit_argc; i++) {
1621 258 : if (args[i]) {
1622 64 : GDKfree(args[i]);
1623 : }
1624 : }
1625 48 : GDKfree(args);
1626 : }
1627 : // output names
1628 48 : if (output_names) {
1629 98 : for (i = 0; i < (size_t)pci->retc; i++) {
1630 50 : if (output_names[i]) {
1631 50 : GDKfree(output_names[i]);
1632 : }
1633 : }
1634 48 : GDKfree(output_names);
1635 : }
1636 48 : if (input_bats) {
1637 117 : for(i = 0; i < input_count + extra_inputs; i++) {
1638 128 : BBPreclaim(input_bats[i]);
1639 : }
1640 47 : GDKfree(input_bats);
1641 : }
1642 : // input data
1643 48 : if (inputs) {
1644 117 : for (i = 0; i < input_count + extra_inputs; i++) {
1645 70 : if (inputs[i]) {
1646 60 : int arg = i + pci->retc + ARG_OFFSET;
1647 60 : bat_type = getArgType(mb, pci, arg);
1648 60 : if (isaBatType(bat_type)) {
1649 42 : bat_type = getBatType(bat_type);
1650 : }
1651 60 : if (i == input_count) /* non grouped aggr case */
1652 : bat_type = TYPE_oid;
1653 55 : if (bat_type < 0)
1654 0 : continue;
1655 60 : if (isAlloced(bat_type, inputs[i])) {
1656 60 : char **data = (char **)GetTypeData(bat_type, inputs[i]);
1657 120 : if (isValloced(bat_type, inputs[i])) {
1658 9 : size_t count = GetTypeCount(bat_type, inputs[i]);
1659 9 : if (bat_type == TYPE_blob) {
1660 : cudf_data_blob *bd = (cudf_data_blob*)data;
1661 6 : for (j = 0; j < count; j++)
1662 4 : if (bd[j].data)
1663 2 : GDKfree(bd[j].data);
1664 : } else {
1665 307 : for (j = 0; j < count; j++)
1666 300 : if (data[j])
1667 297 : GDKfree(data[j]);
1668 : }
1669 : }
1670 60 : if (data)
1671 60 : GDKfree(data);
1672 : }
1673 60 : GDKfree(inputs[i]);
1674 : }
1675 : }
1676 47 : GDKfree(inputs);
1677 : }
1678 : // output data
1679 48 : if (outputs) {
1680 98 : for (i = 0; i < (size_t)output_count; i++) {
1681 100 : bat_type = isaBatType(getArgType(mb, pci, i))
1682 : ? getBatType(getArgType(mb, pci, i))
1683 50 : : getArgType(mb, pci, i);
1684 50 : if (outputs[i]) {
1685 3 : void* b = GetTypeBat(bat_type, outputs[i]);
1686 3 : if (b) {
1687 1 : BBPunfix(((BAT*)b)->batCacheid);
1688 : } else {
1689 2 : void *data = GetTypeData(bat_type, outputs[i]);
1690 2 : if (data) {
1691 0 : GDKfree(data);
1692 : }
1693 : }
1694 3 : GDKfree(outputs[i]);
1695 : }
1696 : }
1697 48 : GDKfree(outputs);
1698 : }
1699 48 : if (function_parameters) {
1700 12 : GDKfree(function_parameters);
1701 : }
1702 : // close the file handle
1703 48 : if (f) {
1704 0 : fclose(f);
1705 : }
1706 : // close the dll
1707 48 : if (handle) {
1708 0 : dlclose(handle);
1709 : }
1710 : // close the compiler stream
1711 48 : if (compiler) {
1712 0 : pclose(compiler);
1713 : }
1714 48 : if (extra_cflags) {
1715 0 : GDKfree(extra_cflags);
1716 : }
1717 48 : if (extra_ldflags) {
1718 0 : GDKfree(extra_ldflags);
1719 : }
1720 48 : return msg;
1721 : }
1722 :
1723 93 : static const char *GetTypeName(int type)
1724 : {
1725 93 : const char *tpe = NULL;
1726 93 : if (type == TYPE_bit || type == TYPE_bte) {
1727 : tpe = "bte";
1728 : } else if (type == TYPE_sht) {
1729 : tpe = "sht";
1730 : } else if (type == TYPE_int) {
1731 : tpe = "int";
1732 : } else if (type == TYPE_oid) {
1733 : tpe = "oid";
1734 : } else if (type == TYPE_lng) {
1735 : tpe = "lng";
1736 : } else if (type == TYPE_flt) {
1737 : tpe = "flt";
1738 : } else if (type == TYPE_dbl) {
1739 : tpe = "dbl";
1740 : } else if (type == TYPE_str) {
1741 : tpe = "str";
1742 : } else if (type == TYPE_date) {
1743 : tpe = "date";
1744 : } else if (type == TYPE_daytime) {
1745 : tpe = "time";
1746 : } else if (type == TYPE_timestamp) {
1747 : tpe = "timestamp";
1748 : } else if (type == TYPE_blob) {
1749 : tpe = "blob";
1750 : } else {
1751 : // unsupported type: string
1752 93 : tpe = "str";
1753 : }
1754 93 : return tpe;
1755 : }
1756 :
1757 : static bool
1758 60 : isAlloced(int type, void *struct_ptr)
1759 : {
1760 60 : bool alloced = false;
1761 :
1762 60 : if (type == TYPE_bit || type == TYPE_bte) {
1763 0 : alloced = ((struct cudf_data_struct_bte *)struct_ptr)->alloced;
1764 : } else if (type == TYPE_sht) {
1765 0 : alloced = ((struct cudf_data_struct_sht *)struct_ptr)->alloced;
1766 : } else if (type == TYPE_int) {
1767 28 : alloced = ((struct cudf_data_struct_int *)struct_ptr)->alloced;
1768 : } else if (type == TYPE_oid) {
1769 10 : alloced = ((struct cudf_data_struct_oid *)struct_ptr)->alloced;
1770 : } else if (type == TYPE_lng) {
1771 2 : alloced = ((struct cudf_data_struct_lng *)struct_ptr)->alloced;
1772 : } else if (type == TYPE_flt) {
1773 1 : alloced = ((struct cudf_data_struct_flt *)struct_ptr)->alloced;
1774 : } else if (type == TYPE_dbl) {
1775 4 : alloced = ((struct cudf_data_struct_dbl *)struct_ptr)->alloced;
1776 : } else if (type == TYPE_str) {
1777 6 : alloced = ((struct cudf_data_struct_str *)struct_ptr)->alloced;
1778 : } else if (type == TYPE_date) {
1779 2 : alloced = ((struct cudf_data_struct_date *)struct_ptr)->alloced;
1780 : } else if (type == TYPE_daytime) {
1781 2 : alloced = ((struct cudf_data_struct_time *)struct_ptr)->alloced;
1782 : } else if (type == TYPE_timestamp) {
1783 2 : alloced = ((struct cudf_data_struct_timestamp *)struct_ptr)->alloced;
1784 : } else if (type == TYPE_blob) {
1785 2 : alloced = ((struct cudf_data_struct_blob *)struct_ptr)->alloced;
1786 : } else {
1787 : // unsupported type: string
1788 1 : alloced = ((struct cudf_data_struct_str *)struct_ptr)->alloced;
1789 : }
1790 60 : return alloced;
1791 : }
1792 :
1793 : static bool
1794 60 : isValloced(int type, void *struct_ptr)
1795 : {
1796 60 : bool alloced = false;
1797 :
1798 60 : if (type == TYPE_str) {
1799 6 : alloced = ((struct cudf_data_struct_str *)struct_ptr)->valloced;
1800 54 : } else if (type == TYPE_blob) {
1801 2 : alloced = ((struct cudf_data_struct_blob *)struct_ptr)->valloced;
1802 : } else {
1803 : // unsupported type: string
1804 52 : alloced = ((struct cudf_data_struct_str *)struct_ptr)->valloced;
1805 : }
1806 60 : return alloced;
1807 : }
1808 : void *
1809 108 : GetTypeData(int type, void *struct_ptr)
1810 : {
1811 108 : void *data = NULL;
1812 :
1813 108 : if (type == TYPE_bit || type == TYPE_bte) {
1814 0 : data = ((struct cudf_data_struct_bte *)struct_ptr)->data;
1815 : } else if (type == TYPE_sht) {
1816 0 : data = ((struct cudf_data_struct_sht *)struct_ptr)->data;
1817 : } else if (type == TYPE_int) {
1818 45 : data = ((struct cudf_data_struct_int *)struct_ptr)->data;
1819 : } else if (type == TYPE_oid) {
1820 10 : data = ((struct cudf_data_struct_oid *)struct_ptr)->data;
1821 : } else if (type == TYPE_lng) {
1822 12 : data = ((struct cudf_data_struct_lng *)struct_ptr)->data;
1823 : } else if (type == TYPE_flt) {
1824 1 : data = ((struct cudf_data_struct_flt *)struct_ptr)->data;
1825 : } else if (type == TYPE_dbl) {
1826 9 : data = ((struct cudf_data_struct_dbl *)struct_ptr)->data;
1827 : } else if (type == TYPE_str) {
1828 13 : data = ((struct cudf_data_struct_str *)struct_ptr)->data;
1829 : } else if (type == TYPE_date) {
1830 4 : data = ((struct cudf_data_struct_date *)struct_ptr)->data;
1831 : } else if (type == TYPE_daytime) {
1832 4 : data = ((struct cudf_data_struct_time *)struct_ptr)->data;
1833 : } else if (type == TYPE_timestamp) {
1834 4 : data = ((struct cudf_data_struct_timestamp *)struct_ptr)->data;
1835 : } else if (type == TYPE_blob) {
1836 4 : data = ((struct cudf_data_struct_blob *)struct_ptr)->data;
1837 : } else {
1838 : // unsupported type: string
1839 2 : data = ((struct cudf_data_struct_str *)struct_ptr)->data;
1840 : }
1841 108 : return data;
1842 : }
1843 :
1844 47 : void *GetTypeBat(int type, void *struct_ptr)
1845 : {
1846 47 : void *bat = NULL;
1847 :
1848 47 : if (type == TYPE_bit || type == TYPE_bte) {
1849 0 : bat = ((struct cudf_data_struct_bte *)struct_ptr)->bat;
1850 : } else if (type == TYPE_sht) {
1851 0 : bat = ((struct cudf_data_struct_sht *)struct_ptr)->bat;
1852 : } else if (type == TYPE_int) {
1853 16 : bat = ((struct cudf_data_struct_int *)struct_ptr)->bat;
1854 : } else if (type == TYPE_oid) {
1855 0 : bat = ((struct cudf_data_struct_oid *)struct_ptr)->bat;
1856 : } else if (type == TYPE_lng) {
1857 10 : bat = ((struct cudf_data_struct_lng *)struct_ptr)->bat;
1858 : } else if (type == TYPE_flt) {
1859 0 : bat = ((struct cudf_data_struct_flt *)struct_ptr)->bat;
1860 : } else if (type == TYPE_dbl) {
1861 5 : bat = ((struct cudf_data_struct_dbl *)struct_ptr)->bat;
1862 : } else if (type == TYPE_str) {
1863 7 : bat = ((struct cudf_data_struct_str *)struct_ptr)->bat;
1864 : } else if (type == TYPE_date) {
1865 2 : bat = ((struct cudf_data_struct_date *)struct_ptr)->bat;
1866 : } else if (type == TYPE_daytime) {
1867 2 : bat = ((struct cudf_data_struct_time *)struct_ptr)->bat;
1868 : } else if (type == TYPE_timestamp) {
1869 2 : bat = ((struct cudf_data_struct_timestamp *)struct_ptr)->bat;
1870 : } else if (type == TYPE_blob) {
1871 2 : bat = ((struct cudf_data_struct_blob *)struct_ptr)->bat;
1872 : } else {
1873 : // unsupported type: string
1874 1 : bat = ((struct cudf_data_struct_str *)struct_ptr)->bat;
1875 : }
1876 47 : return bat;
1877 : }
1878 :
1879 55 : size_t GetTypeCount(int type, void *struct_ptr)
1880 : {
1881 55 : size_t count = 0;
1882 55 : if (type == TYPE_bit || type == TYPE_bte) {
1883 0 : count = ((struct cudf_data_struct_bte *)struct_ptr)->count;
1884 : } else if (type == TYPE_sht) {
1885 0 : count = ((struct cudf_data_struct_sht *)struct_ptr)->count;
1886 : } else if (type == TYPE_int) {
1887 15 : count = ((struct cudf_data_struct_int *)struct_ptr)->count;
1888 : } else if (type == TYPE_oid) {
1889 0 : count = ((struct cudf_data_struct_oid *)struct_ptr)->count;
1890 : } else if (type == TYPE_lng) {
1891 10 : count = ((struct cudf_data_struct_lng *)struct_ptr)->count;
1892 : } else if (type == TYPE_flt) {
1893 0 : count = ((struct cudf_data_struct_flt *)struct_ptr)->count;
1894 : } else if (type == TYPE_dbl) {
1895 5 : count = ((struct cudf_data_struct_dbl *)struct_ptr)->count;
1896 : } else if (type == TYPE_str) {
1897 13 : count = ((struct cudf_data_struct_str *)struct_ptr)->count;
1898 : } else if (type == TYPE_date) {
1899 2 : count = ((struct cudf_data_struct_date *)struct_ptr)->count;
1900 : } else if (type == TYPE_daytime) {
1901 2 : count = ((struct cudf_data_struct_time *)struct_ptr)->count;
1902 : } else if (type == TYPE_timestamp) {
1903 2 : count = ((struct cudf_data_struct_timestamp *)struct_ptr)->count;
1904 : } else if (type == TYPE_blob) {
1905 4 : count = ((struct cudf_data_struct_blob *)struct_ptr)->count;
1906 : } else {
1907 : // unsupported type: string
1908 2 : count = ((struct cudf_data_struct_str *)struct_ptr)->count;
1909 : }
1910 55 : return count;
1911 : }
1912 :
1913 14 : void data_from_date(date d, cudf_data_date *ptr)
1914 : {
1915 14 : ptr->day = date_day(d);
1916 14 : ptr->month = date_month(d);
1917 14 : ptr->year = date_year(d);
1918 14 : }
1919 :
1920 5 : date date_from_data(cudf_data_date *ptr)
1921 : {
1922 5 : return date_create(ptr->year, ptr->month, ptr->day);
1923 : }
1924 :
1925 12 : void data_from_time(daytime d, cudf_data_time *ptr)
1926 : {
1927 12 : ptr->hours = daytime_hour(d);
1928 12 : ptr->minutes = daytime_min(d);
1929 12 : ptr->seconds = daytime_sec(d);
1930 12 : ptr->ms = daytime_usec(d) / 1000;
1931 12 : }
1932 :
1933 4 : daytime time_from_data(cudf_data_time *ptr)
1934 : {
1935 4 : return daytime_create(ptr->hours, ptr->minutes, ptr->seconds,
1936 4 : ptr->ms * 1000);
1937 : }
1938 :
1939 8 : void data_from_timestamp(timestamp d, cudf_data_timestamp *ptr)
1940 : {
1941 8 : daytime tm = timestamp_daytime(d);
1942 8 : date dt = timestamp_date(d);
1943 :
1944 8 : ptr->date.day = date_day(dt);
1945 8 : ptr->date.month = date_month(dt);
1946 8 : ptr->date.year = date_year(dt);
1947 8 : ptr->time.hours = daytime_hour(tm);
1948 8 : ptr->time.minutes = daytime_min(tm);
1949 8 : ptr->time.seconds = daytime_sec(tm);
1950 8 : ptr->time.ms = daytime_usec(tm) / 1000;
1951 8 : }
1952 :
1953 8 : timestamp timestamp_from_data(cudf_data_timestamp *ptr)
1954 : {
1955 8 : return timestamp_create(date_create(ptr->date.year,
1956 8 : ptr->date.month,
1957 8 : ptr->date.day),
1958 8 : daytime_create(ptr->time.hours,
1959 8 : ptr->time.minutes,
1960 8 : ptr->time.seconds,
1961 8 : ptr->time.ms * 1000));
1962 : }
1963 :
1964 5 : int date_is_null(cudf_data_date value)
1965 : {
1966 5 : cudf_data_date null_value;
1967 5 : data_from_date(date_nil, &null_value);
1968 5 : return value.year == null_value.year && value.month == null_value.month &&
1969 : value.day == null_value.day;
1970 : }
1971 :
1972 4 : int time_is_null(cudf_data_time value)
1973 : {
1974 4 : cudf_data_time null_value;
1975 4 : data_from_time(daytime_nil, &null_value);
1976 4 : return value.hours == null_value.hours &&
1977 4 : value.minutes == null_value.minutes &&
1978 4 : value.seconds == null_value.seconds && value.ms == null_value.ms;
1979 : }
1980 :
1981 4 : int timestamp_is_null(cudf_data_timestamp value)
1982 : {
1983 4 : return is_timestamp_nil(timestamp_from_data(&value));
1984 : }
1985 :
/*
 * A string value counts as null when its pointer is NULL.
 * Returns 1 for NULL and 0 for any other pointer (including "").
 */
int str_is_null(char *value)
{
	return value == NULL ? 1 : 0;
}
1987 :
1988 4 : int blob_is_null(cudf_data_blob value) { return value.size == ~(size_t) 0; }
1989 :
1990 2 : void blob_initialize(struct cudf_data_struct_blob *self,
1991 : size_t count) {
1992 2 : self->count = count;
1993 2 : self->data = jump_GDK_malloc(count * sizeof(self->null_value));
1994 2 : memset(self->data, 0, count * sizeof(self->null_value));
1995 2 : }
1996 :
1997 : #include "mel.h"
1998 : static mel_func capi_init_funcs[] = {
1999 : pattern("capi", "eval", CUDFevalStd, false, "Execute a simple CUDF script returning a single value", args(1,4, argany("",0),arg("fptr",ptr),arg("cpp",bit),arg("expr",str))),
2000 : pattern("capi", "eval", CUDFevalStd, false, "Execute a simple CUDF script value", args(1,5, varargany("",0),arg("fptr",ptr),arg("cpp",bit),arg("expr",str),varargany("arg",0))),
2001 : pattern("capi", "subeval_aggr", CUDFevalAggr, false, "grouped aggregates through CUDF", args(1,5, varargany("",0),arg("fptr",ptr),arg("cpp",bit),arg("expr",str),varargany("arg",0))),
2002 : pattern("capi", "eval_aggr", CUDFevalAggr, false, "grouped aggregates through CUDF", args(1,5, varargany("",0),arg("fptr",ptr),arg("cpp",bit),arg("expr",str),varargany("arg",0))),
2003 : pattern("batcapi", "eval", CUDFevalStd, false, "Execute a simple CUDF script value", args(1,5, varargany("",0),arg("fptr",ptr),arg("cpp",bit),arg("expr",str),varargany("arg",0))),
2004 : { .imp=NULL }
2005 : };
2006 : #include "mal_import.h"
2007 : #ifdef _MSC_VER
2008 : #undef read
2009 : #pragma section(".CRT$XCU",read)
2010 : #endif
2011 6 : LIB_STARTUP_FUNC(init_capi_mal)
2012 6 : { mal_module2("capi", NULL, capi_init_funcs, CUDFprelude, NULL); }
|