Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "mal.h"
15 : #include "mal_stack.h"
16 : #include "mal_linker.h"
17 : #include "gdk.h"
18 : #include "sql_catalog.h"
19 : #include "sql_scenario.h"
20 : #include "sql_cast.h"
21 : #include "sql_execute.h"
22 : #include "sql_storage.h"
23 : #include "cheader.h"
24 : #include "cheader.text.h"
25 :
26 : #include "gdk_time.h"
27 : #include "mutils.h"
28 :
29 : #include <setjmp.h>
30 : #include <signal.h>
31 : #include <sys/mman.h>
32 : #include <unistd.h>
33 : #include <string.h>
34 :
35 : #if defined(__GNUC__) && !defined(__clang__)
36 : #pragma GCC diagnostic ignored "-Wclobbered"
37 : #endif
38 :
/* Name of the GDK setting that enables mprotect handling of input columns. */
static const char mprotect_enableflag[] = "enable_mprotect";
/* Cached value of that setting; filled in by CUDFprelude and consulted by
 * can_mprotect_region and CUDFeval. */
static bool option_enable_mprotect = false;
/* Name of the GDK setting that allows helpers to longjmp on allocation
 * failure instead of returning to the caller. */
static const char longjmp_enableflag[] = "enable_longjmp";
/* Cached value of that setting; filled in by CUDFprelude. */
static bool option_enable_longjmp = false;
43 :
/*
 * Header that wrapped_GDK_malloc places in front of every block it hands
 * out. The headers form a singly linked list whose head is kept in the
 * per-thread capi_tls_s.ar field.
 */
typedef struct _allocated_region {
	struct _allocated_region *next;
} allocated_region;

struct _mprotected_region;
/*
 * One memory range recorded by mprotect_region(): start address, length in
 * bytes and a link to the previously recorded range.
 */
typedef struct _mprotected_region {
	void *addr;
	size_t len;

	struct _mprotected_region *next;
} mprotected_region;
55 :
56 : static char *mprotect_region(void *addr, size_t len,
57 : mprotected_region **regions);
/*
 * Per-thread state of a running C UDF call. CUDFeval keeps one instance on
 * its stack and publishes it through capi_tls_key.
 */
struct capi_tls_s {
	/* head of the list of blocks handed out by wrapped_GDK_malloc */
	allocated_region *ar;
	/* jump target used by handler() and by the allocation helpers */
	jmp_buf jb;
};
/* Thread-local-storage key; allocated once in CUDFprelude. */
static MT_TLS_t capi_tls_key;
63 :
/*
 * Signature of a compiled UDF entry point. It matches the wrapper that
 * CUDFeval generates: arrays of input and output structures plus the
 * allocator callbacks; the generated wrapper returns 0 when the user code
 * falls through to its end.
 */
typedef char *(*jitted_function)(void **inputs, void **outputs,
								 malloc_function_ptr malloc, free_function_ptr free);

struct _cached_functions;
/*
 * One compiled UDF kept for reuse. An entry matches when both the hash of
 * the source text and the parameter signature string are equal.
 */
typedef struct _cached_functions {
	jitted_function function;	/* entry point obtained with dlsym */
	BUN expression_hash;		/* strHash() of the UDF source text */
	char *parameters;			/* type codes followed by argument/output names */
	void *dll_handle;			/* handle returned by dlopen */
	struct _cached_functions *next;	/* next entry in the same bucket */
} cached_functions;

/* Number of buckets; the bucket index is strHash(function name) modulo this. */
#define FUNCTION_CACHE_SIZE 128

static cached_functions *function_cache[FUNCTION_CACHE_SIZE];
/* Held while walking or extending a bucket of function_cache. */
static MT_Lock cache_lock = MT_LOCK_INITIALIZER(cache_lock);
/* Non-zero once CUDFprelude has run its one-time setup. */
static int cudf_initialized = 0;
81 :
82 : static str CUDFeval(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci,
83 : bool grouped);
84 :
85 36 : static str CUDFevalStd(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
86 : {
87 36 : return CUDFeval(cntxt, mb, stk, pci, false);
88 : }
89 :
90 12 : static str CUDFevalAggr(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
91 : {
92 12 : return CUDFeval(cntxt, mb, stk, pci, true);
93 : }
94 :
95 6 : static str CUDFprelude(void)
96 : {
97 6 : if (!cudf_initialized) {
98 6 : cudf_initialized = true;
99 6 : option_enable_mprotect = GDKgetenv_istrue(mprotect_enableflag) || GDKgetenv_isyes(mprotect_enableflag);
100 6 : option_enable_longjmp = GDKgetenv_istrue(longjmp_enableflag) || GDKgetenv_isyes(longjmp_enableflag);
101 6 : MT_alloc_tls(&capi_tls_key);
102 : }
103 6 : return MAL_SUCCEED;
104 : }
105 :
/*
 * Write data_size bytes starting at data to stream f.
 * Returns true when the stream's error indicator is clear afterwards,
 * false otherwise.
 */
static bool WriteDataToFile(FILE *f, const void *data, size_t data_size)
{
	/* success is judged by the stream state, not by the item count */
	(void)fwrite(data, data_size, 1, f);
	if (ferror(f))
		return false;
	return true;
}
111 :
/*
 * Write the NUL-terminated string data (without its terminator) to f.
 * Returns what WriteDataToFile returns for those bytes.
 */
static bool WriteTextToFile(FILE *f, const char *data)
{
	const size_t text_len = strlen(data);

	return WriteDataToFile(f, data, text_len);
}
116 :
117 0 : static _Noreturn void handler(int sig, siginfo_t *si, void *unused)
118 : {
119 0 : (void)sig;
120 0 : (void)si;
121 0 : (void)unused;
122 :
123 0 : struct capi_tls_s *tls = MT_tls_get(capi_tls_key);
124 0 : longjmp(tls->jb, 1);
125 : }
126 :
127 48 : static bool can_mprotect_region(void* addr) {
128 48 : if (!option_enable_mprotect) return false;
129 0 : int pagesize = MT_pagesize();
130 0 : void* page_begin = (void *)((size_t)addr - (size_t)addr % pagesize);
131 0 : return page_begin == addr;
132 : }
133 :
134 0 : static char *mprotect_region(void *addr, size_t len,
135 : mprotected_region **regions)
136 : {
137 0 : mprotected_region *region;
138 0 : if (len == 0)
139 : return NULL;
140 :
141 0 : assert(can_mprotect_region(addr));
142 :
143 0 : region = GDKmalloc(sizeof(mprotected_region));
144 0 : if (!region) {
145 : return MAL_MALLOC_FAIL;
146 : }
147 0 : region->addr = addr;
148 0 : region->len = len;
149 0 : region->next = *regions;
150 0 : *regions = region;
151 0 : return NULL;
152 : }
153 :
/*
 * Make the range [addr, addr + len) readable and writable again.
 * A NULL addr is ignored. The result of mprotect() is not inspected.
 */
static void clear_mprotect(void *addr, size_t len)
{
	if (addr == NULL)
		return;
	mprotect(addr, len, PROT_READ | PROT_WRITE);
}
159 :
/*
 * Write the NUL-terminated string `data` to stream `f`. On failure: reset
 * errno, store a "Write error." exception in the local variable `msg` and
 * jump to the local label `wrapup`.
 *
 * Wrapped in do { } while (0) so that the macro behaves as one statement:
 * it must be followed by ';' and is safe inside an unbraced if/else.
 */
#define ATTEMPT_TO_WRITE_TO_FILE(f, data)                                      \
	do {                                                                       \
		if (!WriteTextToFile((f), (data))) {                                   \
			errno = 0;                                                         \
			msg = createException(MAL, "cudf.eval", "Write error.");           \
			goto wrapup;                                                       \
		}                                                                      \
	} while (0)

/*
 * Same as ATTEMPT_TO_WRITE_TO_FILE, but writes exactly `size` bytes from
 * `data` instead of a NUL-terminated string.
 */
#define ATTEMPT_TO_WRITE_DATA_TO_FILE(f, data, size)                           \
	do {                                                                       \
		if (!WriteDataToFile((f), (data), (size))) {                           \
			errno = 0;                                                         \
			msg = createException(MAL, "cudf.eval", "Write error.");           \
			goto wrapup;                                                       \
		}                                                                      \
	} while (0)
173 :
174 312 : static void *jump_GDK_malloc(size_t size)
175 : {
176 312 : if (size == 0)
177 : return NULL;
178 312 : void *ptr = GDKmalloc(size);
179 312 : if (!ptr && option_enable_longjmp) {
180 0 : struct capi_tls_s *tls = MT_tls_get(capi_tls_key);
181 0 : longjmp(tls->jb, 2);
182 : }
183 : return ptr;
184 : }
185 :
186 296 : static inline void *add_allocated_region(void *ptr)
187 : {
188 296 : allocated_region *region = (allocated_region *)ptr;
189 296 : struct capi_tls_s *tls = MT_tls_get(capi_tls_key);
190 296 : region->next = tls->ar;
191 296 : tls->ar = region;
192 296 : return (char *)ptr + sizeof(allocated_region);
193 : }
194 :
195 297 : static void *wrapped_GDK_malloc(size_t size)
196 : {
197 297 : if (size == 0)
198 : return NULL;
199 296 : void *ptr = jump_GDK_malloc(size + sizeof(allocated_region));
200 296 : return add_allocated_region(ptr);
201 : }
202 :
/*
 * Counterpart of wrapped_GDK_malloc with the signature of a free function.
 * Deliberately does nothing with ptr.
 * NOTE(review): blocks are tracked on the per-thread allocation list, so
 * they are presumably released in bulk elsewhere - confirm in CUDFeval.
 */
static void wrapped_GDK_free(void* ptr) {
	(void) ptr;
}
207 :
/* Emit <tpename>_is_null(), a wrapper around is_<tpename>_nil(). */
#define GENERATE_NUMERIC_IS_NULL(type, tpename)                                \
	static int tpename##_is_null(type value)                                   \
	{                                                                          \
		return is_##tpename##_nil(value);                                      \
	}

/*
 * Emit <tpename>_initialize(self, count): back the output structure with a
 * fresh transient BAT of `count` elements. A BAT already attached to self
 * is unfixed first. When COLnew fails the function either jumps to the
 * thread's jump buffer with value 2 (longjmp option enabled) or returns,
 * leaving self->bat NULL.
 */
#define GENERATE_NUMERIC_INITIALIZE(type, tpename)                             \
	static void tpename##_initialize(struct cudf_data_struct_##tpename *self,  \
									 size_t count)                             \
	{                                                                          \
		BAT *fresh;                                                            \
		if (self->bat) {                                                       \
			BBPunfix(((BAT*)self->bat)->batCacheid);                           \
			self->bat = NULL;                                                  \
		}                                                                      \
		fresh = COLnew(0, TYPE_##tpename, count, TRANSIENT);                   \
		if (fresh == NULL) {                                                   \
			if (!option_enable_longjmp)                                        \
				return;                                                        \
			struct capi_tls_s *tls = MT_tls_get(capi_tls_key);                 \
			longjmp(tls->jb, 2);                                               \
		}                                                                      \
		self->bat = (void*) fresh;                                             \
		self->count = count;                                                   \
		self->data = (type*) fresh->theap->base;                               \
		BATsetcount(fresh, count);                                             \
	}

/* Emit both helpers for a numeric type. */
#define GENERATE_NUMERIC_ALL(type, tpename)                                    \
	GENERATE_NUMERIC_INITIALIZE(type, tpename)                                 \
	GENERATE_NUMERIC_IS_NULL(type, tpename)


/*
 * For non-numeric types: declare <tpename>_is_null() (defined elsewhere)
 * and emit <tpename>_initialize(), which allocates `count` elements with
 * jump_GDK_malloc instead of creating a BAT.
 */
#define GENERATE_BASE_HEADERS(type, tpename)                                   \
	static int tpename##_is_null(type value);                                  \
	static void tpename##_initialize(struct cudf_data_struct_##tpename *self,  \
									 size_t count)                             \
	{                                                                          \
		self->data = jump_GDK_malloc(count * sizeof(self->null_value));        \
		self->count = count;                                                   \
	}
247 :
/* BAT-backed output types: <type>_initialize() and <type>_is_null() */
GENERATE_NUMERIC_ALL(bit, bit);
GENERATE_NUMERIC_ALL(bte, bte);
GENERATE_NUMERIC_ALL(sht, sht);
GENERATE_NUMERIC_ALL(int, int);
GENERATE_NUMERIC_ALL(lng, lng);
GENERATE_NUMERIC_ALL(flt, flt);
GENERATE_NUMERIC_ALL(dbl, dbl);
GENERATE_NUMERIC_ALL(oid, oid);

/* malloc-backed output types: <type>_initialize() here, <type>_is_null()
 * only declared */
GENERATE_BASE_HEADERS(char *, str);
GENERATE_BASE_HEADERS(cudf_data_date, date);
GENERATE_BASE_HEADERS(cudf_data_time, time);
GENERATE_BASE_HEADERS(cudf_data_timestamp, timestamp);
/* blob has both helpers defined elsewhere in the file */
static int blob_is_null(cudf_data_blob value);
static void blob_initialize(struct cudf_data_struct_blob *self,
							size_t count);
264 :
/*
 * Allocate and register the zero-initialised input structure for one
 * argument of type `tpe`.
 *
 * Expects these names in the expanding scope: `msg`, `inputs`, `index`,
 * `argnode` and the label `wrapup` (jumped to on allocation failure).
 * Declares the local `bat_data`, stores it in inputs[index] and fills in
 * the is_null / initialize callbacks. `scale` is 10 to the power of the
 * argument's SQL type scale, or 1 when no argnode is available.
 */
#define GENERATE_BAT_INPUT_BASE(tpe)                                           \
	struct cudf_data_struct_##tpe *bat_data =                                  \
		GDKzalloc(sizeof(struct cudf_data_struct_##tpe));                      \
	if (!bat_data) {                                                           \
		msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);              \
		goto wrapup;                                                           \
	}                                                                          \
	inputs[index] = bat_data;                                                  \
	bat_data->is_null = tpe##_is_null;                                         \
	bat_data->scale =                                                          \
		argnode ? pow(10, ((sql_arg *)argnode->data)->type.scale) : 1;         \
	bat_data->bat = NULL;                                                      \
	bat_data->initialize = (void (*)(void *, size_t))tpe##_initialize;
278 :
/*
 * Build the input structure for BAT `b` holding values of type `tpe`.
 *
 * On top of what GENERATE_BAT_INPUT_BASE needs, the expanding scope must
 * provide the list head `regions` (mprotected_region *).
 *
 * The data pointer is obtained in one of three ways:
 *  - dense BAT: the sequence starting at tseqbase is materialised into a
 *    freshly allocated array (alloced = true);
 *  - page-aligned tail with mprotect enabled: the BAT's own memory is used
 *    directly and the range is recorded in `regions`;
 *  - otherwise: the values are copied into a freshly allocated array
 *    (alloced = true).
 * Any failure stores an exception in `msg` and jumps to `wrapup`.
 */
#define GENERATE_BAT_INPUT(b, tpe)                                             \
	{                                                                          \
		char *mprotect_retval;                                                 \
		GENERATE_BAT_INPUT_BASE(tpe);                                          \
		bat_data->count = BATcount(b);                                         \
		bat_data->null_value = tpe##_nil;                                      \
		if (BATtdense(b)) {                                                    \
			size_t it = 0;                                                     \
			tpe val = b->tseqbase;                                             \
			/* bat is dense, materialize it */                                 \
			bat_data->data = GDKmalloc(                                        \
				bat_data->count * sizeof(bat_data->null_value));               \
			if (!bat_data->data) {                                             \
				msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);      \
				goto wrapup;                                                   \
			}                                                                  \
			bat_data->alloced = true;                                          \
			for (it = 0; it < bat_data->count; it++) {                         \
				bat_data->data[it] = val++;                                    \
			}                                                                  \
		} else if (can_mprotect_region(Tloc(b, 0))) {                          \
			bat_data->data = (tpe *)Tloc(b, 0);                                \
			mprotect_retval = mprotect_region(                                 \
				bat_data->data,                                                \
				bat_data->count * sizeof(bat_data->null_value), &regions);     \
			if (mprotect_retval) {                                             \
				msg = createException(MAL, "cudf.eval",                        \
									  "Failed to mprotect region: %s",         \
									  mprotect_retval);                        \
				goto wrapup;                                                   \
			}                                                                  \
		} else {                                                               \
			/* cannot mprotect bat region, copy data */                        \
			bat_data->data = GDKmalloc(                                        \
				bat_data->count * sizeof(bat_data->null_value));               \
			if (bat_data->count > 0 && !bat_data->data) {                      \
				msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);      \
				goto wrapup;                                                   \
			}                                                                  \
			bat_data->alloced = true;                                          \
			/* nothing to copy for an empty column; data may be NULL then */   \
			if (bat_data->count > 0) {                                         \
				memcpy(bat_data->data, Tloc(b, 0),                             \
					   bat_data->count * sizeof(bat_data->null_value));        \
			}                                                                  \
		}                                                                      \
	}
323 :
/*
 * Allocate and register the zero-initialised output structure for one
 * return value of type `tpe`.
 *
 * Expects `msg`, `outputs`, `index`, `argnode` and the label `wrapup` in
 * the expanding scope. Declares the local `bat_data`, stores it in
 * outputs[index] with no data attached yet (count 0, data NULL) and fills
 * in the is_null / initialize callbacks. `scale` is 10 to the power of the
 * result's SQL type scale, or 1 when no argnode is available.
 */
#define GENERATE_BAT_OUTPUT_BASE(tpe)                                          \
	struct cudf_data_struct_##tpe *bat_data =                                  \
		GDKzalloc(sizeof(struct cudf_data_struct_##tpe));                      \
	if (!bat_data) {                                                           \
		msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);              \
		goto wrapup;                                                           \
	}                                                                          \
	outputs[index] = bat_data;                                                 \
	bat_data->count = 0;                                                       \
	bat_data->data = NULL;                                                     \
	bat_data->is_null = tpe##_is_null;                                         \
	bat_data->scale =                                                          \
		argnode ? pow(10, ((sql_arg *)argnode->data)->type.scale) : 1;         \
	bat_data->initialize = (void (*)(void *, size_t))tpe##_initialize;

/* Output structure for a type whose nil value is the constant <tpe>_nil. */
#define GENERATE_BAT_OUTPUT(tpe)                                               \
	{                                                                          \
		GENERATE_BAT_OUTPUT_BASE(tpe);                                         \
		bat_data->null_value = tpe##_nil;                                      \
	}
344 :
#ifdef NDEBUG
/* Setting that requests debug compilation flags in non-debug builds. */
static const char debug_flag[] = "capi_use_debug";
#endif
/* Settings that override the C and C++ compiler commands. */
static const char cc_flag[] = "capi_cc";
static const char cpp_flag[] = "capi_cpp";

/* Directive prefixes recognised in UDF source; the rest of such a line is
 * passed to the compile or link command instead of being compiled. */
static const char cflags_pragma[] = "#pragma CFLAGS ";
static const char ldflags_pragma[] = "#pragma LDFLAGS ";

/* Compiler commands used when the corresponding setting is not present. */
#define JIT_COMPILER_NAME "cc"
#define JIT_CPP_COMPILER_NAME "c++"

/* Helpers that operate on a cudf_data_struct_<type> selected by `type`;
 * defined elsewhere in the file. */
static bool isAlloced(int type, void *struct_ptr);
static bool isValloced(int type, void *struct_ptr);
static size_t GetTypeCount(int type, void *struct_ptr);
static void *GetTypeData(int type, void *struct_ptr);
static void *GetTypeBat(int type, void *struct_ptr);
static const char *GetTypeName(int type);

/* Conversions between GDK temporal values and their cudf_data_* form;
 * defined elsewhere in the file. */
static void data_from_date(date d, cudf_data_date *ptr);
static date date_from_data(cudf_data_date *ptr);
static void data_from_time(daytime d, cudf_data_time *ptr);
static daytime time_from_data(cudf_data_time *ptr);
static void data_from_timestamp(timestamp d, cudf_data_timestamp *ptr);
static timestamp timestamp_from_data(cudf_data_timestamp *ptr);

/* Alphabet for the random part of temporary file names. */
static const char valid_path_characters[] = "abcdefghijklmnopqrstuvwxyz";
372 :
373 : static str
374 3 : empty_return(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, size_t retcols, oid seqbase)
375 : {
376 3 : str msg = MAL_SUCCEED;
377 3 : void **res = GDKzalloc(retcols * sizeof(void*));
378 :
379 3 : if (!res) {
380 0 : msg = createException(MAL, "capi.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
381 0 : goto bailout;
382 : }
383 :
384 6 : for (size_t i = 0; i < retcols; i++) {
385 3 : if (isaBatType(getArgType(mb, pci, i))) {
386 2 : BAT *b = COLnew(seqbase, getBatType(getArgType(mb, pci, i)), 0, TRANSIENT);
387 2 : if (!b) {
388 0 : msg = createException(MAL, "capi.eval", GDK_EXCEPTION);
389 0 : goto bailout;
390 : }
391 2 : ((BAT**)res)[i] = b;
392 : } else { // single value return, only for non-grouped aggregations
393 : // return NULL to conform to SQL aggregates
394 1 : int tpe = getArgType(mb, pci, i);
395 1 : if (!VALinit(&stk->stk[pci->argv[i]], tpe, ATOMnilptr(tpe))) {
396 0 : msg = createException(MAL, "capi.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
397 0 : goto bailout;
398 : }
399 1 : ((ValPtr*)res)[i] = &stk->stk[pci->argv[i]];
400 : }
401 : }
402 :
403 3 : bailout:
404 3 : if (res) {
405 6 : for (size_t i = 0; i < retcols; i++) {
406 3 : if (isaBatType(getArgType(mb, pci, i))) {
407 2 : BAT *b = ((BAT**)res)[i];
408 :
409 2 : if (b && msg) {
410 0 : BBPreclaim(b);
411 2 : } else if (b) {
412 2 : *getArgReference_bat(stk, pci, i) = b->batCacheid;
413 2 : BBPkeepref(b);
414 : }
415 1 : } else if (msg) {
416 0 : ValPtr pt = ((ValPtr*)res)[i];
417 :
418 0 : if (pt)
419 0 : VALclear(pt);
420 : }
421 : }
422 3 : GDKfree(res);
423 : }
424 3 : return msg;
425 : }
426 :
427 48 : static str CUDFeval(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci,
428 : bool grouped)
429 : {
430 48 : sql_func *sqlfun = NULL;
431 48 : bit use_cpp = *getArgReference_bit(stk, pci, pci->retc + 1);
432 48 : str exprStr = *getArgReference_str(stk, pci, pci->retc + 2);
433 :
434 48 : const int ARG_OFFSET = 3;
435 :
436 48 : size_t i = 0, j = 0;
437 48 : char argbuf[64];
438 48 : char buf[8192];
439 48 : char *fname = NULL;
440 48 : char *oname = NULL;
441 48 : char *libname = NULL;
442 48 : char error_buf[BUFSIZ];
443 48 : char total_error_buf[8192];
444 48 : size_t error_buffer_position = 0;
445 48 : str *args = NULL;
446 48 : str *output_names = NULL;
447 48 : char *msg = MAL_SUCCEED;
448 48 : node *argnode;
449 48 : int seengrp = 0;
450 48 : FILE *f = NULL;
451 48 : void *handle = NULL;
452 48 : jitted_function func = NULL;
453 48 : int ret, limit_argc = 0;
454 :
455 48 : FILE *compiler = NULL;
456 48 : int compiler_return_code;
457 :
458 48 : void **inputs = NULL;
459 48 : size_t input_count = 0;
460 48 : void **outputs = NULL;
461 48 : size_t output_count = 0;
462 48 : BAT **input_bats = NULL;
463 48 : mprotected_region *regions = NULL, *region_iter = NULL;
464 :
465 48 : lng initial_output_count = -1;
466 :
467 48 : struct sigaction sa = (struct sigaction) {.sa_flags = 0}, oldsa, oldsb;
468 48 : sigset_t signal_set;
469 :
470 : #ifdef NDEBUG
471 : bool debug_build =
472 : GDKgetenv_istrue(debug_flag) || GDKgetenv_isyes(debug_flag);
473 : #else
474 48 : bool debug_build = true;
475 : #endif
476 48 : char* extra_cflags = NULL;
477 48 : char* extra_ldflags = NULL;
478 :
479 :
480 48 : const char *compilation_flags = debug_build ? "-g -O0" : "-O2";
481 96 : const char *c_compiler =
482 1 : use_cpp ? (GDKgetenv(cpp_flag) ? GDKgetenv(cpp_flag)
483 1 : : JIT_CPP_COMPILER_NAME)
484 48 : : (GDKgetenv(cc_flag) ? GDKgetenv(cc_flag) : JIT_COMPILER_NAME);
485 :
486 48 : const char struct_prefix[] = "struct cudf_data_struct_";
487 48 : const char *funcname;
488 :
489 48 : BUN expression_hash = 0, funcname_hash = 0;
490 48 : cached_functions *cached_function;
491 48 : char *function_parameters = NULL;
492 48 : size_t input_size = 0;
493 48 : bit non_grouped_aggregate = 0;
494 :
495 48 : size_t index = 0;
496 48 : int bat_type = 0;
497 48 : const char* tpe = NULL;
498 :
499 48 : size_t extra_inputs = 0;
500 :
501 48 : struct capi_tls_s tls;
502 :
503 48 : tls.ar = NULL;
504 48 : MT_tls_set(capi_tls_key, &tls);
505 :
506 48 : (void)cntxt;
507 :
508 48 : if (!GDKgetenv_istrue("embedded_c") && !GDKgetenv_isyes("embedded_c"))
509 0 : throw(MAL, "cudf.eval", "Embedded C has not been enabled. "
510 : "Start server with --set embedded_c=true");
511 :
512 : // we need to be able to catch segfaults and bus errors
513 : // so we can work with mprotect to prevent UDFs from changing
514 : // the input data
515 :
516 : // we remove them from the pthread_sigmask
517 48 : if (option_enable_mprotect) {
518 0 : (void)sigemptyset(&signal_set);
519 0 : (void)sigaddset(&signal_set, SIGSEGV);
520 0 : (void)sigaddset(&signal_set, SIGBUS);
521 0 : (void)pthread_sigmask(SIG_UNBLOCK, &signal_set, NULL);
522 : }
523 :
524 48 : sqlfun = (sql_func *)*getArgReference_ptr(stk, pci, pci->retc);
525 48 : funcname = sqlfun ? sqlfun->base.name : "yet_another_c_function";
526 :
527 48 : args = (str *)GDKzalloc(sizeof(str) * pci->argc);
528 48 : output_names = (str *)GDKzalloc(sizeof(str) * pci->argc);
529 48 : if (!args || !output_names) {
530 0 : throw(MAL, "cudf.eval", MAL_MALLOC_FAIL);
531 : }
532 :
533 : // retrieve the argument names from the sqlfun structure
534 : // first argument after the return contains the pointer to the sql_func
535 : // structure
536 48 : if (sqlfun != NULL) {
537 : // retrieve the argument names (inputs)
538 48 : if (sqlfun->ops->cnt > 0) {
539 46 : int carg = pci->retc + ARG_OFFSET;
540 46 : argnode = sqlfun->ops->h;
541 103 : while (argnode) {
542 57 : char *argname = ((sql_arg *)argnode->data)->name;
543 57 : args[carg] = GDKstrdup(argname);
544 57 : if (!args[carg]) {
545 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
546 0 : goto wrapup;
547 : }
548 57 : carg++;
549 57 : argnode = argnode->next;
550 : }
551 : }
552 : // retrieve the output names
553 48 : argnode = sqlfun->res->h;
554 98 : for (i = 0; i < (size_t)sqlfun->res->cnt; i++) {
555 50 : output_names[i] = GDKstrdup(((sql_arg *)argnode->data)->name);
556 50 : argnode = argnode->next;
557 : }
558 : }
559 :
560 : // name unnamed outputs
561 98 : for (i = 0; i < (size_t)pci->retc; i++) {
562 50 : if (!output_names[i]) {
563 0 : if (pci->retc > 1) {
564 0 : snprintf(argbuf, sizeof(argbuf), "output%zu", i);
565 : } else {
566 : // just call it "output" if there is only one
567 0 : snprintf(argbuf, sizeof(argbuf), "output");
568 : }
569 0 : output_names[i] = GDKstrdup(argbuf);
570 : }
571 : }
572 : // the first unknown argument is the group, we don't really care for the
573 : // rest.
574 106 : for (i = pci->retc + ARG_OFFSET; i < (size_t)pci->argc; i++) {
575 64 : if (args[i] == NULL) {
576 7 : if (grouped && (i+2) == (size_t)pci->argc) {
577 6 : args[i] = GDKstrdup("aggr_group");
578 6 : if (!args[i]) {
579 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
580 0 : goto wrapup;
581 : }
582 6 : seengrp = i++; /* Don't be interested in the extents BAT */
583 6 : break;
584 : } else {
585 1 : snprintf(argbuf, sizeof(argbuf), "arg%zu", i - pci->retc - 1);
586 1 : args[i] = GDKstrdup(argbuf);
587 1 : if (!args[i]) {
588 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
589 0 : goto wrapup;
590 : }
591 : }
592 : }
593 : }
594 : // the first index where input arguments are not relevant for the C UDF
595 48 : limit_argc = i;
596 : // non-grouped aggregates don't have the group list
597 : // to allow users to write code for both grouped and non-grouped aggregates
598 : // we create an "aggr_group" BAT for non-grouped aggregates
599 48 : non_grouped_aggregate = grouped && !seengrp;
600 :
601 48 : input_count = limit_argc - (pci->retc + ARG_OFFSET);
602 48 : output_count = pci->retc;
603 :
604 : // begin the compilation phase
605 : // first look up if we have already compiled this function
606 48 : expression_hash = 0;
607 48 : expression_hash = strHash(exprStr);
608 48 : funcname_hash = strHash(funcname);
609 48 : funcname_hash = funcname_hash % FUNCTION_CACHE_SIZE;
610 48 : j = 0;
611 306 : for (i = 0; i < (size_t)limit_argc; i++) {
612 258 : if (args[i]) {
613 64 : j += strlen(args[i]);
614 : }
615 258 : if (output_names[i]) {
616 50 : j += strlen(output_names[i]);
617 : }
618 : }
619 :
620 96 : function_parameters =
621 48 : GDKzalloc((j + input_count + output_count + 1) * sizeof(char));
622 48 : if (!function_parameters) {
623 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
624 0 : goto wrapup;
625 : }
626 112 : for (i = 0; i < input_count; i++) {
627 64 : if (!isaBatType(getArgType(mb, pci, i))) {
628 31 : function_parameters[i] = getArgType(mb, pci, i);
629 : } else {
630 33 : function_parameters[i] = getBatType(getArgType(mb, pci, i));
631 : }
632 : }
633 98 : for (i = 0; i < output_count; i++) {
634 50 : if (!isaBatType(getArgType(mb, pci, i))) {
635 16 : function_parameters[input_count + i] = getArgType(mb, pci, i);
636 : } else {
637 34 : function_parameters[input_count + i] =
638 : getBatType(getArgType(mb, pci, i));
639 : }
640 : }
641 48 : j = input_count + output_count;
642 306 : for (i = 0; i < (size_t)limit_argc; i++) {
643 258 : if (args[i]) {
644 64 : size_t len = strlen(args[i]);
645 64 : memcpy(function_parameters + j, args[i], len);
646 64 : j += len;
647 : }
648 258 : if (output_names[i]) {
649 50 : size_t len = strlen(output_names[i]);
650 50 : memcpy(function_parameters + j, output_names[i], len);
651 50 : j += len;
652 : }
653 : }
654 :
655 48 : MT_lock_set(&cache_lock);
656 48 : cached_function = function_cache[funcname_hash];
657 72 : while (cached_function) {
658 36 : if (cached_function->expression_hash == expression_hash &&
659 18 : strcmp(cached_function->parameters, function_parameters) == 0) {
660 : // this function matches our compiled function
661 : // in both source code and parameters
662 : // use the already compiled function instead of recompiling
663 12 : func = cached_function->function;
664 12 : break;
665 : }
666 24 : cached_function = cached_function->next;
667 : }
668 48 : MT_lock_unset(&cache_lock);
669 :
670 48 : if (!func) {
671 : // function was not found in the cache
672 : // we have to compile it
673 :
674 : // first generate the names of the files
675 : // we place the temporary files in the DELDIR directory
676 : // because this will be removed again upon server startup
677 36 : const int RANDOM_NAME_SIZE = 32;
678 36 : const char prefix[] = TEMPDIR_NAME DIR_SEP_STR;
679 36 : size_t prefix_size = strlen(prefix);
680 36 : char *deldirpath;
681 :
682 36 : memcpy(buf, prefix, sizeof(char) * strlen(prefix));
683 : // generate a random 32-character name for the temporary files
684 1188 : for (i = prefix_size; i < prefix_size + RANDOM_NAME_SIZE; i++) {
685 1152 : buf[i] = valid_path_characters[rand() %
686 : (sizeof(valid_path_characters) - 1)];
687 : }
688 36 : buf[i] = '\0';
689 36 : fname = GDKfilepath(0, BATDIR, buf, "c");
690 36 : if (fname == NULL) {
691 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
692 0 : goto wrapup;
693 : }
694 36 : oname = GDKstrdup(fname);
695 36 : if (oname == NULL) {
696 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
697 0 : goto wrapup;
698 : }
699 36 : oname[strlen(oname) - 1] = 'o';
700 :
701 36 : memmove(buf + strlen(SO_PREFIX) + prefix_size, buf + prefix_size,
702 : i + 1 - prefix_size);
703 36 : memcpy(buf + prefix_size, SO_PREFIX, sizeof(char) * strlen(SO_PREFIX));
704 36 : libname =
705 36 : GDKfilepath(0, BATDIR, buf, SO_EXT[0] == '.' ? &SO_EXT[1] : SO_EXT);
706 36 : if (libname == NULL) {
707 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
708 0 : goto wrapup;
709 : }
710 :
711 : // if DELDIR directory does not exist, create it
712 36 : deldirpath = GDKfilepath(0, NULL, TEMPDIR, NULL);
713 36 : if (deldirpath == NULL) {
714 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
715 0 : goto wrapup;
716 : }
717 36 : if (MT_mkdir(deldirpath) < 0 && errno != EEXIST) {
718 0 : msg = createException(MAL, "cudf.eval",
719 : "cannot create directory %s\n", deldirpath);
720 0 : goto wrapup;
721 : }
722 36 : GDKfree(deldirpath);
723 :
724 : // now generate the source file
725 36 : f = MT_fopen(fname, "w+");
726 36 : if (!f) {
727 0 : msg = createException(MAL, "cudf.eval",
728 : "Failed to open file for JIT compilation: %s",
729 0 : GDKstrerror(errno, (char[128]){0}, 128));
730 0 : errno = 0;
731 0 : goto wrapup;
732 : }
733 :
734 : // include some standard C headers first
735 36 : ATTEMPT_TO_WRITE_TO_FILE(f, "#include <stdio.h>\n");
736 36 : ATTEMPT_TO_WRITE_TO_FILE(f, "#include <stdlib.h>\n");
737 36 : ATTEMPT_TO_WRITE_TO_FILE(f, "#include <string.h>\n");
738 : // we include "cheader.h", but not directly to avoid having to deal with
739 : // headers, etc...
740 : // Instead it is embedded in a string (loaded from "cheader.text.h")
741 : // this file contains the structures used for input/output arguments
742 36 : ATTEMPT_TO_WRITE_TO_FILE(f, cheader_header_text);
743 : // some monetdb-style typedefs to make it easier
744 36 : ATTEMPT_TO_WRITE_TO_FILE(f, "typedef int8_t bte;\n");
745 36 : ATTEMPT_TO_WRITE_TO_FILE(f, "typedef int16_t sht;\n");
746 36 : ATTEMPT_TO_WRITE_TO_FILE(f, "typedef int64_t lng;\n");
747 36 : ATTEMPT_TO_WRITE_TO_FILE(f, "typedef float flt;\n");
748 36 : ATTEMPT_TO_WRITE_TO_FILE(f, "typedef double dbl;\n");
749 36 : ATTEMPT_TO_WRITE_TO_FILE(f, "typedef char* str;\n");
750 36 : ATTEMPT_TO_WRITE_TO_FILE(f, "typedef size_t oid;\n");
751 : // now we search exprStr for any preprocessor directives (#)
752 : // we move these to the top of the file
753 : // this allows the user to normally #include files
754 : {
755 : int preprocessor_start = 0;
756 : bool is_preprocessor_directive = false;
757 : bool new_line = false;
758 9853 : for (i = 0; i < strlen(exprStr); i++) {
759 9817 : if (exprStr[i] == '\n') {
760 359 : if (is_preprocessor_directive) {
761 : // the previous line was a preprocessor directive
762 : // first check if it is one of our special preprocessor directives
763 8 : if (i - preprocessor_start >= strlen(cflags_pragma) &&
764 8 : memcmp(exprStr + preprocessor_start, cflags_pragma, strlen(cflags_pragma)) == 0) {
765 0 : size_t cflags_characters = (i - preprocessor_start) - strlen(cflags_pragma);
766 0 : if (cflags_characters > 0 && !extra_cflags) {
767 0 : extra_cflags = GDKzalloc(cflags_characters + 1);
768 0 : if (extra_cflags) {
769 0 : memcpy(extra_cflags, exprStr + preprocessor_start + strlen(cflags_pragma), cflags_characters);
770 : }
771 : }
772 8 : } else if (i - preprocessor_start >= strlen(ldflags_pragma) &&
773 8 : memcmp(exprStr + preprocessor_start, ldflags_pragma, strlen(ldflags_pragma)) == 0) {
774 0 : size_t ldflags_characters = (i - preprocessor_start) - strlen(ldflags_pragma);
775 0 : if (ldflags_characters > 0 && !extra_ldflags) {
776 0 : extra_ldflags = GDKzalloc(ldflags_characters + 1);
777 0 : if (extra_ldflags) {
778 0 : memcpy(extra_ldflags, exprStr + preprocessor_start + strlen(ldflags_pragma), ldflags_characters);
779 : }
780 : }
781 : } else {
782 : // regular preprocessor directive: write it to the file
783 8 : ATTEMPT_TO_WRITE_DATA_TO_FILE(f, exprStr +
784 : preprocessor_start,
785 8 : i - preprocessor_start);
786 8 : ATTEMPT_TO_WRITE_TO_FILE(f, "\n");
787 : }
788 : // now overwrite the preprocessor directive in the
789 : // expression string with spaces
790 157 : for (j = preprocessor_start; j < i; j++) {
791 149 : exprStr[j] = ' ';
792 : }
793 : }
794 : is_preprocessor_directive = false;
795 : new_line = true;
796 9458 : } else if (exprStr[i] == ' ' || exprStr[i] == '\t') {
797 : // skip any spaces
798 2531 : continue;
799 6927 : } else if (new_line) {
800 359 : if (exprStr[i] == '#') {
801 8 : preprocessor_start = i;
802 8 : is_preprocessor_directive = true;
803 : }
804 : new_line = false;
805 : }
806 : }
807 : }
808 :
809 : // create the actual function
810 36 : if (use_cpp) {
811 : // avoid name wrangling if we are compiling C++ code
812 1 : ATTEMPT_TO_WRITE_TO_FILE(f, "\nextern \"C\"");
813 : }
814 36 : ATTEMPT_TO_WRITE_TO_FILE(f, "\nchar* ");
815 36 : ATTEMPT_TO_WRITE_TO_FILE(f, funcname);
816 36 : ATTEMPT_TO_WRITE_TO_FILE(f, "(void** __inputs, void** __outputs, "
817 36 : "malloc_function_ptr malloc, free_function_ptr free) {\n");
818 :
819 : // now we convert the input arguments from void** to the proper
820 : // input/output
821 : // of the function
822 : // first convert the input
823 87 : for (i = pci->retc + ARG_OFFSET; i < (size_t)limit_argc; i++) {
824 102 : bat_type = !isaBatType(getArgType(mb, pci, i))
825 : ? getArgType(mb, pci, i)
826 51 : : getBatType(getArgType(mb, pci, i));
827 51 : tpe = GetTypeName(bat_type);
828 51 : assert(tpe);
829 51 : if (tpe) {
830 51 : snprintf(buf, sizeof(buf),
831 : "\t%s%s %s = *((%s%s*)__inputs[%zu]);\n", struct_prefix,
832 51 : tpe, args[i], struct_prefix, tpe,
833 51 : i - (pci->retc + ARG_OFFSET));
834 51 : ATTEMPT_TO_WRITE_TO_FILE(f, buf);
835 : }
836 : }
837 36 : if (non_grouped_aggregate) {
838 : // manually add "aggr_group" for non-grouped aggregates
839 4 : bat_type = TYPE_oid;
840 4 : tpe = GetTypeName(bat_type);
841 4 : assert(tpe);
842 4 : if (tpe) {
843 4 : snprintf(buf, sizeof(buf),
844 : "\t%s%s %s = *((%s%s*)__inputs[%zu]);\n", struct_prefix,
845 : tpe, "aggr_group", struct_prefix, tpe, input_count);
846 4 : ATTEMPT_TO_WRITE_TO_FILE(f, buf);
847 : }
848 : }
849 : // output types
850 74 : for (i = 0; i < (size_t)pci->retc; i++) {
851 38 : bat_type = getBatType(getArgType(mb, pci, i));
852 38 : tpe = GetTypeName(bat_type);
853 38 : assert(tpe);
854 38 : if (tpe) {
855 38 : snprintf(buf, sizeof(buf),
856 : "\t%s%s* %s = ((%s%s*)__outputs[%zu]);\n", struct_prefix,
857 38 : tpe, output_names[i], struct_prefix, tpe, i);
858 38 : ATTEMPT_TO_WRITE_TO_FILE(f, buf);
859 : }
860 : }
861 :
862 36 : ATTEMPT_TO_WRITE_TO_FILE(f, "\n");
863 : // write the actual user defined code into the file
864 36 : ATTEMPT_TO_WRITE_TO_FILE(f, exprStr);
865 :
866 36 : ATTEMPT_TO_WRITE_TO_FILE(f, "\nreturn 0;\n}\n");
867 :
868 36 : fclose(f);
869 36 : f = NULL;
870 :
871 : // now it's time to try to compile the code
872 : // we use popen to capture any error output
873 36 : snprintf(buf, sizeof(buf), "%s %s -c -fPIC %s %s -o %s 2>&1 >/dev/null",
874 : c_compiler, extra_cflags ? extra_cflags : "", compilation_flags, fname, oname);
875 36 : GDKfree(fname);
876 36 : fname = NULL;
877 36 : compiler = popen(buf, "r");
878 36 : if (!compiler) {
879 0 : msg = createException(MAL, "cudf.eval", "Failed popen");
880 0 : goto wrapup;
881 : }
882 : // read the error stream into the error buffer until the compiler is
883 : // done
884 36 : while (fgets(error_buf, sizeof(error_buf), compiler)) {
885 0 : size_t error_size = strlen(error_buf);
886 0 : snprintf(total_error_buf + error_buffer_position,
887 : sizeof(total_error_buf) - error_buffer_position, "%s",
888 : error_buf);
889 0 : error_buffer_position += error_size;
890 0 : if (error_buffer_position >= sizeof(total_error_buf)) break;
891 : }
892 :
893 36 : compiler_return_code = pclose(compiler);
894 36 : compiler = NULL;
895 :
896 36 : if (compiler_return_code != 0) {
897 : // failure in compiling the code
898 : // report the failure to the user
899 0 : msg = createException(MAL, "cudf.eval",
900 : "Failed to compile C UDF:\n%s",
901 : total_error_buf);
902 0 : goto wrapup;
903 : }
904 :
905 36 : error_buffer_position = 0;
906 36 : error_buf[0] = '\0';
907 :
908 36 : snprintf(buf, sizeof(buf), "%s %s %s -shared -o %s 2>&1 >/dev/null", c_compiler,
909 : extra_ldflags ? extra_ldflags : "", oname, libname);
910 36 : GDKfree(oname);
911 36 : oname = NULL;
912 36 : compiler = popen(buf, "r");
913 36 : if (!compiler) {
914 0 : msg = createException(MAL, "cudf.eval", "Failed popen");
915 0 : goto wrapup;
916 : }
917 36 : while (fgets(error_buf, sizeof(error_buf), compiler)) {
918 0 : size_t error_size = strlen(error_buf);
919 0 : snprintf(total_error_buf + error_buffer_position,
920 : sizeof(total_error_buf) - error_buffer_position, "%s",
921 : error_buf);
922 0 : error_buffer_position += error_size;
923 0 : if (error_buffer_position >= sizeof(total_error_buf)) break;
924 : }
925 :
926 36 : compiler_return_code = pclose(compiler);
927 36 : compiler = NULL;
928 :
929 36 : if (compiler_return_code != 0) {
930 : // failure in compiler
931 0 : msg = createException(MAL, "cudf.eval", "Failed to link C UDF.\n%s",
932 : total_error_buf);
933 0 : goto wrapup;
934 : }
935 :
936 36 : handle = dlopen(libname, RTLD_LAZY);
937 36 : GDKfree(libname);
938 36 : libname = NULL;
939 36 : if (!handle) {
940 0 : msg = createException(MAL, "cudf.eval",
941 : "Failed to open shared library: %s.",
942 : dlerror());
943 0 : goto wrapup;
944 : }
945 36 : func = (jitted_function)dlsym(handle, funcname);
946 36 : if (!func) {
947 0 : msg = createException(MAL, "cudf.eval",
948 : "Failed to load function from library: %s.",
949 : dlerror());
950 0 : goto wrapup;
951 : }
952 : // now that we have compiled this function
953 : // store it in our function cache
954 : {
955 36 : cached_functions *new_entry = GDKmalloc(sizeof(cached_functions));
956 36 : if (!new_entry) {
957 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
958 0 : goto wrapup;
959 : }
960 36 : new_entry->function = func;
961 36 : new_entry->expression_hash = expression_hash;
962 36 : new_entry->parameters = function_parameters;
963 36 : new_entry->dll_handle = handle;
964 36 : function_parameters = NULL;
965 36 : handle = NULL;
966 36 : MT_lock_set(&cache_lock);
967 36 : new_entry->next = function_cache[funcname_hash];
968 36 : function_cache[funcname_hash] = new_entry;
969 36 : MT_lock_unset(&cache_lock);
970 : }
971 : }
972 48 : if (input_count > 0) {
973 : // add "aggr_group" for non-grouped aggregates
974 47 : extra_inputs = non_grouped_aggregate ? 1 : 0;
975 47 : input_bats = GDKzalloc(sizeof(BAT *) * (input_count + extra_inputs));
976 47 : inputs = GDKzalloc(sizeof(void *) * (input_count + extra_inputs));
977 47 : if (!inputs || !input_bats) {
978 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
979 0 : goto wrapup;
980 : }
981 : }
982 48 : if (output_count > 0) {
983 48 : outputs = GDKzalloc(sizeof(void *) * output_count);
984 48 : if (!outputs) {
985 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
986 0 : goto wrapup;
987 : }
988 : }
989 : // create the inputs
990 48 : argnode = sqlfun ? sqlfun->ops->h : NULL;
991 103 : for (i = pci->retc + ARG_OFFSET; i < (size_t)limit_argc; i++) {
992 58 : index = i - (pci->retc + ARG_OFFSET);
993 58 : bat_type = getArgType(mb, pci, i);
994 58 : if (!isaBatType(bat_type)) {
995 13 : void* input = NULL;
996 13 : if (bat_type == TYPE_str) {
997 2 : input = *getArgReference_str(stk, pci, i);
998 11 : } else if (bat_type == TYPE_blob) {
999 1 : input = *(blob**)getArgReference(stk, pci, i);
1000 : } else {
1001 10 : input = getArgReference(stk, pci, i);
1002 : }
1003 : // scalar input
1004 : // create a temporary BAT
1005 13 : input_bats[index] = COLnew(0, bat_type, 1, TRANSIENT);
1006 13 : if (!input_bats[index]) {
1007 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1008 0 : goto wrapup;
1009 : }
1010 13 : if (BUNappend(input_bats[index], input,
1011 : false) != GDK_SUCCEED) {
1012 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1013 0 : goto wrapup;
1014 : }
1015 : } else {
1016 : // deal with BAT input
1017 45 : bat_type = getBatType(getArgType(mb, pci, i));
1018 45 : if (!(input_bats[index] =
1019 45 : BATdescriptor(*getArgReference_bat(stk, pci, i)))) {
1020 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1021 0 : goto wrapup;
1022 : }
1023 45 : if (BATcount(input_bats[index]) == 0) {
1024 : /* empty input, generate trivial return */
1025 : /* I expect all inputs to have the same size,
1026 : so this should be safe */
1027 3 : msg = empty_return(mb, stk, pci, output_count,
1028 : input_bats[index]->hseqbase);
1029 3 : goto wrapup;
1030 : }
1031 : }
1032 :
1033 55 : if (bat_type == TYPE_bit) {
1034 0 : GENERATE_BAT_INPUT(input_bats[index], bit);
1035 : } else if (bat_type == TYPE_bte) {
1036 0 : GENERATE_BAT_INPUT(input_bats[index], bte);
1037 : } else if (bat_type == TYPE_sht) {
1038 0 : GENERATE_BAT_INPUT(input_bats[index], sht);
1039 : } else if (bat_type == TYPE_int) {
1040 28 : GENERATE_BAT_INPUT(input_bats[index], int);
1041 : } else if (bat_type == TYPE_oid) {
1042 5 : GENERATE_BAT_INPUT(input_bats[index], oid);
1043 : // Hack for groups BAT, the count should reflect on the number of groups and not the number
1044 : // of rows, so use extents BAT
1045 5 : if (i == (size_t)seengrp) {
1046 5 : struct cudf_data_struct_oid *t = inputs[index];
1047 5 : BAT *ex = BBPquickdesc(*getArgReference_bat(stk, pci, i + 1));
1048 5 : if (!ex) {
1049 0 : msg = createException(MAL, "cudf.eval", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
1050 0 : goto wrapup;
1051 : }
1052 5 : t->count = BATcount(ex);
1053 : }
1054 : } else if (bat_type == TYPE_lng) {
1055 2 : GENERATE_BAT_INPUT(input_bats[index], lng);
1056 : } else if (bat_type == TYPE_flt) {
1057 1 : GENERATE_BAT_INPUT(input_bats[index], flt);
1058 : } else if (bat_type == TYPE_dbl) {
1059 4 : GENERATE_BAT_INPUT(input_bats[index], dbl);
1060 : } else if (bat_type == TYPE_str) {
1061 6 : BATiter li;
1062 6 : BUN p = 0, q = 0;
1063 6 : bool can_mprotect_varheap = false;
1064 6 : str mprotect_retval;
1065 6 : GENERATE_BAT_INPUT_BASE(str);
1066 6 : bat_data->count = BATcount(input_bats[index]);
1067 6 : bat_data->data = bat_data->count == 0 ? NULL : GDKmalloc(sizeof(char *) * bat_data->count);
1068 6 : bat_data->null_value = NULL;
1069 6 : if (bat_data->count > 0 && !bat_data->data) {
1070 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1071 0 : goto wrapup;
1072 : }
1073 6 : bat_data->alloced = true;
1074 6 : j = 0;
1075 :
1076 : // check if we can mprotect the varheap
1077 : // if we can't mprotect, copy the strings instead
1078 6 : assert(input_bats[index]->tvheap);
1079 6 : can_mprotect_varheap = can_mprotect_region(input_bats[index]->tvheap->base);
1080 6 : bat_data->valloced = !can_mprotect_varheap;
1081 :
1082 6 : li = bat_iterator(input_bats[index]);
1083 304 : BATloop(input_bats[index], p, q)
1084 : {
1085 298 : char *t = (char *)BUNtvar(li, p);
1086 298 : if (strNil(t)) {
1087 2 : bat_data->data[j] = NULL;
1088 : } else {
1089 296 : if (can_mprotect_varheap) {
1090 0 : bat_data->data[j] = t;
1091 : } else {
1092 296 : bat_data->data[j] = GDKmalloc(strlen(t) + 1);
1093 296 : if (!bat_data->data[j]) {
1094 0 : bat_iterator_end(&li);
1095 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1096 0 : goto wrapup;
1097 : }
1098 296 : strcpy(bat_data->data[j], t);
1099 : }
1100 : }
1101 298 : j++;
1102 : }
1103 6 : bat_iterator_end(&li);
1104 6 : if (can_mprotect_varheap) {
1105 : // mprotect the varheap of the BAT to prevent modification of input strings
1106 0 : mprotect_retval =
1107 0 : mprotect_region(input_bats[index]->tvheap->base,
1108 0 : input_bats[index]->tvheap->size, ®ions);
1109 0 : if (mprotect_retval) {
1110 0 : msg = createException(MAL, "cudf.eval",
1111 : "Failed to mprotect region: %s",
1112 : mprotect_retval);
1113 0 : goto wrapup;
1114 : }
1115 : }
1116 : } else if (bat_type == TYPE_date) {
1117 2 : date *baseptr;
1118 2 : GENERATE_BAT_INPUT_BASE(date);
1119 2 : bat_data->count = BATcount(input_bats[index]);
1120 2 : bat_data->data = bat_data->count == 0 ? NULL :
1121 2 : GDKmalloc(sizeof(bat_data->null_value) * bat_data->count);
1122 2 : if (bat_data->count > 0 && !bat_data->data) {
1123 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1124 0 : goto wrapup;
1125 : }
1126 2 : bat_data->alloced = true;
1127 :
1128 2 : baseptr = (date *)Tloc(input_bats[index], 0);
1129 7 : for (j = 0; j < bat_data->count; j++) {
1130 5 : data_from_date(baseptr[j], bat_data->data + j);
1131 : }
1132 2 : data_from_date(date_nil, &bat_data->null_value);
1133 : } else if (bat_type == TYPE_daytime) {
1134 2 : daytime *baseptr;
1135 2 : GENERATE_BAT_INPUT_BASE(time);
1136 2 : bat_data->count = BATcount(input_bats[index]);
1137 2 : bat_data->data = bat_data->count == 0 ? NULL :
1138 2 : GDKmalloc(sizeof(bat_data->null_value) * bat_data->count);
1139 2 : if (bat_data->count > 0 && !bat_data->data) {
1140 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1141 0 : goto wrapup;
1142 : }
1143 2 : bat_data->alloced = true;
1144 :
1145 2 : baseptr = (daytime *)Tloc(input_bats[index], 0);
1146 6 : for (j = 0; j < bat_data->count; j++) {
1147 4 : data_from_time(baseptr[j], bat_data->data + j);
1148 : }
1149 2 : data_from_time(daytime_nil, &bat_data->null_value);
1150 : } else if (bat_type == TYPE_timestamp) {
1151 2 : timestamp *baseptr;
1152 2 : GENERATE_BAT_INPUT_BASE(timestamp);
1153 2 : bat_data->count = BATcount(input_bats[index]);
1154 2 : bat_data->data = bat_data->count == 0 ? NULL :
1155 2 : GDKmalloc(sizeof(bat_data->null_value) * bat_data->count);
1156 2 : if (bat_data->count > 0 && !bat_data->data) {
1157 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1158 0 : goto wrapup;
1159 : }
1160 2 : bat_data->alloced = true;
1161 :
1162 2 : baseptr = (timestamp *)Tloc(input_bats[index], 0);
1163 6 : for (j = 0; j < bat_data->count; j++) {
1164 4 : data_from_timestamp(baseptr[j], bat_data->data + j);
1165 : }
1166 2 : data_from_timestamp(timestamp_nil, &bat_data->null_value);
1167 : } else if (bat_type == TYPE_blob) {
1168 2 : BATiter li;
1169 2 : BUN p = 0, q = 0;
1170 2 : str mprotect_retval;
1171 2 : bool can_mprotect_varheap = false;
1172 2 : GENERATE_BAT_INPUT_BASE(blob);
1173 2 : bat_data->count = BATcount(input_bats[index]);
1174 2 : bat_data->data = bat_data->count == 0 ? NULL :
1175 2 : GDKmalloc(sizeof(cudf_data_blob) * bat_data->count);
1176 2 : if (bat_data->count > 0 && !bat_data->data) {
1177 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1178 0 : goto wrapup;
1179 : }
1180 2 : bat_data->alloced = true;
1181 2 : j = 0;
1182 :
1183 : // check if we can mprotect the varheap
1184 : // if we can't mprotect, copy the strings instead
1185 2 : assert(input_bats[index]->tvheap);
1186 2 : can_mprotect_varheap = can_mprotect_region(input_bats[index]->tvheap->base);
1187 2 : bat_data->valloced = !can_mprotect_varheap;
1188 :
1189 2 : li = bat_iterator(input_bats[index]);
1190 6 : BATloop(input_bats[index], p, q)
1191 : {
1192 4 : blob *t = (blob *)BUNtvar(li, p);
1193 4 : if (t->nitems == ~(size_t)0) {
1194 1 : bat_data->data[j].size = ~(size_t) 0;
1195 1 : bat_data->data[j].data = NULL;
1196 : } else {
1197 3 : bat_data->data[j].size = t->nitems;
1198 3 : if (can_mprotect_varheap) {
1199 0 : bat_data->data[j].data = &t->data[0];
1200 3 : } else if (t->nitems > 0) {
1201 2 : bat_data->data[j].data = GDKmalloc(t->nitems);
1202 2 : if (!bat_data->data[j].data) {
1203 0 : bat_iterator_end(&li);
1204 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1205 0 : goto wrapup;
1206 : }
1207 2 : memcpy(bat_data->data[j].data, &t->data[0], t->nitems);
1208 : } else {
1209 1 : bat_data->data[j].data = NULL;
1210 : }
1211 : }
1212 4 : j++;
1213 : }
1214 2 : bat_iterator_end(&li);
1215 2 : bat_data->null_value.size = ~(size_t) 0;
1216 2 : bat_data->null_value.data = NULL;
1217 2 : if (can_mprotect_varheap) {
1218 : // for blob columns, mprotect the varheap of the BAT
1219 0 : mprotect_retval =
1220 0 : mprotect_region(input_bats[index]->tvheap->base,
1221 0 : input_bats[index]->tvheap->size, ®ions);
1222 0 : if (mprotect_retval) {
1223 0 : msg = createException(MAL, "cudf.eval",
1224 : "Failed to mprotect region: %s",
1225 : mprotect_retval);
1226 0 : goto wrapup;
1227 : }
1228 : }
1229 : } else {
1230 : // unsupported type: convert to string
1231 1 : BATiter li;
1232 1 : BUN p = 0, q = 0;
1233 1 : GENERATE_BAT_INPUT_BASE(str);
1234 1 : bat_data->count = BATcount(input_bats[index]);
1235 1 : bat_data->null_value = NULL;
1236 1 : bat_data->data = bat_data->count == 0 ? NULL :
1237 1 : GDKzalloc(sizeof(char *) * bat_data->count);
1238 1 : if (bat_data->count > 0 && !bat_data->data) {
1239 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1240 0 : goto wrapup;
1241 : }
1242 1 : bat_data->alloced = true;
1243 1 : j = 0;
1244 :
1245 1 : li = bat_iterator(input_bats[index]);
1246 3 : BATloop(input_bats[index], p, q)
1247 : {
1248 2 : void *t = BUNtail(li, p);
1249 4 : if (BATatoms[bat_type].atomNull &&
1250 2 : BATatoms[bat_type].atomCmp(
1251 : t, BATatoms[bat_type].atomNull) == 0) {
1252 1 : bat_data->data[j] = NULL;
1253 : } else {
1254 1 : char *result = NULL;
1255 1 : size_t length = 0;
1256 1 : if (BATatoms[bat_type].atomToStr(&result, &length, t, false) ==
1257 : 0) {
1258 0 : bat_iterator_end(&li);
1259 0 : msg = createException(
1260 : MAL, "cudf.eval",
1261 : "Failed to convert element to string");
1262 0 : goto wrapup;
1263 : }
1264 1 : bat_data->data[j] = result;
1265 : }
1266 2 : j++;
1267 : }
1268 1 : bat_iterator_end(&li);
1269 1 : bat_data->valloced = true;
1270 : }
1271 55 : input_size = BATcount(input_bats[index]) > input_size
1272 : ? BATcount(input_bats[index])
1273 : : input_size;
1274 55 : argnode = argnode ? argnode->next : NULL;
1275 : }
1276 :
1277 45 : index = input_count;
1278 45 : if (non_grouped_aggregate) {
1279 5 : GENERATE_BAT_INPUT_BASE(oid);
1280 5 : bat_data->count = input_size;
1281 5 : bat_data->null_value = oid_nil;
1282 10 : bat_data->data =
1283 5 : GDKzalloc(bat_data->count * sizeof(bat_data->null_value));
1284 5 : bat_data->alloced = true;
1285 5 : if (!bat_data->data) {
1286 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1287 0 : goto wrapup;
1288 : }
1289 : }
1290 :
1291 45 : argnode = sqlfun ? sqlfun->res->h : NULL;
1292 : // output types
1293 92 : for (i = 0; i < output_count; i++) {
1294 47 : index = i;
1295 47 : bat_type = getBatType(getArgType(mb, pci, i));
1296 47 : if (bat_type == TYPE_bit) {
1297 0 : GENERATE_BAT_OUTPUT(bit);
1298 : } else if (bat_type == TYPE_bte) {
1299 0 : GENERATE_BAT_OUTPUT(bte);
1300 : } else if (bat_type == TYPE_sht) {
1301 0 : GENERATE_BAT_OUTPUT(sht);
1302 : } else if (bat_type == TYPE_int) {
1303 16 : GENERATE_BAT_OUTPUT(int);
1304 : } else if (bat_type == TYPE_oid) {
1305 0 : GENERATE_BAT_OUTPUT(oid);
1306 : } else if (bat_type == TYPE_lng) {
1307 10 : GENERATE_BAT_OUTPUT(lng);
1308 : } else if (bat_type == TYPE_flt) {
1309 0 : GENERATE_BAT_OUTPUT(flt);
1310 : } else if (bat_type == TYPE_dbl) {
1311 5 : GENERATE_BAT_OUTPUT(dbl);
1312 : } else if (bat_type == TYPE_str) {
1313 7 : GENERATE_BAT_OUTPUT_BASE(str);
1314 7 : bat_data->null_value = NULL;
1315 : } else if (bat_type == TYPE_date) {
1316 2 : GENERATE_BAT_OUTPUT_BASE(date);
1317 2 : data_from_date(date_nil, &bat_data->null_value);
1318 : } else if (bat_type == TYPE_daytime) {
1319 2 : GENERATE_BAT_OUTPUT_BASE(time);
1320 2 : data_from_time(daytime_nil, &bat_data->null_value);
1321 : } else if (bat_type == TYPE_timestamp) {
1322 2 : GENERATE_BAT_OUTPUT_BASE(timestamp);
1323 2 : data_from_timestamp(timestamp_nil, &bat_data->null_value);
1324 : } else if (bat_type == TYPE_blob) {
1325 2 : GENERATE_BAT_OUTPUT_BASE(blob);
1326 2 : bat_data->null_value.size = ~(size_t) 0;
1327 2 : bat_data->null_value.data = NULL;
1328 : } else {
1329 : // unsupported type, convert from string output
1330 1 : GENERATE_BAT_OUTPUT_BASE(str);
1331 1 : bat_data->null_value = NULL;
1332 : }
1333 47 : argnode = argnode ? argnode->next : NULL;
1334 : }
1335 :
1336 : // set up a longjmp point
1337 : // this longjmp point is used for some error handling in the C function
1338 : // such as failed mallocs
1339 45 : if (option_enable_longjmp) {
1340 0 : struct capi_tls_s *tls = MT_tls_get(capi_tls_key);
1341 0 : ret = setjmp(tls->jb);
1342 0 : if (ret < 0) {
1343 : // error value
1344 0 : msg = createException(MAL, "cudf.eval", "Failed setjmp: %s",
1345 0 : GDKstrerror(errno, (char[128]){0}, 128));
1346 0 : errno = 0;
1347 0 : goto wrapup;
1348 0 : } else if (ret > 0) {
1349 0 : if (ret == 1) {
1350 0 : msg = createException(MAL, "cudf.eval", "Attempting to write to "
1351 : "the input or triggered a "
1352 : "segfault/bus error");
1353 0 : } else if (ret == 2) {
1354 0 : msg = createException(MAL, "cudf.eval",
1355 : "Malloc failure in internal function!");
1356 : } else {
1357 : // we jumped here
1358 0 : msg = createException(MAL, "cudf.eval", "We longjumped here "
1359 : "because of an error, but "
1360 : "we don't know which!");
1361 : }
1362 0 : goto wrapup;
1363 : }
1364 : }
1365 :
1366 : // set up the signal handler for catching segfaults
1367 45 : if (option_enable_mprotect) {
1368 0 : sa = (struct sigaction) {
1369 : .sa_flags = SA_SIGINFO,
1370 : .sa_sigaction = handler,
1371 : };
1372 0 : (void) sigfillset(&sa.sa_mask);
1373 0 : if (sigaction(SIGSEGV, &sa, &oldsa) == -1 ||
1374 0 : sigaction(SIGBUS, &sa, &oldsb) == -1) {
1375 0 : msg = createException(MAL, "cudf.eval",
1376 : "Failed to set signal handler: %s",
1377 0 : GDKstrerror(errno, (char[128]){0}, 128));
1378 0 : errno = 0;
1379 0 : goto wrapup;
1380 : }
1381 : // actually mprotect the regions now that the signal handlers are set
1382 0 : region_iter = regions;
1383 0 : while (region_iter) {
1384 0 : if (mprotect(region_iter->addr, region_iter->len, PROT_READ) < 0) {
1385 0 : goto wrapup;
1386 : }
1387 0 : region_iter = region_iter->next;
1388 : }
1389 : }
1390 : // call the actual jitted function
1391 45 : msg = func(inputs, outputs, wrapped_GDK_malloc, wrapped_GDK_free);
1392 :
1393 :
1394 45 : if (option_enable_mprotect) {
1395 : // clear any mprotected regions
1396 0 : while (regions) {
1397 0 : mprotected_region *next = regions->next;
1398 0 : clear_mprotect(regions->addr, regions->len);
1399 0 : GDKfree(regions);
1400 0 : regions = next;
1401 : }
1402 : // clear the signal handlers
1403 0 : if (sigaction(SIGSEGV, &oldsa, NULL) == -1 ||
1404 0 : sigaction(SIGBUS, &oldsb, NULL) == -1) {
1405 0 : msg = createException(MAL, "cudf.eval",
1406 : "Failed to unset signal handler: %s",
1407 0 : GDKstrerror(errno, (char[128]){0}, 128));
1408 0 : errno = 0;
1409 0 : goto wrapup;
1410 : }
1411 0 : sa = (struct sigaction) {.sa_flags = 0};
1412 : }
1413 :
1414 45 : if (msg) {
1415 : // failure in function
1416 1 : msg = createException(MAL, "cudf.eval", "%s", msg);
1417 1 : goto wrapup;
1418 : }
1419 :
1420 : // create the output bats from the returned results
1421 88 : for (i = 0; i < (size_t)pci->retc; i++) {
1422 46 : size_t count;
1423 46 : void *data;
1424 46 : BAT *b;
1425 46 : bat_type = getBatType(getArgType(mb, pci, i));
1426 :
1427 46 : if (!outputs[i]) {
1428 0 : msg = createException(MAL, "cudf.eval", "No data returned.");
1429 0 : goto wrapup;
1430 : }
1431 46 : count = GetTypeCount(bat_type, outputs[i]);
1432 46 : data = GetTypeData(bat_type, outputs[i]);
1433 46 : if (!data) {
1434 1 : msg = createException(MAL, "cudf.eval", "No data returned.");
1435 1 : goto wrapup;
1436 : }
1437 45 : if (initial_output_count < 0) {
1438 43 : initial_output_count = count;
1439 2 : } else if ((size_t)initial_output_count != count) {
1440 1 : msg = createException(MAL, "cudf.eval",
1441 : "Data has different cardinalities.");
1442 1 : goto wrapup;
1443 : }
1444 44 : if (bat_type == TYPE_bit || bat_type == TYPE_bte ||
1445 44 : bat_type == TYPE_sht || bat_type == TYPE_int ||
1446 44 : bat_type == TYPE_oid || bat_type == TYPE_lng ||
1447 21 : bat_type == TYPE_flt || bat_type == TYPE_dbl) {
1448 28 : b = GetTypeBat(bat_type, outputs[i]);
1449 28 : if (!b) {
1450 0 : msg = createException(MAL, "cudf.eval", "Output column was not properly initialized.");
1451 0 : goto wrapup;
1452 : }
1453 : } else {
1454 16 : assert(GetTypeBat(bat_type, outputs[i]) == NULL);
1455 16 : b = COLnew(0, bat_type, count, TRANSIENT);
1456 16 : if (!b) {
1457 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1458 0 : goto wrapup;
1459 : }
1460 16 : if (bat_type == TYPE_date) {
1461 2 : date *baseptr = (date *)Tloc(b, 0);
1462 2 : cudf_data_date *source_base = (cudf_data_date *)data;
1463 7 : for (j = 0; j < count; j++) {
1464 5 : baseptr[j] = date_from_data(source_base + j);
1465 : }
1466 2 : BATsetcount(b, count);
1467 2 : GDKfree(data);
1468 : } else if (bat_type == TYPE_daytime) {
1469 2 : daytime *baseptr = (daytime *)Tloc(b, 0);
1470 2 : cudf_data_time *source_base = (cudf_data_time *)data;
1471 6 : for (j = 0; j < count; j++) {
1472 4 : baseptr[j] = time_from_data(source_base + j);
1473 : }
1474 2 : BATsetcount(b, count);
1475 2 : GDKfree(data);
1476 : } else if (bat_type == TYPE_timestamp) {
1477 2 : timestamp *baseptr = (timestamp *)Tloc(b, 0);
1478 2 : cudf_data_timestamp *source_base = (cudf_data_timestamp *)data;
1479 6 : for (j = 0; j < count; j++) {
1480 4 : baseptr[j] = timestamp_from_data(source_base + j);
1481 : }
1482 2 : BATsetcount(b, count);
1483 2 : GDKfree(data);
1484 : } else if (bat_type == TYPE_str) {
1485 : char **source_base = (char **)data;
1486 306 : for (j = 0; j < count; j++) {
1487 299 : const char *ptr = source_base[j];
1488 299 : if (!ptr) {
1489 2 : ptr = str_nil;
1490 : }
1491 299 : if (BUNappend(b, ptr, false) != GDK_SUCCEED) {
1492 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1493 0 : goto wrapup;
1494 : }
1495 : }
1496 7 : GDKfree(data);
1497 : } else if (bat_type == TYPE_blob) {
1498 : cudf_data_blob *source_base = (cudf_data_blob *)data;
1499 : blob *current_blob = NULL;
1500 : size_t current_blob_maxsize = 0;
1501 6 : for (j = 0; j < count; j++) {
1502 4 : const cudf_data_blob blob = source_base[j];
1503 :
1504 4 : if (blob.size == ~(size_t) 0) {
1505 1 : current_blob->nitems = ~(size_t)0;
1506 : } else {
1507 3 : if (!current_blob || current_blob_maxsize < blob.size) {
1508 0 : if (current_blob) {
1509 0 : GDKfree(current_blob);
1510 : }
1511 2 : current_blob_maxsize = blob.size;
1512 2 : current_blob = GDKmalloc(sizeof(size_t) + blob.size);
1513 2 : if (!current_blob) {
1514 0 : msg =
1515 0 : createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1516 0 : goto wrapup;
1517 : }
1518 : }
1519 :
1520 3 : current_blob->nitems = blob.size;
1521 3 : if (blob.size > 0)
1522 2 : memcpy(¤t_blob->data[0], blob.data, blob.size);
1523 : }
1524 :
1525 4 : if (BUNappend(b, current_blob, false) != GDK_SUCCEED) {
1526 0 : if (current_blob) {
1527 0 : GDKfree(current_blob);
1528 : }
1529 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1530 0 : goto wrapup;
1531 : }
1532 : }
1533 2 : if (current_blob) {
1534 2 : GDKfree(current_blob);
1535 : }
1536 2 : GDKfree(data);
1537 : } else {
1538 1 : char **source_base = (char **)data;
1539 1 : size_t len = 0;
1540 1 : void *element = NULL;
1541 3 : for (j = 0; j < count; j++) {
1542 2 : const char *ptr = source_base[j];
1543 2 : const void *appended_element;
1544 2 : if (strNil(ptr)) {
1545 1 : appended_element = (void *)BATatoms[bat_type].atomNull;
1546 : } else {
1547 1 : if (BATatoms[bat_type].atomFromStr(ptr, &len, &element, false) ==
1548 : 0) {
1549 0 : msg = createException(MAL, "cudf.eval",
1550 : "Failed to convert output "
1551 : "element from string: %s",
1552 : ptr);
1553 0 : goto wrapup;
1554 : }
1555 1 : appended_element = element;
1556 : }
1557 2 : if (BUNappend(b, appended_element, false) != GDK_SUCCEED) {
1558 0 : if (element) {
1559 0 : GDKfree(element);
1560 : }
1561 0 : msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1562 0 : goto wrapup;
1563 : }
1564 : }
1565 1 : if (element) {
1566 1 : GDKfree(element);
1567 : }
1568 1 : GDKfree(data);
1569 : }
1570 : }
1571 44 : b->tnil = false;
1572 44 : b->tnonil = false;
1573 44 : b->tkey = false;
1574 44 : b->tsorted = false;
1575 44 : b->trevsorted = false;
1576 :
1577 : // free the output value right now to prevent the internal data from
1578 : // being freed later
1579 : // as the internal data is now part of the bat we just created
1580 44 : GDKfree(outputs[i]);
1581 44 : outputs[i] = NULL;
1582 :
1583 : // return the BAT from the function
1584 44 : if (isaBatType(getArgType(mb, pci, i))) {
1585 29 : *getArgReference_bat(stk, pci, i) = b->batCacheid;
1586 29 : BBPkeepref(b);
1587 : } else {
1588 15 : BATiter li = bat_iterator(b);
1589 15 : if (VALinit(&stk->stk[pci->argv[i]], bat_type,
1590 15 : BUNtail(li, 0)) == NULL) {
1591 0 : msg = createException(MAL, "cudf.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
1592 : }
1593 15 : bat_iterator_end(&li);
1594 15 : BBPunfix(b->batCacheid);
1595 : }
1596 : }
1597 :
1598 42 : wrapup:
1599 : // cleanup
1600 : // remove the signal handler, if any was set
1601 48 : GDKfree(fname);
1602 48 : GDKfree(oname);
1603 48 : GDKfree(libname);
1604 48 : MT_tls_set(capi_tls_key, NULL);
1605 48 : if (option_enable_mprotect) {
1606 0 : if (sa.sa_sigaction) {
1607 0 : (void) sigaction(SIGSEGV, &oldsa, NULL);
1608 0 : (void) sigaction(SIGBUS, &oldsb, NULL);
1609 :
1610 0 : sa = (struct sigaction) {.sa_flags = 0,};
1611 : }
1612 : // clear any mprotected regions
1613 0 : while (regions) {
1614 0 : mprotected_region *next = regions->next;
1615 0 : clear_mprotect(regions->addr, regions->len);
1616 0 : GDKfree(regions);
1617 0 : regions = next;
1618 : }
1619 : }
1620 344 : while (tls.ar != NULL) {
1621 296 : allocated_region *next = tls.ar->next;
1622 296 : GDKfree(tls.ar);
1623 296 : tls.ar = next;
1624 : }
1625 48 : if (option_enable_mprotect) {
1626 : // block segfaults and bus errors again after we exit
1627 0 : (void)pthread_sigmask(SIG_BLOCK, &signal_set, NULL);
1628 : }
1629 : // argument names (input)
1630 48 : if (args) {
1631 306 : for (i = 0; i < (size_t)limit_argc; i++) {
1632 258 : if (args[i]) {
1633 64 : GDKfree(args[i]);
1634 : }
1635 : }
1636 48 : GDKfree(args);
1637 : }
1638 : // output names
1639 48 : if (output_names) {
1640 98 : for (i = 0; i < (size_t)pci->retc; i++) {
1641 50 : if (output_names[i]) {
1642 50 : GDKfree(output_names[i]);
1643 : }
1644 : }
1645 48 : GDKfree(output_names);
1646 : }
1647 48 : if (input_bats) {
1648 117 : for(i = 0; i < input_count + extra_inputs; i++) {
1649 128 : BBPreclaim(input_bats[i]);
1650 : }
1651 47 : GDKfree(input_bats);
1652 : }
1653 : // input data
1654 48 : if (inputs) {
1655 117 : for (i = 0; i < input_count + extra_inputs; i++) {
1656 70 : if (inputs[i]) {
1657 60 : int arg = i + pci->retc + ARG_OFFSET;
1658 60 : bat_type = getArgType(mb, pci, arg);
1659 60 : if (isaBatType(bat_type)) {
1660 42 : bat_type = getBatType(bat_type);
1661 : }
1662 60 : if (i == input_count) /* non grouped aggr case */
1663 : bat_type = TYPE_oid;
1664 55 : if (bat_type < 0)
1665 0 : continue;
1666 60 : if (isAlloced(bat_type, inputs[i])) {
1667 60 : char **data = (char **)GetTypeData(bat_type, inputs[i]);
1668 120 : if (isValloced(bat_type, inputs[i])) {
1669 9 : size_t count = GetTypeCount(bat_type, inputs[i]);
1670 9 : if (bat_type == TYPE_blob) {
1671 : cudf_data_blob *bd = (cudf_data_blob*)data;
1672 6 : for (j = 0; j < count; j++)
1673 4 : if (bd[j].data)
1674 2 : GDKfree(bd[j].data);
1675 : } else {
1676 307 : for (j = 0; j < count; j++)
1677 300 : if (data[j])
1678 297 : GDKfree(data[j]);
1679 : }
1680 : }
1681 60 : if (data)
1682 60 : GDKfree(data);
1683 : }
1684 60 : GDKfree(inputs[i]);
1685 : }
1686 : }
1687 47 : GDKfree(inputs);
1688 : }
1689 : // output data
1690 48 : if (outputs) {
1691 98 : for (i = 0; i < (size_t)output_count; i++) {
1692 100 : bat_type = isaBatType(getArgType(mb, pci, i))
1693 : ? getBatType(getArgType(mb, pci, i))
1694 50 : : getArgType(mb, pci, i);
1695 50 : if (outputs[i]) {
1696 3 : void* b = GetTypeBat(bat_type, outputs[i]);
1697 3 : if (b) {
1698 1 : BBPunfix(((BAT*)b)->batCacheid);
1699 : } else {
1700 2 : void *data = GetTypeData(bat_type, outputs[i]);
1701 2 : if (data) {
1702 0 : GDKfree(data);
1703 : }
1704 : }
1705 3 : GDKfree(outputs[i]);
1706 : }
1707 : }
1708 48 : GDKfree(outputs);
1709 : }
1710 48 : if (function_parameters) {
1711 12 : GDKfree(function_parameters);
1712 : }
1713 : // close the file handle
1714 48 : if (f) {
1715 0 : fclose(f);
1716 : }
1717 : // close the dll
1718 48 : if (handle) {
1719 0 : dlclose(handle);
1720 : }
1721 : // close the compiler stream
1722 48 : if (compiler) {
1723 0 : pclose(compiler);
1724 : }
1725 48 : if (extra_cflags) {
1726 0 : GDKfree(extra_cflags);
1727 : }
1728 48 : if (extra_ldflags) {
1729 0 : GDKfree(extra_ldflags);
1730 : }
1731 48 : return msg;
1732 : }
1733 :
1734 93 : static const char *GetTypeName(int type)
1735 : {
1736 93 : const char *tpe = NULL;
1737 93 : if (type == TYPE_bit || type == TYPE_bte) {
1738 : tpe = "bte";
1739 : } else if (type == TYPE_sht) {
1740 : tpe = "sht";
1741 : } else if (type == TYPE_int) {
1742 : tpe = "int";
1743 : } else if (type == TYPE_oid) {
1744 : tpe = "oid";
1745 : } else if (type == TYPE_lng) {
1746 : tpe = "lng";
1747 : } else if (type == TYPE_flt) {
1748 : tpe = "flt";
1749 : } else if (type == TYPE_dbl) {
1750 : tpe = "dbl";
1751 : } else if (type == TYPE_str) {
1752 : tpe = "str";
1753 : } else if (type == TYPE_date) {
1754 : tpe = "date";
1755 : } else if (type == TYPE_daytime) {
1756 : tpe = "time";
1757 : } else if (type == TYPE_timestamp) {
1758 : tpe = "timestamp";
1759 : } else if (type == TYPE_blob) {
1760 : tpe = "blob";
1761 : } else {
1762 : // unsupported type: string
1763 93 : tpe = "str";
1764 : }
1765 93 : return tpe;
1766 : }
1767 :
1768 : static bool
1769 60 : isAlloced(int type, void *struct_ptr)
1770 : {
1771 60 : bool alloced = false;
1772 :
1773 60 : if (type == TYPE_bit || type == TYPE_bte) {
1774 0 : alloced = ((struct cudf_data_struct_bte *)struct_ptr)->alloced;
1775 : } else if (type == TYPE_sht) {
1776 0 : alloced = ((struct cudf_data_struct_sht *)struct_ptr)->alloced;
1777 : } else if (type == TYPE_int) {
1778 28 : alloced = ((struct cudf_data_struct_int *)struct_ptr)->alloced;
1779 : } else if (type == TYPE_oid) {
1780 10 : alloced = ((struct cudf_data_struct_oid *)struct_ptr)->alloced;
1781 : } else if (type == TYPE_lng) {
1782 2 : alloced = ((struct cudf_data_struct_lng *)struct_ptr)->alloced;
1783 : } else if (type == TYPE_flt) {
1784 1 : alloced = ((struct cudf_data_struct_flt *)struct_ptr)->alloced;
1785 : } else if (type == TYPE_dbl) {
1786 4 : alloced = ((struct cudf_data_struct_dbl *)struct_ptr)->alloced;
1787 : } else if (type == TYPE_str) {
1788 6 : alloced = ((struct cudf_data_struct_str *)struct_ptr)->alloced;
1789 : } else if (type == TYPE_date) {
1790 2 : alloced = ((struct cudf_data_struct_date *)struct_ptr)->alloced;
1791 : } else if (type == TYPE_daytime) {
1792 2 : alloced = ((struct cudf_data_struct_time *)struct_ptr)->alloced;
1793 : } else if (type == TYPE_timestamp) {
1794 2 : alloced = ((struct cudf_data_struct_timestamp *)struct_ptr)->alloced;
1795 : } else if (type == TYPE_blob) {
1796 2 : alloced = ((struct cudf_data_struct_blob *)struct_ptr)->alloced;
1797 : } else {
1798 : // unsupported type: string
1799 1 : alloced = ((struct cudf_data_struct_str *)struct_ptr)->alloced;
1800 : }
1801 60 : return alloced;
1802 : }
1803 :
1804 : static bool
1805 60 : isValloced(int type, void *struct_ptr)
1806 : {
1807 60 : bool alloced = false;
1808 :
1809 60 : if (type == TYPE_str) {
1810 6 : alloced = ((struct cudf_data_struct_str *)struct_ptr)->valloced;
1811 54 : } else if (type == TYPE_blob) {
1812 2 : alloced = ((struct cudf_data_struct_blob *)struct_ptr)->valloced;
1813 : } else {
1814 : // unsupported type: string
1815 52 : alloced = ((struct cudf_data_struct_str *)struct_ptr)->valloced;
1816 : }
1817 60 : return alloced;
1818 : }
1819 : void *
1820 108 : GetTypeData(int type, void *struct_ptr)
1821 : {
1822 108 : void *data = NULL;
1823 :
1824 108 : if (type == TYPE_bit || type == TYPE_bte) {
1825 0 : data = ((struct cudf_data_struct_bte *)struct_ptr)->data;
1826 : } else if (type == TYPE_sht) {
1827 0 : data = ((struct cudf_data_struct_sht *)struct_ptr)->data;
1828 : } else if (type == TYPE_int) {
1829 45 : data = ((struct cudf_data_struct_int *)struct_ptr)->data;
1830 : } else if (type == TYPE_oid) {
1831 10 : data = ((struct cudf_data_struct_oid *)struct_ptr)->data;
1832 : } else if (type == TYPE_lng) {
1833 12 : data = ((struct cudf_data_struct_lng *)struct_ptr)->data;
1834 : } else if (type == TYPE_flt) {
1835 1 : data = ((struct cudf_data_struct_flt *)struct_ptr)->data;
1836 : } else if (type == TYPE_dbl) {
1837 9 : data = ((struct cudf_data_struct_dbl *)struct_ptr)->data;
1838 : } else if (type == TYPE_str) {
1839 13 : data = ((struct cudf_data_struct_str *)struct_ptr)->data;
1840 : } else if (type == TYPE_date) {
1841 4 : data = ((struct cudf_data_struct_date *)struct_ptr)->data;
1842 : } else if (type == TYPE_daytime) {
1843 4 : data = ((struct cudf_data_struct_time *)struct_ptr)->data;
1844 : } else if (type == TYPE_timestamp) {
1845 4 : data = ((struct cudf_data_struct_timestamp *)struct_ptr)->data;
1846 : } else if (type == TYPE_blob) {
1847 4 : data = ((struct cudf_data_struct_blob *)struct_ptr)->data;
1848 : } else {
1849 : // unsupported type: string
1850 2 : data = ((struct cudf_data_struct_str *)struct_ptr)->data;
1851 : }
1852 108 : return data;
1853 : }
1854 :
1855 47 : void *GetTypeBat(int type, void *struct_ptr)
1856 : {
1857 47 : void *bat = NULL;
1858 :
1859 47 : if (type == TYPE_bit || type == TYPE_bte) {
1860 0 : bat = ((struct cudf_data_struct_bte *)struct_ptr)->bat;
1861 : } else if (type == TYPE_sht) {
1862 0 : bat = ((struct cudf_data_struct_sht *)struct_ptr)->bat;
1863 : } else if (type == TYPE_int) {
1864 16 : bat = ((struct cudf_data_struct_int *)struct_ptr)->bat;
1865 : } else if (type == TYPE_oid) {
1866 0 : bat = ((struct cudf_data_struct_oid *)struct_ptr)->bat;
1867 : } else if (type == TYPE_lng) {
1868 10 : bat = ((struct cudf_data_struct_lng *)struct_ptr)->bat;
1869 : } else if (type == TYPE_flt) {
1870 0 : bat = ((struct cudf_data_struct_flt *)struct_ptr)->bat;
1871 : } else if (type == TYPE_dbl) {
1872 5 : bat = ((struct cudf_data_struct_dbl *)struct_ptr)->bat;
1873 : } else if (type == TYPE_str) {
1874 7 : bat = ((struct cudf_data_struct_str *)struct_ptr)->bat;
1875 : } else if (type == TYPE_date) {
1876 2 : bat = ((struct cudf_data_struct_date *)struct_ptr)->bat;
1877 : } else if (type == TYPE_daytime) {
1878 2 : bat = ((struct cudf_data_struct_time *)struct_ptr)->bat;
1879 : } else if (type == TYPE_timestamp) {
1880 2 : bat = ((struct cudf_data_struct_timestamp *)struct_ptr)->bat;
1881 : } else if (type == TYPE_blob) {
1882 2 : bat = ((struct cudf_data_struct_blob *)struct_ptr)->bat;
1883 : } else {
1884 : // unsupported type: string
1885 1 : bat = ((struct cudf_data_struct_str *)struct_ptr)->bat;
1886 : }
1887 47 : return bat;
1888 : }
1889 :
1890 55 : size_t GetTypeCount(int type, void *struct_ptr)
1891 : {
1892 55 : size_t count = 0;
1893 55 : if (type == TYPE_bit || type == TYPE_bte) {
1894 0 : count = ((struct cudf_data_struct_bte *)struct_ptr)->count;
1895 : } else if (type == TYPE_sht) {
1896 0 : count = ((struct cudf_data_struct_sht *)struct_ptr)->count;
1897 : } else if (type == TYPE_int) {
1898 15 : count = ((struct cudf_data_struct_int *)struct_ptr)->count;
1899 : } else if (type == TYPE_oid) {
1900 0 : count = ((struct cudf_data_struct_oid *)struct_ptr)->count;
1901 : } else if (type == TYPE_lng) {
1902 10 : count = ((struct cudf_data_struct_lng *)struct_ptr)->count;
1903 : } else if (type == TYPE_flt) {
1904 0 : count = ((struct cudf_data_struct_flt *)struct_ptr)->count;
1905 : } else if (type == TYPE_dbl) {
1906 5 : count = ((struct cudf_data_struct_dbl *)struct_ptr)->count;
1907 : } else if (type == TYPE_str) {
1908 13 : count = ((struct cudf_data_struct_str *)struct_ptr)->count;
1909 : } else if (type == TYPE_date) {
1910 2 : count = ((struct cudf_data_struct_date *)struct_ptr)->count;
1911 : } else if (type == TYPE_daytime) {
1912 2 : count = ((struct cudf_data_struct_time *)struct_ptr)->count;
1913 : } else if (type == TYPE_timestamp) {
1914 2 : count = ((struct cudf_data_struct_timestamp *)struct_ptr)->count;
1915 : } else if (type == TYPE_blob) {
1916 4 : count = ((struct cudf_data_struct_blob *)struct_ptr)->count;
1917 : } else {
1918 : // unsupported type: string
1919 2 : count = ((struct cudf_data_struct_str *)struct_ptr)->count;
1920 : }
1921 55 : return count;
1922 : }
1923 :
1924 14 : void data_from_date(date d, cudf_data_date *ptr)
1925 : {
1926 14 : ptr->day = date_day(d);
1927 14 : ptr->month = date_month(d);
1928 14 : ptr->year = date_year(d);
1929 14 : }
1930 :
1931 5 : date date_from_data(cudf_data_date *ptr)
1932 : {
1933 5 : return date_create(ptr->year, ptr->month, ptr->day);
1934 : }
1935 :
1936 12 : void data_from_time(daytime d, cudf_data_time *ptr)
1937 : {
1938 12 : ptr->hours = daytime_hour(d);
1939 12 : ptr->minutes = daytime_min(d);
1940 12 : ptr->seconds = daytime_sec(d);
1941 12 : ptr->ms = daytime_usec(d) / 1000;
1942 12 : }
1943 :
1944 4 : daytime time_from_data(cudf_data_time *ptr)
1945 : {
1946 4 : return daytime_create(ptr->hours, ptr->minutes, ptr->seconds,
1947 4 : ptr->ms * 1000);
1948 : }
1949 :
1950 8 : void data_from_timestamp(timestamp d, cudf_data_timestamp *ptr)
1951 : {
1952 8 : daytime tm = timestamp_daytime(d);
1953 8 : date dt = timestamp_date(d);
1954 :
1955 8 : ptr->date.day = date_day(dt);
1956 8 : ptr->date.month = date_month(dt);
1957 8 : ptr->date.year = date_year(dt);
1958 8 : ptr->time.hours = daytime_hour(tm);
1959 8 : ptr->time.minutes = daytime_min(tm);
1960 8 : ptr->time.seconds = daytime_sec(tm);
1961 8 : ptr->time.ms = daytime_usec(tm) / 1000;
1962 8 : }
1963 :
1964 8 : timestamp timestamp_from_data(cudf_data_timestamp *ptr)
1965 : {
1966 8 : return timestamp_create(date_create(ptr->date.year,
1967 8 : ptr->date.month,
1968 8 : ptr->date.day),
1969 8 : daytime_create(ptr->time.hours,
1970 8 : ptr->time.minutes,
1971 8 : ptr->time.seconds,
1972 8 : ptr->time.ms * 1000));
1973 : }
1974 :
1975 5 : int date_is_null(cudf_data_date value)
1976 : {
1977 5 : cudf_data_date null_value;
1978 5 : data_from_date(date_nil, &null_value);
1979 5 : return value.year == null_value.year && value.month == null_value.month &&
1980 : value.day == null_value.day;
1981 : }
1982 :
1983 4 : int time_is_null(cudf_data_time value)
1984 : {
1985 4 : cudf_data_time null_value;
1986 4 : data_from_time(daytime_nil, &null_value);
1987 4 : return value.hours == null_value.hours &&
1988 4 : value.minutes == null_value.minutes &&
1989 4 : value.seconds == null_value.seconds && value.ms == null_value.ms;
1990 : }
1991 :
1992 4 : int timestamp_is_null(cudf_data_timestamp value)
1993 : {
1994 4 : return is_timestamp_nil(timestamp_from_data(&value));
1995 : }
1996 :
/* Return 1 when value is the NULL pointer, 0 for any other pointer. */
int str_is_null(char *value)
{
	return !value;
}
1998 :
1999 4 : int blob_is_null(cudf_data_blob value) { return value.size == ~(size_t) 0; }
2000 :
2001 2 : void blob_initialize(struct cudf_data_struct_blob *self,
2002 : size_t count) {
2003 2 : self->count = count;
2004 2 : self->data = jump_GDK_malloc(count * sizeof(self->null_value));
2005 2 : memset(self->data, 0, count * sizeof(self->null_value));
2006 2 : }
2007 :
2008 : #include "mel.h"
2009 : static mel_func capi_init_funcs[] = {
2010 : pattern("capi", "eval", CUDFevalStd, false, "Execute a simple CUDF script returning a single value", args(1,4, argany("",0),arg("fptr",ptr),arg("cpp",bit),arg("expr",str))),
2011 : pattern("capi", "eval", CUDFevalStd, false, "Execute a simple CUDF script value", args(1,5, varargany("",0),arg("fptr",ptr),arg("cpp",bit),arg("expr",str),varargany("arg",0))),
2012 : pattern("capi", "subeval_aggr", CUDFevalAggr, false, "grouped aggregates through CUDF", args(1,5, varargany("",0),arg("fptr",ptr),arg("cpp",bit),arg("expr",str),varargany("arg",0))),
2013 : pattern("capi", "eval_aggr", CUDFevalAggr, false, "grouped aggregates through CUDF", args(1,5, varargany("",0),arg("fptr",ptr),arg("cpp",bit),arg("expr",str),varargany("arg",0))),
2014 : pattern("batcapi", "eval", CUDFevalStd, false, "Execute a simple CUDF script value", args(1,5, varargany("",0),arg("fptr",ptr),arg("cpp",bit),arg("expr",str),varargany("arg",0))),
2015 : { .imp=NULL }
2016 : };
2017 : #include "mal_import.h"
2018 : #ifdef _MSC_VER
2019 : #undef read
2020 : #pragma section(".CRT$XCU",read)
2021 : #endif
2022 6 : LIB_STARTUP_FUNC(init_capi_mal)
2023 6 : { mal_module2("capi", NULL, capi_init_funcs, CUDFprelude, NULL); }
|