Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "pyapi.h"
15 : #include "connection.h"
16 : #include "mutils.h"
17 :
18 : #include "unicode.h"
19 : #include "pytypes.h"
20 : #include "type_conversion.h"
21 : #include "formatinput.h"
22 : #include "conversion.h"
23 :
24 : static PyObject *marshal_module = NULL;
25 : PyObject *marshal_loads = NULL;
26 :
27 : static str CreateEmptyReturn(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci,
28 : size_t retcols, oid seqbase);
29 :
/*
 * Return the directory against which relative Python source file names are
 * resolved.
 *
 * The "function_basepath" setting (read through GDKgetenv) wins when it is
 * set.  Otherwise the HOME environment variable is used; on native Windows
 * builds it is read as a wide string, converted to UTF-8 and copied into
 * buf (at most len bytes).  When nothing is available the empty string is
 * returned, so the result is never NULL.
 *
 * buf/len: scratch buffer, only used on native Windows builds.
 */
static const char *FunctionBasePath(char *buf, size_t len)
{
	const char *configured = GDKgetenv("function_basepath");

#ifndef NATIVE_WIN32
	/* not used except on Windows */
	(void) buf;
	(void) len;
#endif
	if (configured != NULL)
		return configured;

#ifdef NATIVE_WIN32
	{
		const wchar_t *home = _wgetenv(L"HOME");
		char *path = home ? wchartoutf8(home) : NULL;

		if (path != NULL) {
			strcpy_len(buf, path, len);
			free(path);
			return buf;
		}
	}
	return "";
#else
	{
		const char *home = getenv("HOME");

		return home != NULL ? home : "";
	}
#endif
}
58 :
59 : static MT_Lock pyapiLock = MT_LOCK_INITIALIZER(pyapiLock);
60 : static bool pyapiInitialized = false;
61 : static PyDateTime_CAPI *PYAPI3_DateTimeAPI;
62 :
63 25 : bool PYAPI3PyAPIInitialized(void) {
64 25 : return pyapiInitialized;
65 : }
66 :
67 780 : PyDateTime_CAPI *get_DateTimeAPI(void) {
68 780 : return PYAPI3_DateTimeAPI;
69 : }
70 :
71 10 : void init_DateTimeAPI(void) {
72 10 : PyDateTime_IMPORT;
73 10 : PYAPI3_DateTimeAPI = PyDateTimeAPI;
74 10 : }
75 :
/* Zero-copy input is enabled on every platform. */
static bool enable_zerocopy_input = true;
/* Zero-copy output is disabled on WIN32 builds only. */
#ifdef WIN32
static bool enable_zerocopy_output = false;
#else
static bool enable_zerocopy_output = true;
#endif
83 :
84 : static str
85 : PyAPIeval(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, bool grouped);
86 :
87 : str
88 136 : PYAPI3PyAPIevalStd(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) {
89 136 : return PyAPIeval(cntxt, mb, stk, pci, false);
90 : }
91 :
92 : str
93 26 : PYAPI3PyAPIevalAggr(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) {
94 26 : return PyAPIeval(cntxt, mb, stk, pci, true);
95 : }
96 :
/*
 * NP_SPLIT_BAT(tpe): scatter the values of one input column into per-group
 * arrays.
 *
 * The macro expands to a block that relies on names from the expansion
 * site: split_bats, basevals, group_it, group_count, group_counts, i,
 * aggr_group, aggr_group_arr, element_it and elements.  For every group it
 * allocates ptr[group][i] with room for group_counts[group] values of type
 * tpe, then appends each element of basevals to the array of its group.
 * When aggr_group is a void BAT the element position is used as the group
 * index, otherwise the group is read from aggr_group_arr.
 *
 * temp_indices holds the next free slot per group; its element size is
 * taken from the pointer itself so the allocation always matches its type
 * (it used to be sized with sizeof(lng)).
 *
 * NOTE(review): the GDKzalloc results are not checked for NULL here.
 */
#define NP_SPLIT_BAT(tpe)                                                      \
	{                                                                          \
		tpe ***ptr = (tpe ***)split_bats;                                      \
		size_t *temp_indices;                                                  \
		tpe *batcontent = (tpe *)basevals;                                     \
		/* allocate space for split BAT */                                     \
		for (group_it = 0; group_it < group_count; group_it++) {               \
			ptr[group_it][i] =                                                 \
				GDKzalloc(group_counts[group_it] * sizeof(tpe));               \
		}                                                                      \
		/* iterate over the elements of the current BAT */                     \
		temp_indices = GDKzalloc(sizeof(*temp_indices) * group_count);         \
		if (BATtvoid(aggr_group)) {                                            \
			for (element_it = 0; element_it < elements; element_it++) {        \
				/* append current element to proper group */                   \
				ptr[element_it][i][temp_indices[element_it]++] =               \
					batcontent[element_it];                                    \
			}                                                                  \
		} else {                                                               \
			for (element_it = 0; element_it < elements; element_it++) {        \
				/* group of current element */                                 \
				oid group = aggr_group_arr[element_it];                        \
				/* append current element to proper group */                   \
				ptr[group][i][temp_indices[group]++] = batcontent[element_it]; \
			}                                                                  \
		}                                                                      \
		GDKfree(temp_indices);                                                 \
	}
125 :
126 : //! The main PyAPI function, this function does everything PyAPI related
127 : //! It takes as argument a bunch of input BATs, a python function, and outputs a
128 : //! number of BATs
129 : //! This function follows the following pipeline
130 : //! [PARSE_CODE] Step 1: It parses the Python source code and corrects any wrong
131 : //! indentation, or converts the source code into a PyCodeObject if the source
132 : //! code is encoded as such
133 : //! [CONVERT_BAT] Step 2: It converts the input BATs into Numpy Arrays
134 : //! [EXECUTE_CODE] Step 3: It executes the Python code using the Numpy arrays as arguments
135 : //! [RETURN_VALUES] Step 4: It collects the return values and converts them back into BATs
136 162 : static str PyAPIeval(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, bool grouped) {
137 162 : sql_func * sqlfun = NULL;
138 162 : str exprStr = NULL;
139 :
140 162 : const int additional_columns = 3;
141 162 : int i = 1, ai = 0;
142 162 : char *pycall = NULL;
143 162 : str *args;
144 162 : char *msg = MAL_SUCCEED;
145 162 : BAT *b = NULL;
146 162 : node *argnode = NULL;
147 162 : int seengrp = FALSE;
148 162 : PyObject *pArgs = NULL, *pColumns = NULL, *pColumnTypes = NULL,
149 : *pConnection,
150 162 : *pResult = NULL; // this is going to be the parameter tuple
151 162 : PyObject *code_object = NULL;
152 162 : PyReturn *pyreturn_values = NULL;
153 162 : PyInput *pyinput_values = NULL;
154 162 : oid seqbase = 0;
155 162 : bit varres;
156 162 : int retcols;
157 162 : bool gstate = 0;
158 162 : int unnamedArgs = 0;
159 162 : bool freeexprStr = false;
160 162 : int argcount = pci->argc;
161 :
162 162 : char *eval_additional_args[] = {"_columns", "_column_types", "_conn"};
163 :
164 162 : if (!pyapiInitialized) {
165 0 : throw(MAL, "pyapi3.eval", SQLSTATE(PY000) "Embedded Python is enabled but an error was "
166 : "thrown during initialization.");
167 : }
168 :
169 : // If the first input argument is of type lng, this is a cardinality-only bulk operation.
170 162 : int has_card_arg = 0;
171 162 : BUN card; // cardinality of non-bat inputs
172 162 : if (getArgType(mb, pci, pci->retc) == TYPE_lng) {
173 1 : has_card_arg=1;
174 1 : card = (BUN) *getArgReference_lng(stk, pci, pci->retc);
175 : } else {
176 : has_card_arg=0;
177 : card = 1;
178 : }
179 :
180 162 : sqlfun = *(sql_func **)getArgReference(stk, pci, pci->retc + has_card_arg);
181 162 : exprStr = *getArgReference_str(stk, pci, pci->retc + 1 + has_card_arg);
182 324 : varres = sqlfun ? sqlfun->varres : 0;
183 162 : retcols = !varres ? pci->retc : -1;
184 :
185 162 : args = (str *)GDKzalloc(pci->argc * sizeof(str));
186 162 : pyreturn_values = GDKzalloc(pci->retc * sizeof(PyReturn));
187 162 : if (args == NULL || pyreturn_values == NULL) {
188 0 : throw(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL " arguments.");
189 : }
190 :
191 162 : if ((pci->argc - (pci->retc + 2 + has_card_arg)) * sizeof(PyInput) > 0) {
192 114 : pyinput_values = GDKzalloc((pci->argc - (pci->retc + 2 + has_card_arg)) * sizeof(PyInput));
193 :
194 114 : if (pyinput_values == NULL) {
195 0 : GDKfree(args);
196 0 : GDKfree(pyreturn_values);
197 0 : throw(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL " input values.");
198 : }
199 : }
200 :
201 : // Analyse the SQL_Func structure to get the parameter names
202 162 : if (sqlfun != NULL && sqlfun->ops->cnt > 0) {
203 101 : unnamedArgs = pci->retc + 2 + has_card_arg;
204 101 : argnode = sqlfun->ops->h;
205 238 : while (argnode) {
206 137 : char *argname = ((sql_arg *)argnode->data)->name;
207 137 : args[unnamedArgs++] = GDKstrdup(argname);
208 137 : argnode = argnode->next;
209 : }
210 : }
211 :
212 : // We name all the unknown arguments, if grouping is enabled the first
213 : // unknown argument that is the group variable, we name this 'aggr_group'
214 528 : for (i = pci->retc + 2 + has_card_arg; i < argcount; i++) {
215 366 : if (args[i] == NULL) {
216 229 : if (!seengrp && grouped) {
217 17 : args[i] = GDKstrdup("aggr_group");
218 17 : seengrp = TRUE;
219 : } else {
220 212 : char argbuf[64];
221 212 : snprintf(argbuf, sizeof(argbuf), "arg%i", i - pci->retc - (1 + has_card_arg));
222 212 : args[i] = GDKstrdup(argbuf);
223 : }
224 : }
225 : }
226 :
227 : // Construct PyInput objects
228 162 : argnode = sqlfun && sqlfun->ops->cnt > 0 ? sqlfun->ops->h : NULL;
229 523 : for (i = pci->retc + 2 + has_card_arg; i < argcount; i++) {
230 364 : PyInput *inp = &pyinput_values[i - (pci->retc + 2 + has_card_arg)];
231 364 : if (!isaBatType(getArgType(mb, pci, i))) {
232 41 : inp->scalar = true;
233 41 : inp->bat_type = getArgType(mb, pci, i);
234 41 : inp->count = 1;
235 :
236 41 : if (!has_card_arg) {
237 41 : if (inp->bat_type == TYPE_str) {
238 0 : inp->dataptr = getArgReference_str(stk, pci, i);
239 : } else {
240 41 : inp->dataptr = getArgReference(stk, pci, i);
241 : }
242 : }
243 : else {
244 0 : const ValRecord *v = &stk->stk[getArg(pci, i)];
245 0 : b = BATconstant(0, v->vtype, VALptr(v), card, TRANSIENT);
246 0 : if (b == NULL) {
247 0 : msg = createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
248 0 : goto wrapup;
249 : }
250 0 : inp->count = BATcount(b);
251 0 : inp->bat_type = b->ttype;
252 0 : inp->bat = b;
253 : }
254 : } else {
255 323 : b = BATdescriptor(*getArgReference_bat(stk, pci, i));
256 323 : if (b == NULL) {
257 0 : msg = createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
258 0 : goto wrapup;
259 : }
260 323 : seqbase = b->hseqbase;
261 323 : inp->count = BATcount(b);
262 323 : inp->bat_type = b->ttype;
263 323 : inp->bat = b;
264 323 : if (inp->count == 0) {
265 : // one of the input BATs is empty, don't execute the function at
266 : // all
267 : // just return empty BATs
268 3 : msg = CreateEmptyReturn(mb, stk, pci, retcols, seqbase);
269 3 : goto wrapup;
270 : }
271 : }
272 361 : if (argnode) {
273 134 : inp->sql_subtype = &((sql_arg *)argnode->data)->type;
274 134 : argnode = argnode->next;
275 : }
276 : }
277 :
278 : // After this point we will execute Python Code, so we need to acquire the
279 : // GIL
280 159 : gstate = Python_ObtainGIL();
281 :
282 159 : if (sqlfun) {
283 : // Check if exprStr references to a file path or if it contains the
284 : // Python code itself
285 : // There is no easy way to check, so the rule is if it starts with '/'
286 : // it is always a file path,
287 : // Otherwise it's a (relative) file path only if it ends with '.py'
288 159 : size_t length = strlen(exprStr);
289 159 : if (exprStr[0] == '/' ||
290 159 : (exprStr[length - 3] == '.' && exprStr[length - 2] == 'p' &&
291 1 : exprStr[length - 1] == 'y')) {
292 1 : FILE *fp;
293 1 : char address[1000];
294 1 : struct stat buffer;
295 1 : ssize_t length;
296 1 : if (exprStr[0] == '/') {
297 : // absolute path
298 0 : snprintf(address, 1000, "%s", exprStr);
299 : } else {
300 : // relative path
301 1 : snprintf(address, 1000, "%s/%s", FunctionBasePath((char[256]){0}, 256), exprStr);
302 : }
303 1 : if (MT_stat(address, &buffer) < 0) {
304 0 : msg = createException(
305 : MAL, "pyapi3.eval",
306 : SQLSTATE(PY000) "Could not find Python source file \"%s\".", address);
307 0 : goto wrapup;
308 : }
309 1 : fp = fopen(address, "r");
310 1 : if (fp == NULL) {
311 0 : msg = createException(
312 : MAL, "pyapi3.eval",
313 : SQLSTATE(PY000) "Could not open Python source file \"%s\".", address);
314 0 : goto wrapup;
315 : }
316 1 : if(fseek(fp, 0, SEEK_END) == -1) {
317 0 : msg = createException(
318 : MAL, "pyapi3.eval",
319 : SQLSTATE(PY000) "Failed to set file pointer on Python source file \"%s\".", address);
320 0 : goto wrapup;
321 : }
322 1 : if((length = ftell(fp)) == -1) {
323 0 : msg = createException(
324 : MAL, "pyapi3.eval",
325 : SQLSTATE(PY000) "Failed to set file pointer on Python source file \"%s\".", address);
326 0 : goto wrapup;
327 : }
328 1 : if(fseek(fp, 0, SEEK_SET) == -1) {
329 0 : msg = createException(
330 : MAL, "pyapi3.eval",
331 : SQLSTATE(PY000) "Failed to set file pointer on Python source file \"%s\".", address);
332 0 : goto wrapup;
333 : }
334 1 : exprStr = GDKzalloc(length + 1);
335 1 : if (exprStr == NULL) {
336 0 : msg = createException(MAL, "pyapi3.eval",
337 : SQLSTATE(HY013) MAL_MALLOC_FAIL " function body string.");
338 0 : goto wrapup;
339 : }
340 1 : freeexprStr = true;
341 1 : if (fread(exprStr, 1, (size_t) length, fp) != (size_t) length) {
342 0 : msg = createException(MAL, "pyapi3.eval",
343 : SQLSTATE(PY000) "Failed to read from file \"%s\".",
344 : address);
345 0 : goto wrapup;
346 : }
347 1 : fclose(fp);
348 : }
349 : }
350 :
351 : /*[PARSE_CODE]*/
352 159 : pycall = FormatCode(exprStr, args, argcount, 4, &code_object, &msg,
353 : eval_additional_args, additional_columns);
354 159 : if (pycall == NULL && code_object == NULL) {
355 0 : if (msg == NULL) {
356 0 : msg = createException(MAL, "pyapi3.eval",
357 : SQLSTATE(PY000) "Error while parsing Python code.");
358 : }
359 0 : goto wrapup;
360 : }
361 :
362 : /*[CONVERT_BAT]*/
363 : // Now we will do the input handling (aka converting the input BATs to numpy
364 : // arrays)
365 : // We will put the python arrays in a PyTuple object, we will use this
366 : // PyTuple object as the set of arguments to call the Python function
367 159 : pArgs = PyTuple_New(argcount - (pci->retc + 2 + has_card_arg) +
368 159 : (code_object == NULL ? additional_columns : 0));
369 159 : pColumns = PyDict_New();
370 159 : pColumnTypes = PyDict_New();
371 159 : pConnection = Py_Connection_Create(cntxt, 0, 0);
372 :
373 : // Now we will loop over the input BATs and convert them to python objects
374 520 : for (i = pci->retc + 2 + has_card_arg; i < argcount; i++) {
375 361 : PyObject *result_array;
376 : // t_start and t_end hold the part of the BAT we will convert to a Numpy
377 : // array, by default these hold the entire BAT [0 - BATcount(b)]
378 361 : size_t t_start = 0, t_end = pyinput_values[i - (pci->retc + 2 + has_card_arg)].count;
379 :
380 : // There are two possibilities, either the input is a BAT, or the input
381 : // is a scalar
382 : // If the input is a scalar we will convert it to a python scalar
383 : // If the input is a BAT, we will convert it to a numpy array
384 361 : if (pyinput_values[i - (pci->retc + 2 + has_card_arg)].scalar) {
385 41 : result_array = PyArrayObject_FromScalar(
386 : &pyinput_values[i - (pci->retc + 2 + has_card_arg)], &msg);
387 : } else {
388 320 : int type = pyinput_values[i - (pci->retc + 2 + has_card_arg)].bat_type;
389 320 : result_array = PyMaskedArray_FromBAT(
390 : &pyinput_values[i - (pci->retc + 2 + has_card_arg)], t_start, t_end, &msg,
391 320 : !enable_zerocopy_input && type != TYPE_void);
392 : }
393 361 : if (result_array == NULL) {
394 0 : if (msg == MAL_SUCCEED) {
395 0 : msg = createException(MAL, "pyapi3.eval",
396 : SQLSTATE(PY000) "Failed to create Numpy Array from BAT.");
397 : }
398 0 : goto wrapup;
399 : }
400 361 : if (code_object == NULL) {
401 361 : PyObject *arg_type = PyUnicode_FromString(
402 361 : BatType_Format(pyinput_values[i - (pci->retc + 2 + has_card_arg)].bat_type));
403 361 : PyDict_SetItemString(pColumns, args[i], result_array);
404 361 : PyDict_SetItemString(pColumnTypes, args[i], arg_type);
405 361 : Py_DECREF(arg_type);
406 : }
407 361 : pyinput_values[i - (pci->retc + 2 + has_card_arg)].result = result_array;
408 361 : PyTuple_SetItem(pArgs, ai++, result_array);
409 : }
410 159 : if (code_object == NULL) {
411 159 : PyTuple_SetItem(pArgs, ai++, pColumns);
412 159 : PyTuple_SetItem(pArgs, ai++, pColumnTypes);
413 159 : PyTuple_SetItem(pArgs, ai++, pConnection);
414 : }
415 :
416 : /*[EXECUTE_CODE]*/
417 : // Now it is time to actually execute the python code
418 : {
419 159 : PyObject *pFunc, *pModule, *v, *d;
420 :
421 : // First we will load the main module, this is required
422 159 : pModule = PyImport_AddModule("__main__");
423 159 : if (!pModule) {
424 0 : msg = PyError_CreateException("Failed to load module", NULL);
425 0 : goto wrapup;
426 : }
427 :
428 : // Now we will add the UDF to the main module
429 159 : d = PyModule_GetDict(pModule);
430 159 : if (code_object == NULL) {
431 159 : v = PyRun_StringFlags(pycall, Py_file_input, d, NULL, NULL);
432 159 : if (v == NULL) {
433 2 : msg = PyError_CreateException("Could not parse Python code",
434 : pycall);
435 2 : goto wrapup;
436 : }
437 157 : Py_DECREF(v);
438 :
439 : // Now we need to obtain a pointer to the function, the function is
440 : // called "pyfun"
441 157 : pFunc = PyObject_GetAttrString(pModule, "pyfun");
442 157 : if (!pFunc || !PyCallable_Check(pFunc)) {
443 0 : msg = PyError_CreateException("Failed to load function", NULL);
444 0 : goto wrapup;
445 : }
446 : } else {
447 0 : pFunc = PyFunction_New(code_object, d);
448 0 : if (!pFunc || !PyCallable_Check(pFunc)) {
449 0 : msg = PyError_CreateException("Failed to load function", NULL);
450 0 : goto wrapup;
451 : }
452 : }
453 :
454 : // The function has been successfully created/compiled, all that
455 : // remains is to actually call the function
456 157 : pResult = PyObject_CallObject(pFunc, pArgs);
457 :
458 157 : Py_DECREF(pFunc);
459 157 : Py_DECREF(pArgs);
460 :
461 157 : if (PyErr_Occurred()) {
462 4 : msg = PyError_CreateException("Python exception", pycall);
463 4 : if (code_object == NULL) {
464 4 : PyRun_SimpleString("del pyfun");
465 : }
466 4 : goto wrapup;
467 : }
468 :
469 : // if (code_object == NULL) { PyRun_SimpleString("del pyfun"); }
470 :
471 153 : if (PyDict_Check(pResult)) { // Handle dictionary returns
472 : // For dictionary returns we need to map each of the (key,value)
473 : // pairs to the proper return value
474 : // We first analyze the SQL Function structure for a list of return
475 : // value names
476 41 : char **retnames = NULL;
477 41 : if (!varres) {
478 41 : if (sqlfun != NULL) {
479 41 : retnames = GDKzalloc(sizeof(char *) * sqlfun->res->cnt);
480 41 : argnode = sqlfun->res->h;
481 147 : for (i = 0; i < sqlfun->res->cnt; i++) {
482 106 : retnames[i] = ((sql_arg *)argnode->data)->name;
483 106 : argnode = argnode->next;
484 : }
485 : } else {
486 0 : msg = createException(MAL, "pyapi3.eval",
487 : SQLSTATE(PY000) "Return value is a dictionary, but "
488 : "there is no sql function object, so "
489 : "we don't know the return value "
490 : "names and mapping cannot be done.");
491 0 : goto wrapup;
492 : }
493 : } else {
494 : // If there are a variable number of return types, we take the
495 : // column names from the dictionary
496 0 : PyObject *keys = PyDict_Keys(pResult);
497 0 : retcols = (int)PyList_Size(keys);
498 0 : retnames = GDKzalloc(sizeof(char *) * retcols);
499 0 : for (i = 0; i < retcols; i++) {
500 0 : PyObject *colname = PyList_GetItem(keys, i);
501 0 : if (!PyUnicode_CheckExact(colname)) {
502 0 : msg = createException(MAL, "pyapi3.eval",
503 : SQLSTATE(PY000) "Expected a string key in the "
504 : "dictionary, but received an "
505 : "object of type %s",
506 : colname->ob_type->tp_name);
507 0 : goto wrapup;
508 : }
509 0 : retnames[i] = (char *) PyUnicode_AsUTF8(colname);
510 : }
511 0 : Py_DECREF(keys);
512 : }
513 41 : pResult = PyDict_CheckForConversion(pResult, retcols, retnames, &msg);
514 41 : if (retnames != NULL)
515 41 : GDKfree(retnames);
516 112 : } else if (varres) {
517 0 : msg = createException(MAL, "pyapi3.eval",
518 : SQLSTATE(PY000) "Expected a variable number return values, "
519 : "but the return type was not a dictionary. "
520 : "We require the return type to be a "
521 : "dictionary for column naming purposes.");
522 0 : goto wrapup;
523 : } else {
524 : // Now we need to do some error checking on the result object,
525 : // because the result object has to have the correct type/size
526 : // We will also do some converting of result objects to a common
527 : // type (such as scalar -> [[scalar]])
528 112 : pResult = PyObject_CheckForConversion(pResult, retcols, NULL, &msg);
529 : }
530 153 : if (pResult == NULL) {
531 3 : goto wrapup;
532 : }
533 : }
534 :
535 150 : if (varres) {
536 0 : GDKfree(pyreturn_values);
537 0 : pyreturn_values = GDKzalloc(retcols * sizeof(PyReturn));
538 : }
539 :
540 : // Now we have executed the Python function, we have to collect the return
541 : // values and convert them to BATs
542 : // We will first collect header information about the Python return objects
543 : // and extract the underlying C arrays
544 : // We will store this header information in a PyReturn object
545 :
546 : // The reason we are doing this as a separate step is because this
547 : // preprocessing requires us to call the Python API
548 : // Whereas the actual returning does not require us to call the Python API
549 : // This means we can do the actual returning without holding the GIL
550 150 : if (!PyObject_PreprocessObject(pResult, pyreturn_values, retcols, &msg)) {
551 0 : goto wrapup;
552 : }
553 :
554 : // We are done executing Python code (aside from cleanup), so we can release
555 : // the GIL
556 150 : gstate = Python_ReleaseGIL(gstate);
557 :
558 : /*[RETURN_VALUES]*/
559 150 : argnode = sqlfun && sqlfun->res ? sqlfun->res->h : NULL;
560 363 : for (i = 0; i < retcols; i++) {
561 213 : PyReturn *ret = &pyreturn_values[i];
562 213 : int bat_type = TYPE_any;
563 213 : sql_subtype *sql_subtype = argnode ? &((sql_arg *)argnode->data)->type : NULL;
564 213 : if (!varres) {
565 213 : bat_type = getBatType(getArgType(mb, pci, i));
566 :
567 213 : if (bat_type == TYPE_any || bat_type == TYPE_void) {
568 0 : bat_type = PyType_ToBat(ret->result_type);
569 0 : getArgType(mb, pci, i) = bat_type;
570 : }
571 : } else {
572 0 : bat_type = PyType_ToBat(ret->result_type);
573 : }
574 :
575 426 : b = PyObject_ConvertToBAT(ret, sql_subtype, bat_type, i, seqbase, &msg,
576 213 : !enable_zerocopy_output);
577 213 : if (b == NULL) {
578 0 : goto wrapup;
579 : }
580 :
581 213 : msg = MAL_SUCCEED;
582 213 : if (isaBatType(getArgType(mb, pci, i))) {
583 192 : *getArgReference_bat(stk, pci, i) = b->batCacheid;
584 192 : BBPkeepref(b);
585 : } else { // single value return, only for non-grouped aggregations
586 21 : BATiter li = bat_iterator(b);
587 21 : if (bat_type != TYPE_str) {
588 21 : if (VALinit(&stk->stk[pci->argv[i]], bat_type, li.base) ==
589 : NULL)
590 0 : msg = createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
591 : } else {
592 0 : if (VALinit(&stk->stk[pci->argv[i]], bat_type,
593 0 : BUNtail(li, 0)) == NULL)
594 0 : msg = createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
595 : }
596 21 : bat_iterator_end(&li);
597 21 : BBPunfix(b->batCacheid);
598 21 : b = NULL;
599 21 : if (msg != MAL_SUCCEED)
600 0 : goto wrapup;
601 : }
602 213 : if (argnode) {
603 213 : argnode = argnode->next;
604 : }
605 : }
606 150 : wrapup:
607 :
608 : // Actual cleanup
609 : // Cleanup input BATs
610 528 : for (i = pci->retc + 2 + has_card_arg; i < pci->argc; i++) {
611 366 : PyInput *inp = &pyinput_values[i - (pci->retc + 2 + has_card_arg)];
612 366 : BBPreclaim(inp->bat);
613 369 : BBPreclaim(inp->conv_bat); /* delayed free */
614 : }
615 162 : if (pResult != NULL && gstate == 0) {
616 : // if there is a pResult here, we are running single threaded (LANGUAGE
617 : // PYTHON),
618 : // thus we need to free python objects, thus we need to obtain the GIL
619 150 : gstate = Python_ObtainGIL();
620 : }
621 393 : for (i = 0; i < retcols; i++) {
622 231 : PyReturn *ret = &pyreturn_values[i];
623 : // First clean up any return values
624 231 : if (!ret->multidimensional) {
625 : // Clean up numpy arrays, if they are there
626 231 : if (ret->numpy_array != NULL) {
627 213 : Py_DECREF(ret->numpy_array);
628 : }
629 231 : if (ret->numpy_mask != NULL) {
630 30 : Py_DECREF(ret->numpy_mask);
631 : }
632 : }
633 : }
634 162 : if (pResult != NULL) {
635 150 : Py_DECREF(pResult);
636 : }
637 162 : if (gstate != 0) {
638 159 : gstate = Python_ReleaseGIL(gstate);
639 : }
640 :
641 : // Now release some GDK memory we allocated for strings and input values
642 162 : GDKfree(pyreturn_values);
643 162 : GDKfree(pyinput_values);
644 1246 : for (i = 0; i < pci->argc; i++)
645 922 : if (args[i])
646 366 : GDKfree(args[i]);
647 162 : GDKfree(args);
648 162 : GDKfree(pycall);
649 162 : if (freeexprStr)
650 1 : GDKfree(exprStr);
651 :
652 162 : return msg;
653 : }
654 :
655 : #ifdef _MSC_VER
656 : #define wcsdup _wcsdup
657 : #endif
658 : static str
659 10 : PYAPI3PyAPIprelude(void) {
660 10 : MT_lock_set(&pyapiLock);
661 10 : if (!pyapiInitialized) {
662 10 : wchar_t* program = L"mserver5";
663 10 : wchar_t* argv[] = { program, NULL };
664 10 : str msg = MAL_SUCCEED;
665 10 : PyObject *tmp;
666 :
667 10 : static_assert(PY_MAJOR_VERSION == 3, "Python 3.X required");
668 : #if PY_MINOR_VERSION >= 11
669 : /* introduced in 3.8, we use it for 3.11 and later
670 : * on Windows, this code does not work with 3.10, it needs more
671 : * complex initialization */
672 10 : PyStatus status;
673 10 : PyConfig config;
674 :
675 10 : PyConfig_InitIsolatedConfig(&config);
676 10 : status = PyConfig_SetArgv(&config, 1, argv);
677 10 : if (!PyStatus_Exception(status))
678 10 : status = PyConfig_Read(&config);
679 10 : if (!PyStatus_Exception(status))
680 10 : status = Py_InitializeFromConfig(&config);
681 10 : PyConfig_Clear(&config);
682 10 : if (PyStatus_Exception(status)) {
683 0 : MT_lock_unset(&pyapiLock);
684 0 : throw(MAL, "pyapi3.eval",
685 : SQLSTATE(PY000) "Python initialization failed: %s: %s",
686 : status.func ? status.func : "PYAPI3PyAPIprelude",
687 : status.err_msg ? status.err_msg : "");
688 : }
689 : #else
690 : /* PySys_SetArgvEx deprecated in 3.11 */
691 : Py_InitializeEx(0);
692 : PySys_SetArgvEx(1, argv, 0);
693 : #endif
694 :
695 10 : _import_array();
696 10 : msg = _connection_init();
697 10 : if (msg != MAL_SUCCEED) {
698 0 : MT_lock_unset(&pyapiLock);
699 0 : return msg;
700 : }
701 10 : msg = _conversion_init();
702 10 : if (msg != MAL_SUCCEED) {
703 0 : MT_lock_unset(&pyapiLock);
704 0 : return msg;
705 : }
706 10 : _pytypes_init();
707 10 : _loader_init();
708 10 : tmp = PyUnicode_FromString("marshal");
709 10 : marshal_module = PyImport_Import(tmp);
710 10 : init_DateTimeAPI();
711 10 : Py_DECREF(tmp);
712 10 : if (marshal_module == NULL) {
713 0 : MT_lock_unset(&pyapiLock);
714 0 : return createException(MAL, "pyapi3.eval", SQLSTATE(PY000) "Failed to load Marshal module.");
715 : }
716 10 : marshal_loads = PyObject_GetAttrString(marshal_module, "loads");
717 10 : if (marshal_loads == NULL) {
718 0 : MT_lock_unset(&pyapiLock);
719 0 : return createException(MAL, "pyapi3.eval", SQLSTATE(PY000) "Failed to load function \"loads\" from Marshal module.");
720 : }
721 10 : if (PyRun_SimpleString("import numpy") != 0) {
722 0 : msg = PyError_CreateException("Failed to initialize embedded python", NULL);
723 0 : MT_lock_unset(&pyapiLock);
724 0 : return msg;
725 : }
726 10 : PyEval_SaveThread();
727 10 : if (msg != MAL_SUCCEED) {
728 : MT_lock_unset(&pyapiLock);
729 : return msg;
730 : }
731 10 : pyapiInitialized = true;
732 10 : fprintf(stdout, "# MonetDB/Python%d module loaded\n", 3);
733 : }
734 10 : MT_lock_unset(&pyapiLock);
735 10 : return MAL_SUCCEED;
736 : }
737 :
738 18 : char *PyError_CreateException(char *error_text, char *pycall)
739 : {
740 18 : PyObject *py_error_type = NULL, *py_error_value = NULL,
741 18 : *py_error_traceback = NULL;
742 18 : const char *py_error_string = NULL;
743 18 : lng line_number = -1;
744 :
745 18 : PyErr_Fetch(&py_error_type, &py_error_value, &py_error_traceback);
746 18 : if (py_error_value) {
747 18 : PyObject *error;
748 18 : PyErr_NormalizeException(&py_error_type, &py_error_value,
749 : &py_error_traceback);
750 18 : error = PyObject_Str(py_error_value);
751 :
752 18 : py_error_string = PyUnicode_AsUTF8(error);
753 18 : Py_XDECREF(error);
754 18 : if (pycall != NULL && strlen(pycall) > 0) {
755 18 : if (py_error_traceback == NULL) {
756 : // no traceback info, this means we are dealing with a parsing
757 : // error
758 : // line information should be in the error message
759 2 : sscanf(py_error_string, "%*[^0-9]" LLSCN, &line_number);
760 2 : if (line_number < 0)
761 0 : goto finally;
762 : } else {
763 16 : line_number =
764 16 : ((PyTracebackObject *)py_error_traceback)->tb_lineno;
765 : }
766 :
767 : // Now only display the line numbers around the error message, we
768 : // display 5 lines around the error message
769 : {
770 : char linenr[32];
771 : size_t nrpos, pos, i, j;
772 : char lineinformation[5000]; // we only support 5000 characters
773 : // for 5 lines of the program,
774 : // should be enough
775 : nrpos = 0; // Current line number
776 : pos = 0; // Current position in the lineinformation result array
777 2690 : for (i = 0; i < strlen(pycall); i++) {
778 2672 : if (pycall[i] == '\n' || i == 0) {
779 : // Check if we have arrived at a new line, if we have
780 : // increment the line count
781 151 : nrpos++;
782 : // Now check if we should display this line
783 151 : if (nrpos >= ((size_t)line_number - 2) &&
784 14 : nrpos <= ((size_t)line_number + 2) && pos < 4997) {
785 : // We shouldn't put a newline on the first line we
786 : // encounter, only on subsequent lines
787 8 : if (nrpos > ((size_t)line_number - 2))
788 7 : lineinformation[pos++] = '\n';
789 8 : if ((size_t)line_number == nrpos) {
790 : // If this line is the 'error' line, add an
791 : // arrow before it, otherwise just add spaces
792 2 : lineinformation[pos++] = '>';
793 2 : lineinformation[pos++] = ' ';
794 : } else {
795 6 : lineinformation[pos++] = ' ';
796 6 : lineinformation[pos++] = ' ';
797 : }
798 8 : snprintf(linenr, 32, "%zu", nrpos);
799 18 : for (j = 0; j < strlen(linenr); j++) {
800 10 : lineinformation[pos++] = linenr[j];
801 : }
802 8 : lineinformation[pos++] = '.';
803 8 : lineinformation[pos++] = ' ';
804 : }
805 : }
806 2672 : if (pycall[i] != '\n' && nrpos >= (size_t)line_number - 2 &&
807 150 : nrpos <= (size_t)line_number + 2 && pos < 4999) {
808 : // If we are on a line number that we have to display,
809 : // copy the text from this line for display
810 94 : lineinformation[pos++] = pycall[i];
811 : }
812 : }
813 18 : lineinformation[pos] = '\0';
814 18 : return createException(MAL, "pyapi3.eval", SQLSTATE(PY000) "%s\n%s\n%s",
815 : error_text, lineinformation,
816 : py_error_string);
817 : }
818 : }
819 : } else {
820 : py_error_string = "";
821 : }
822 0 : finally:
823 0 : if (pycall == NULL)
824 0 : return createException(MAL, "pyapi3.eval", SQLSTATE(PY000) "%s\n%s", error_text,
825 : py_error_string);
826 0 : return createException(MAL, "pyapi3.eval", SQLSTATE(PY000) "%s\n%s\n%s", error_text, pycall,
827 : py_error_string);
828 : }
829 :
830 3 : static str CreateEmptyReturn(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci,
831 : size_t retcols, oid seqbase)
832 : {
833 3 : str msg = MAL_SUCCEED;
834 3 : void **res = GDKzalloc(retcols * sizeof(void*));
835 :
836 3 : if (!res) {
837 0 : msg = createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
838 0 : goto bailout;
839 : }
840 :
841 6 : for (size_t i = 0; i < retcols; i++) {
842 3 : if (isaBatType(getArgType(mb, pci, i))) {
843 2 : BAT *b = COLnew(seqbase, getBatType(getArgType(mb, pci, i)), 0, TRANSIENT);
844 2 : if (!b) {
845 0 : msg = createException(MAL, "pyapi3.eval", GDK_EXCEPTION);
846 0 : goto bailout;
847 : }
848 2 : ((BAT**)res)[i] = b;
849 : } else { // single value return, only for non-grouped aggregations
850 : // return NULL to conform to SQL aggregates
851 1 : int tpe = getArgType(mb, pci, i);
852 1 : if (!VALinit(&stk->stk[pci->argv[i]], tpe, ATOMnilptr(tpe))) {
853 0 : msg = createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
854 0 : goto bailout;
855 : }
856 1 : ((ValPtr*)res)[i] = &stk->stk[pci->argv[i]];
857 : }
858 : }
859 :
860 3 : bailout:
861 3 : if (res) {
862 6 : for (size_t i = 0; i < retcols; i++) {
863 3 : if (isaBatType(getArgType(mb, pci, i))) {
864 2 : BAT *b = ((BAT**)res)[i];
865 :
866 2 : if (b && msg) {
867 0 : BBPreclaim(b);
868 2 : } else if (b) {
869 2 : *getArgReference_bat(stk, pci, i) = b->batCacheid;
870 2 : BBPkeepref(b);
871 : }
872 1 : } else if (msg) {
873 0 : ValPtr pt = ((ValPtr*)res)[i];
874 :
875 0 : if (pt)
876 0 : VALclear(pt);
877 : }
878 : }
879 3 : GDKfree(res);
880 : }
881 3 : return msg;
882 : }
883 :
884 : static str
885 10 : PyAPI3prelude(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
886 : {
887 10 : (void)cntxt; (void)mb; (void)stk; (void)pci;
888 10 : return PYAPI3PyAPIprelude();
889 : }
890 :
891 : static str
892 10 : PyAPI3epilogue(void *ret)
893 : {
894 10 : (void)ret;
895 10 : MT_lock_set(&pyapiLock);
896 10 : if (pyapiInitialized) {
897 10 : PyGILState_STATE gstate;
898 10 : gstate = PyGILState_Ensure();
899 :
900 : /* now exit/cleanup */
901 10 : if (0) Py_FinalizeEx();
902 10 : (void)gstate;
903 : }
904 10 : MT_lock_unset(&pyapiLock);
905 10 : return MAL_SUCCEED;
906 : }
907 :
908 : #include "mel.h"
909 : static mel_func pyapi3_init_funcs[] = {
910 : pattern("pyapi3", "eval", PYAPI3PyAPIevalStd, true, "Execute a simple Python script returning a single value", args(1,3, argany("",1),arg("fptr",ptr),arg("expr",str))),
911 : pattern("pyapi3", "eval", PYAPI3PyAPIevalStd, true, "Execute a simple Python script value", args(1,4, varargany("",0),arg("fptr",ptr),arg("expr",str),varargany("arg",0))),
912 : pattern("pyapi3", "subeval_aggr", PYAPI3PyAPIevalAggr, true, "grouped aggregates through Python", args(1,4, varargany("",0),arg("fptr",ptr),arg("expr",str),varargany("arg",0))),
913 : pattern("pyapi3", "eval_aggr", PYAPI3PyAPIevalAggr, true, "grouped aggregates through Python", args(1,4, varargany("",0),arg("fptr",ptr),arg("expr",str),varargany("arg",0))),
914 : pattern("pyapi3", "eval_loader", PYAPI3PyAPIevalLoader, true, "loader functions through Python", args(1,3, varargany("",0),arg("fptr",ptr),arg("expr",str))),
915 : pattern("pyapi3", "eval_loader", PYAPI3PyAPIevalLoader, true, "loader functions through Python", args(1,4, varargany("",0),arg("fptr",ptr),arg("expr",str),varargany("arg",0))),
916 : pattern("batpyapi3", "eval", PYAPI3PyAPIevalStd, true, "Execute a simple Python script value", args(1,4, batvarargany("",0),arg("fptr", ptr), arg("expr",str),varargany("arg",0))),
917 : pattern("batpyapi3", "eval", PYAPI3PyAPIevalStd, true, "Execute a simple Python script value", args(1,4, batargany("",1),arg("card", lng), arg("fptr",ptr),arg("expr",str))),
918 : pattern("batpyapi3", "subeval_aggr", PYAPI3PyAPIevalAggr, true, "grouped aggregates through Python", args(1,4, varargany("",0),arg("fptr",ptr),arg("expr",str),varargany("arg",0))),
919 : pattern("batpyapi3", "eval_aggr", PYAPI3PyAPIevalAggr, true, "grouped aggregates through Python", args(1,4, varargany("",0),arg("fptr",ptr),arg("expr",str),varargany("arg",0))),
920 : pattern("batpyapi3", "eval_loader", PYAPI3PyAPIevalLoader, true, "loader functions through Python", args(1,3, varargany("",0),arg("fptr",ptr),arg("expr",str))),
921 : pattern("batpyapi3", "eval_loader", PYAPI3PyAPIevalLoader, true, "loader functions through Python", args(1,4, varargany("",0),arg("fptr",ptr),arg("expr",str),varargany("arg",0))),
922 : pattern("pyapi3", "prelude", PyAPI3prelude, false, "", noargs),
923 : command("pyapi3", "epilogue", PyAPI3epilogue, false, "", noargs),
924 : { .imp=NULL }
925 : };
926 : #include "mal_import.h"
927 : #ifdef _MSC_VER
928 : #undef read
929 : #pragma section(".CRT$XCU",read)
930 : #endif
931 10 : LIB_STARTUP_FUNC(init_pyapi3_mal)
932 10 : { mal_module("pyapi3", NULL, pyapi3_init_funcs); }
|