Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024, 2025 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "pyapi.h"
15 : #include "connection.h"
16 : #include "mutils.h"
17 :
18 : #include "unicode.h"
19 : #include "pytypes.h"
20 : #include "type_conversion.h"
21 : #include "formatinput.h"
22 : #include "conversion.h"
23 :
24 : static PyObject *marshal_module = NULL;
25 : PyObject *marshal_loads = NULL;
26 :
27 : static str CreateEmptyReturn(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci,
28 : size_t retcols, oid seqbase);
29 :
/*
 * Return the directory against which relative Python source file names are
 * resolved: the GDK setting "function_basepath" when it is set, otherwise the
 * HOME environment variable, otherwise the empty string.
 *
 * buf/len provide scratch space that receives the UTF-8 conversion of HOME;
 * they are only used on native Windows builds.  The returned pointer is never
 * NULL and must not be freed by the caller.
 */
static const char *FunctionBasePath(char *buf, size_t len)
{
	const char *result = GDKgetenv("function_basepath");

#ifdef NATIVE_WIN32
	static_assert(SIZEOF_WCHAR_T == 2, "wchar_t on Windows expected to be 2 bytes");
	/* HOME is only consulted when the GDK setting is absent */
	const wchar_t *whome = (result == NULL) ? _wgetenv(L"HOME") : NULL;
	char *converted = whome ? utf16toutf8(whome) : NULL;

	if (converted != NULL) {
		strcpy_len(buf, converted, len);
		free(converted);
		result = buf;
	}
#else
	/* not used except on Windows */
	(void) buf;
	(void) len;
	if (!result)
		result = getenv("HOME");
#endif
	return result ? result : "";
}
59 :
60 : static MT_Lock pyapiLock = MT_LOCK_INITIALIZER(pyapiLock);
61 : static bool pyapiInitialized = false;
62 : static PyDateTime_CAPI *PYAPI3_DateTimeAPI;
63 :
64 25 : bool PYAPI3PyAPIInitialized(void) {
65 25 : return pyapiInitialized;
66 : }
67 :
68 780 : PyDateTime_CAPI *get_DateTimeAPI(void) {
69 780 : return PYAPI3_DateTimeAPI;
70 : }
71 :
72 10 : void init_DateTimeAPI(void) {
73 10 : PyDateTime_IMPORT;
74 10 : PYAPI3_DateTimeAPI = PyDateTimeAPI;
75 10 : }
76 :
/*
 * Zero-copy switches for BAT <-> numpy conversion.  Zero-copy input is on for
 * every platform; zero-copy output is switched off on WIN32 builds only.
 */
static bool enable_zerocopy_input = true;
#ifdef WIN32
static bool enable_zerocopy_output = false;
#else
static bool enable_zerocopy_output = true;
#endif
84 :
85 : static str
86 : PyAPIeval(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, bool grouped);
87 :
88 : str
89 136 : PYAPI3PyAPIevalStd(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) {
90 136 : return PyAPIeval(cntxt, mb, stk, pci, false);
91 : }
92 :
93 : str
94 26 : PYAPI3PyAPIevalAggr(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) {
95 26 : return PyAPIeval(cntxt, mb, stk, pci, true);
96 : }
97 :
/*
 * Scatter the values of one input column into per-group arrays.
 *
 * The macro expands to a block that relies on the following names being in
 * scope at the expansion site: split_bats, basevals, group_it, group_count,
 * group_counts, element_it, elements, aggr_group, aggr_group_arr and i (the
 * column index).  For every group an array of group_counts[g] elements of
 * type `tpe` is allocated into ptr[g][i]; the elements are then appended to
 * the array of their group in input order.  When the group column is void
 * (dense), element k is placed in group k.
 *
 * The per-group write cursors are size_t values, so the scratch array is
 * sized from the pointer's own element type rather than from an unrelated
 * type.
 * NOTE(review): the allocations are not checked for failure here; the
 * expansion site has to guarantee enough memory or check afterwards.
 */
#define NP_SPLIT_BAT(tpe)                                                      \
	{                                                                          \
		tpe ***ptr = (tpe ***)split_bats;                                      \
		size_t *temp_indices;                                                  \
		tpe *batcontent = (tpe *)basevals;                                     \
		/* allocate space for split BAT */                                     \
		for (group_it = 0; group_it < group_count; group_it++) {               \
			ptr[group_it][i] =                                                 \
				GDKzalloc(group_counts[group_it] * sizeof(tpe));               \
		}                                                                      \
		/* one write cursor per group, all starting at 0 */                    \
		temp_indices = GDKzalloc(sizeof(*temp_indices) * group_count);         \
		if (BATtvoid(aggr_group)) {                                            \
			for (element_it = 0; element_it < elements; element_it++) {        \
				/* dense groups: element k belongs to group k */               \
				ptr[element_it][i][temp_indices[element_it]++] =               \
					batcontent[element_it];                                    \
			}                                                                  \
		} else {                                                               \
			for (element_it = 0; element_it < elements; element_it++) {        \
				/* group of current element */                                 \
				oid group = aggr_group_arr[element_it];                        \
				/* append current element to proper group */                   \
				ptr[group][i][temp_indices[group]++] = batcontent[element_it]; \
			}                                                                  \
		}                                                                      \
		GDKfree(temp_indices);                                                 \
	}
126 :
127 : //! The main PyAPI function, this function does everything PyAPI related
128 : //! It takes as argument a bunch of input BATs, a python function, and outputs a
129 : //! number of BATs
130 : //! This function follows the following pipeline
131 : //! [PARSE_CODE] Step 1: It parses the Python source code and corrects any wrong
132 : //! indentation, or converts the source code into a PyCodeObject if the source
133 : //! code is encoded as such
134 : //! [CONVERT_BAT] Step 2: It converts the input BATs into Numpy Arrays
135 : //! [EXECUTE_CODE] Step 3: It executes the Python code using the Numpy arrays as arguments
136 : //! [RETURN_VALUES] Step 4: It collects the return values and converts them back into BATs
137 162 : static str PyAPIeval(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, bool grouped) {
138 162 : sql_func * sqlfun = NULL;
139 162 : str exprStr = NULL;
140 :
141 162 : const int additional_columns = 3;
142 162 : int i = 1, ai = 0;
143 162 : char *pycall = NULL;
144 162 : str *args;
145 162 : char *msg = MAL_SUCCEED;
146 162 : BAT *b = NULL;
147 162 : node *argnode = NULL;
148 162 : int seengrp = FALSE;
149 162 : PyObject *pArgs = NULL, *pColumns = NULL, *pColumnTypes = NULL,
150 : *pConnection,
151 162 : *pResult = NULL; // this is going to be the parameter tuple
152 162 : PyObject *code_object = NULL;
153 162 : PyReturn *pyreturn_values = NULL;
154 162 : PyInput *pyinput_values = NULL;
155 162 : oid seqbase = 0;
156 162 : bit varres;
157 162 : int retcols;
158 162 : bool gstate = 0;
159 162 : int unnamedArgs = 0;
160 162 : bool freeexprStr = false;
161 162 : int argcount = pci->argc;
162 :
163 162 : char *eval_additional_args[] = {"_columns", "_column_types", "_conn"};
164 :
165 162 : if (!pyapiInitialized) {
166 0 : throw(MAL, "pyapi3.eval", SQLSTATE(PY000) "Embedded Python is enabled but an error was "
167 : "thrown during initialization.");
168 : }
169 :
170 : // If the first input argument is of type lng, this is a cardinality-only bulk operation.
171 162 : int has_card_arg = 0;
172 162 : BUN card; // cardinality of non-bat inputs
173 162 : if (getArgType(mb, pci, pci->retc) == TYPE_lng) {
174 1 : has_card_arg=1;
175 1 : card = (BUN) *getArgReference_lng(stk, pci, pci->retc);
176 : } else {
177 : has_card_arg=0;
178 : card = 1;
179 : }
180 :
181 162 : sqlfun = *(sql_func **)getArgReference(stk, pci, pci->retc + has_card_arg);
182 162 : exprStr = *getArgReference_str(stk, pci, pci->retc + 1 + has_card_arg);
183 324 : varres = sqlfun ? sqlfun->varres : 0;
184 162 : retcols = !varres ? pci->retc : -1;
185 :
186 162 : args = (str *)GDKzalloc(pci->argc * sizeof(str));
187 162 : pyreturn_values = GDKzalloc(pci->retc * sizeof(PyReturn));
188 162 : if (args == NULL || pyreturn_values == NULL) {
189 0 : throw(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL " arguments.");
190 : }
191 :
192 162 : if ((pci->argc - (pci->retc + 2 + has_card_arg)) * sizeof(PyInput) > 0) {
193 114 : pyinput_values = GDKzalloc((pci->argc - (pci->retc + 2 + has_card_arg)) * sizeof(PyInput));
194 :
195 114 : if (pyinput_values == NULL) {
196 0 : GDKfree(args);
197 0 : GDKfree(pyreturn_values);
198 0 : throw(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL " input values.");
199 : }
200 : }
201 :
202 : // Analyse the SQL_Func structure to get the parameter names
203 162 : if (sqlfun != NULL && sqlfun->ops->cnt > 0) {
204 101 : unnamedArgs = pci->retc + 2 + has_card_arg;
205 101 : argnode = sqlfun->ops->h;
206 238 : while (argnode) {
207 137 : char *argname = ((sql_arg *)argnode->data)->name;
208 137 : args[unnamedArgs++] = GDKstrdup(argname);
209 137 : argnode = argnode->next;
210 : }
211 : }
212 :
213 : // We name all the unknown arguments, if grouping is enabled the first
214 : // unknown argument that is the group variable, we name this 'aggr_group'
215 528 : for (i = pci->retc + 2 + has_card_arg; i < argcount; i++) {
216 366 : if (args[i] == NULL) {
217 229 : if (!seengrp && grouped) {
218 17 : args[i] = GDKstrdup("aggr_group");
219 17 : seengrp = TRUE;
220 : } else {
221 212 : char argbuf[64];
222 212 : snprintf(argbuf, sizeof(argbuf), "arg%i", i - pci->retc - (1 + has_card_arg));
223 212 : args[i] = GDKstrdup(argbuf);
224 : }
225 : }
226 : }
227 :
228 : // Construct PyInput objects
229 162 : argnode = sqlfun && sqlfun->ops->cnt > 0 ? sqlfun->ops->h : NULL;
230 523 : for (i = pci->retc + 2 + has_card_arg; i < argcount; i++) {
231 364 : PyInput *inp = &pyinput_values[i - (pci->retc + 2 + has_card_arg)];
232 364 : if (!isaBatType(getArgType(mb, pci, i))) {
233 41 : inp->scalar = true;
234 41 : inp->bat_type = getArgType(mb, pci, i);
235 41 : inp->count = 1;
236 :
237 41 : if (!has_card_arg) {
238 41 : if (inp->bat_type == TYPE_str) {
239 0 : inp->dataptr = getArgReference_str(stk, pci, i);
240 : } else {
241 41 : inp->dataptr = getArgReference(stk, pci, i);
242 : }
243 : }
244 : else {
245 0 : const ValRecord *v = &stk->stk[getArg(pci, i)];
246 0 : b = BATconstant(0, v->vtype, VALptr(v), card, TRANSIENT);
247 0 : if (b == NULL) {
248 0 : msg = createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
249 0 : goto wrapup;
250 : }
251 0 : inp->count = BATcount(b);
252 0 : inp->bat_type = b->ttype;
253 0 : inp->bat = b;
254 : }
255 : } else {
256 323 : b = BATdescriptor(*getArgReference_bat(stk, pci, i));
257 323 : if (b == NULL) {
258 0 : msg = createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
259 0 : goto wrapup;
260 : }
261 323 : seqbase = b->hseqbase;
262 323 : inp->count = BATcount(b);
263 323 : inp->bat_type = b->ttype;
264 323 : inp->bat = b;
265 323 : if (inp->count == 0) {
266 : // one of the input BATs is empty, don't execute the function at
267 : // all
268 : // just return empty BATs
269 3 : msg = CreateEmptyReturn(mb, stk, pci, retcols, seqbase);
270 3 : goto wrapup;
271 : }
272 : }
273 361 : if (argnode) {
274 134 : inp->sql_subtype = &((sql_arg *)argnode->data)->type;
275 134 : argnode = argnode->next;
276 : }
277 : }
278 :
279 : // After this point we will execute Python Code, so we need to acquire the
280 : // GIL
281 159 : gstate = Python_ObtainGIL();
282 :
283 159 : if (sqlfun) {
284 : // Check if exprStr references to a file path or if it contains the
285 : // Python code itself
286 : // There is no easy way to check, so the rule is if it starts with '/'
287 : // it is always a file path,
288 : // Otherwise it's a (relative) file path only if it ends with '.py'
289 159 : size_t length = strlen(exprStr);
290 159 : if (exprStr[0] == '/' ||
291 159 : (exprStr[length - 3] == '.' && exprStr[length - 2] == 'p' &&
292 1 : exprStr[length - 1] == 'y')) {
293 1 : FILE *fp;
294 1 : char address[1000];
295 1 : struct stat buffer;
296 1 : ssize_t length;
297 1 : if (exprStr[0] == '/') {
298 : // absolute path
299 0 : snprintf(address, 1000, "%s", exprStr);
300 : } else {
301 : // relative path
302 1 : snprintf(address, 1000, "%s/%s", FunctionBasePath((char[256]){0}, 256), exprStr);
303 : }
304 1 : if (MT_stat(address, &buffer) < 0) {
305 0 : msg = createException(
306 : MAL, "pyapi3.eval",
307 : SQLSTATE(PY000) "Could not find Python source file \"%s\".", address);
308 0 : goto wrapup;
309 : }
310 1 : fp = fopen(address, "r");
311 1 : if (fp == NULL) {
312 0 : msg = createException(
313 : MAL, "pyapi3.eval",
314 : SQLSTATE(PY000) "Could not open Python source file \"%s\".", address);
315 0 : goto wrapup;
316 : }
317 1 : if(fseek(fp, 0, SEEK_END) == -1) {
318 0 : msg = createException(
319 : MAL, "pyapi3.eval",
320 : SQLSTATE(PY000) "Failed to set file pointer on Python source file \"%s\".", address);
321 0 : goto wrapup;
322 : }
323 1 : if((length = ftell(fp)) == -1) {
324 0 : msg = createException(
325 : MAL, "pyapi3.eval",
326 : SQLSTATE(PY000) "Failed to set file pointer on Python source file \"%s\".", address);
327 0 : goto wrapup;
328 : }
329 1 : if(fseek(fp, 0, SEEK_SET) == -1) {
330 0 : msg = createException(
331 : MAL, "pyapi3.eval",
332 : SQLSTATE(PY000) "Failed to set file pointer on Python source file \"%s\".", address);
333 0 : goto wrapup;
334 : }
335 1 : exprStr = GDKzalloc(length + 1);
336 1 : if (exprStr == NULL) {
337 0 : msg = createException(MAL, "pyapi3.eval",
338 : SQLSTATE(HY013) MAL_MALLOC_FAIL " function body string.");
339 0 : goto wrapup;
340 : }
341 1 : freeexprStr = true;
342 1 : if (fread(exprStr, 1, (size_t) length, fp) != (size_t) length) {
343 0 : msg = createException(MAL, "pyapi3.eval",
344 : SQLSTATE(PY000) "Failed to read from file \"%s\".",
345 : address);
346 0 : goto wrapup;
347 : }
348 1 : fclose(fp);
349 : }
350 : }
351 :
352 : /*[PARSE_CODE]*/
353 159 : pycall = FormatCode(exprStr, args, argcount, 4, &code_object, &msg,
354 : eval_additional_args, additional_columns);
355 159 : if (pycall == NULL && code_object == NULL) {
356 0 : if (msg == NULL) {
357 0 : msg = createException(MAL, "pyapi3.eval",
358 : SQLSTATE(PY000) "Error while parsing Python code.");
359 : }
360 0 : goto wrapup;
361 : }
362 :
363 : /*[CONVERT_BAT]*/
364 : // Now we will do the input handling (aka converting the input BATs to numpy
365 : // arrays)
366 : // We will put the python arrays in a PyTuple object, we will use this
367 : // PyTuple object as the set of arguments to call the Python function
368 159 : pArgs = PyTuple_New(argcount - (pci->retc + 2 + has_card_arg) +
369 159 : (code_object == NULL ? additional_columns : 0));
370 159 : pColumns = PyDict_New();
371 159 : pColumnTypes = PyDict_New();
372 159 : pConnection = Py_Connection_Create(cntxt, 0, 0);
373 :
374 : // Now we will loop over the input BATs and convert them to python objects
375 520 : for (i = pci->retc + 2 + has_card_arg; i < argcount; i++) {
376 361 : PyObject *result_array;
377 : // t_start and t_end hold the part of the BAT we will convert to a Numpy
378 : // array, by default these hold the entire BAT [0 - BATcount(b)]
379 361 : size_t t_start = 0, t_end = pyinput_values[i - (pci->retc + 2 + has_card_arg)].count;
380 :
381 : // There are two possibilities, either the input is a BAT, or the input
382 : // is a scalar
383 : // If the input is a scalar we will convert it to a python scalar
384 : // If the input is a BAT, we will convert it to a numpy array
385 361 : if (pyinput_values[i - (pci->retc + 2 + has_card_arg)].scalar) {
386 41 : result_array = PyArrayObject_FromScalar(
387 : &pyinput_values[i - (pci->retc + 2 + has_card_arg)], &msg);
388 : } else {
389 320 : int type = pyinput_values[i - (pci->retc + 2 + has_card_arg)].bat_type;
390 320 : result_array = PyMaskedArray_FromBAT(
391 : &pyinput_values[i - (pci->retc + 2 + has_card_arg)], t_start, t_end, &msg,
392 320 : !enable_zerocopy_input && type != TYPE_void);
393 : }
394 361 : if (result_array == NULL) {
395 0 : if (msg == MAL_SUCCEED) {
396 0 : msg = createException(MAL, "pyapi3.eval",
397 : SQLSTATE(PY000) "Failed to create Numpy Array from BAT.");
398 : }
399 0 : goto wrapup;
400 : }
401 361 : if (code_object == NULL) {
402 361 : PyObject *arg_type = PyUnicode_FromString(
403 361 : BatType_Format(pyinput_values[i - (pci->retc + 2 + has_card_arg)].bat_type));
404 361 : PyDict_SetItemString(pColumns, args[i], result_array);
405 361 : PyDict_SetItemString(pColumnTypes, args[i], arg_type);
406 361 : Py_DECREF(arg_type);
407 : }
408 361 : pyinput_values[i - (pci->retc + 2 + has_card_arg)].result = result_array;
409 361 : PyTuple_SetItem(pArgs, ai++, result_array);
410 : }
411 159 : if (code_object == NULL) {
412 159 : PyTuple_SetItem(pArgs, ai++, pColumns);
413 159 : PyTuple_SetItem(pArgs, ai++, pColumnTypes);
414 159 : PyTuple_SetItem(pArgs, ai++, pConnection);
415 : }
416 :
417 : /*[EXECUTE_CODE]*/
418 : // Now it is time to actually execute the python code
419 : {
420 159 : PyObject *pFunc, *pModule, *v, *d;
421 :
422 : // First we will load the main module, this is required
423 159 : pModule = PyImport_AddModule("__main__");
424 159 : if (!pModule) {
425 0 : msg = PyError_CreateException("Failed to load module", NULL);
426 0 : goto wrapup;
427 : }
428 :
429 : // Now we will add the UDF to the main module
430 159 : d = PyModule_GetDict(pModule);
431 159 : if (code_object == NULL) {
432 159 : v = PyRun_StringFlags(pycall, Py_file_input, d, NULL, NULL);
433 159 : if (v == NULL) {
434 2 : msg = PyError_CreateException("Could not parse Python code",
435 : pycall);
436 2 : goto wrapup;
437 : }
438 157 : Py_DECREF(v);
439 :
440 : // Now we need to obtain a pointer to the function, the function is
441 : // called "pyfun"
442 157 : pFunc = PyObject_GetAttrString(pModule, "pyfun");
443 157 : if (!pFunc || !PyCallable_Check(pFunc)) {
444 0 : msg = PyError_CreateException("Failed to load function", NULL);
445 0 : goto wrapup;
446 : }
447 : } else {
448 0 : pFunc = PyFunction_New(code_object, d);
449 0 : if (!pFunc || !PyCallable_Check(pFunc)) {
450 0 : msg = PyError_CreateException("Failed to load function", NULL);
451 0 : goto wrapup;
452 : }
453 : }
454 :
455 : // The function has been successfully created/compiled, all that
456 : // remains is to actually call the function
457 157 : pResult = PyObject_CallObject(pFunc, pArgs);
458 :
459 157 : Py_DECREF(pFunc);
460 157 : Py_DECREF(pArgs);
461 :
462 157 : if (PyErr_Occurred()) {
463 4 : msg = PyError_CreateException("Python exception", pycall);
464 4 : if (code_object == NULL) {
465 4 : PyRun_SimpleString("del pyfun");
466 : }
467 4 : goto wrapup;
468 : }
469 :
470 : // if (code_object == NULL) { PyRun_SimpleString("del pyfun"); }
471 :
472 153 : if (PyDict_Check(pResult)) { // Handle dictionary returns
473 : // For dictionary returns we need to map each of the (key,value)
474 : // pairs to the proper return value
475 : // We first analyze the SQL Function structure for a list of return
476 : // value names
477 41 : char **retnames = NULL;
478 41 : if (!varres) {
479 41 : if (sqlfun != NULL) {
480 41 : retnames = GDKzalloc(sizeof(char *) * sqlfun->res->cnt);
481 41 : argnode = sqlfun->res->h;
482 147 : for (i = 0; i < sqlfun->res->cnt; i++) {
483 106 : retnames[i] = ((sql_arg *)argnode->data)->name;
484 106 : argnode = argnode->next;
485 : }
486 : } else {
487 0 : msg = createException(MAL, "pyapi3.eval",
488 : SQLSTATE(PY000) "Return value is a dictionary, but "
489 : "there is no sql function object, so "
490 : "we don't know the return value "
491 : "names and mapping cannot be done.");
492 0 : goto wrapup;
493 : }
494 : } else {
495 : // If there are a variable number of return types, we take the
496 : // column names from the dictionary
497 0 : PyObject *keys = PyDict_Keys(pResult);
498 0 : retcols = (int)PyList_Size(keys);
499 0 : retnames = GDKzalloc(sizeof(char *) * retcols);
500 0 : for (i = 0; i < retcols; i++) {
501 0 : PyObject *colname = PyList_GetItem(keys, i);
502 0 : if (!PyUnicode_CheckExact(colname)) {
503 0 : msg = createException(MAL, "pyapi3.eval",
504 : SQLSTATE(PY000) "Expected a string key in the "
505 : "dictionary, but received an "
506 : "object of type %s",
507 : colname->ob_type->tp_name);
508 0 : goto wrapup;
509 : }
510 0 : retnames[i] = (char *) PyUnicode_AsUTF8(colname);
511 : }
512 0 : Py_DECREF(keys);
513 : }
514 41 : pResult = PyDict_CheckForConversion(pResult, retcols, retnames, &msg);
515 41 : if (retnames != NULL)
516 41 : GDKfree(retnames);
517 112 : } else if (varres) {
518 0 : msg = createException(MAL, "pyapi3.eval",
519 : SQLSTATE(PY000) "Expected a variable number return values, "
520 : "but the return type was not a dictionary. "
521 : "We require the return type to be a "
522 : "dictionary for column naming purposes.");
523 0 : goto wrapup;
524 : } else {
525 : // Now we need to do some error checking on the result object,
526 : // because the result object has to have the correct type/size
527 : // We will also do some converting of result objects to a common
528 : // type (such as scalar -> [[scalar]])
529 112 : pResult = PyObject_CheckForConversion(pResult, retcols, NULL, &msg);
530 : }
531 153 : if (pResult == NULL) {
532 3 : goto wrapup;
533 : }
534 : }
535 :
536 150 : if (varres) {
537 0 : GDKfree(pyreturn_values);
538 0 : pyreturn_values = GDKzalloc(retcols * sizeof(PyReturn));
539 : }
540 :
541 : // Now we have executed the Python function, we have to collect the return
542 : // values and convert them to BATs
543 : // We will first collect header information about the Python return objects
544 : // and extract the underlying C arrays
545 : // We will store this header information in a PyReturn object
546 :
547 : // The reason we are doing this as a separate step is because this
548 : // preprocessing requires us to call the Python API
549 : // Whereas the actual returning does not require us to call the Python API
550 : // This means we can do the actual returning without holding the GIL
551 150 : if (!PyObject_PreprocessObject(pResult, pyreturn_values, retcols, &msg)) {
552 0 : goto wrapup;
553 : }
554 :
555 : // We are done executing Python code (aside from cleanup), so we can release
556 : // the GIL
557 150 : gstate = Python_ReleaseGIL(gstate);
558 :
559 : /*[RETURN_VALUES]*/
560 150 : argnode = sqlfun && sqlfun->res ? sqlfun->res->h : NULL;
561 363 : for (i = 0; i < retcols; i++) {
562 213 : PyReturn *ret = &pyreturn_values[i];
563 213 : int bat_type = TYPE_any;
564 213 : sql_subtype *sql_subtype = argnode ? &((sql_arg *)argnode->data)->type : NULL;
565 213 : if (!varres) {
566 213 : bat_type = getBatType(getArgType(mb, pci, i));
567 :
568 213 : if (bat_type == TYPE_any || bat_type == TYPE_void) {
569 0 : bat_type = PyType_ToBat(ret->result_type);
570 0 : getArgType(mb, pci, i) = bat_type;
571 : }
572 : } else {
573 0 : bat_type = PyType_ToBat(ret->result_type);
574 : }
575 :
576 426 : b = PyObject_ConvertToBAT(ret, sql_subtype, bat_type, i, seqbase, &msg,
577 213 : !enable_zerocopy_output);
578 213 : if (b == NULL) {
579 0 : goto wrapup;
580 : }
581 :
582 213 : msg = MAL_SUCCEED;
583 213 : if (isaBatType(getArgType(mb, pci, i))) {
584 192 : *getArgReference_bat(stk, pci, i) = b->batCacheid;
585 192 : BBPkeepref(b);
586 : } else { // single value return, only for non-grouped aggregations
587 21 : BATiter li = bat_iterator(b);
588 21 : if (bat_type != TYPE_str) {
589 21 : if (VALinit(&stk->stk[pci->argv[i]], bat_type, li.base) ==
590 : NULL)
591 0 : msg = createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
592 : } else {
593 0 : if (VALinit(&stk->stk[pci->argv[i]], bat_type,
594 0 : BUNtail(li, 0)) == NULL)
595 0 : msg = createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
596 : }
597 21 : bat_iterator_end(&li);
598 21 : BBPunfix(b->batCacheid);
599 21 : b = NULL;
600 21 : if (msg != MAL_SUCCEED)
601 0 : goto wrapup;
602 : }
603 213 : if (argnode) {
604 213 : argnode = argnode->next;
605 : }
606 : }
607 150 : wrapup:
608 :
609 : // Actual cleanup
610 : // Cleanup input BATs
611 528 : for (i = pci->retc + 2 + has_card_arg; i < pci->argc; i++) {
612 366 : PyInput *inp = &pyinput_values[i - (pci->retc + 2 + has_card_arg)];
613 366 : BBPreclaim(inp->bat);
614 369 : BBPreclaim(inp->conv_bat); /* delayed free */
615 : }
616 162 : if (pResult != NULL && gstate == 0) {
617 : // if there is a pResult here, we are running single threaded (LANGUAGE
618 : // PYTHON),
619 : // thus we need to free python objects, thus we need to obtain the GIL
620 150 : gstate = Python_ObtainGIL();
621 : }
622 393 : for (i = 0; i < retcols; i++) {
623 231 : PyReturn *ret = &pyreturn_values[i];
624 : // First clean up any return values
625 231 : if (!ret->multidimensional) {
626 : // Clean up numpy arrays, if they are there
627 231 : if (ret->numpy_array != NULL) {
628 213 : Py_DECREF(ret->numpy_array);
629 : }
630 231 : if (ret->numpy_mask != NULL) {
631 30 : Py_DECREF(ret->numpy_mask);
632 : }
633 : }
634 : }
635 162 : if (pResult != NULL) {
636 150 : Py_DECREF(pResult);
637 : }
638 162 : if (gstate != 0) {
639 159 : gstate = Python_ReleaseGIL(gstate);
640 : }
641 :
642 : // Now release some GDK memory we allocated for strings and input values
643 162 : GDKfree(pyreturn_values);
644 162 : GDKfree(pyinput_values);
645 1246 : for (i = 0; i < pci->argc; i++)
646 922 : if (args[i])
647 366 : GDKfree(args[i]);
648 162 : GDKfree(args);
649 162 : GDKfree(pycall);
650 162 : if (freeexprStr)
651 1 : GDKfree(exprStr);
652 :
653 162 : return msg;
654 : }
655 :
656 : #ifdef _MSC_VER
657 : #define wcsdup _wcsdup
658 : #endif
659 : static str
660 10 : PYAPI3PyAPIprelude(void) {
661 10 : MT_lock_set(&pyapiLock);
662 10 : if (!pyapiInitialized) {
663 10 : wchar_t* program = L"mserver5";
664 10 : wchar_t* argv[] = { program, NULL };
665 10 : str msg = MAL_SUCCEED;
666 :
667 10 : static_assert(PY_MAJOR_VERSION == 3, "Python 3.X required");
668 : #if PY_MINOR_VERSION >= 11
669 : /* introduced in 3.8, we use it for 3.11 and later
670 : * on Windows, this code does not work with 3.10, it needs more
671 : * complex initialization */
672 10 : PyStatus status;
673 10 : PyConfig config;
674 :
675 10 : PyConfig_InitIsolatedConfig(&config);
676 10 : status = PyConfig_SetArgv(&config, 1, argv);
677 10 : if (!PyStatus_Exception(status))
678 10 : status = PyConfig_Read(&config);
679 10 : if (!PyStatus_Exception(status))
680 10 : status = Py_InitializeFromConfig(&config);
681 10 : PyConfig_Clear(&config);
682 10 : if (PyStatus_Exception(status)) {
683 0 : MT_lock_unset(&pyapiLock);
684 0 : throw(MAL, "pyapi3.eval",
685 : SQLSTATE(PY000) "Python initialization failed: %s: %s",
686 : status.func ? status.func : "PYAPI3PyAPIprelude",
687 : status.err_msg ? status.err_msg : "");
688 : }
689 : #else
690 : /* PySys_SetArgvEx deprecated in 3.11 */
691 : Py_InitializeEx(0);
692 : PySys_SetArgvEx(1, argv, 0);
693 : #endif
694 :
695 10 : _import_array();
696 10 : msg = _connection_init();
697 10 : if (msg != MAL_SUCCEED) {
698 0 : MT_lock_unset(&pyapiLock);
699 0 : return msg;
700 : }
701 10 : msg = _conversion_init();
702 10 : if (msg != MAL_SUCCEED) {
703 0 : MT_lock_unset(&pyapiLock);
704 0 : return msg;
705 : }
706 10 : _pytypes_init();
707 10 : _loader_init();
708 10 : marshal_module = PyImport_ImportModule("marshal");
709 10 : init_DateTimeAPI();
710 10 : if (marshal_module == NULL) {
711 0 : MT_lock_unset(&pyapiLock);
712 0 : return createException(MAL, "pyapi3.eval", SQLSTATE(PY000) "Failed to load Marshal module.");
713 : }
714 10 : marshal_loads = PyObject_GetAttrString(marshal_module, "loads");
715 10 : if (marshal_loads == NULL) {
716 0 : MT_lock_unset(&pyapiLock);
717 0 : return createException(MAL, "pyapi3.eval", SQLSTATE(PY000) "Failed to load function \"loads\" from Marshal module.");
718 : }
719 10 : if (PyRun_SimpleString("import numpy") != 0) {
720 0 : msg = PyError_CreateException("Failed to initialize embedded python", NULL);
721 0 : MT_lock_unset(&pyapiLock);
722 0 : return msg;
723 : }
724 10 : PyEval_SaveThread();
725 10 : if (msg != MAL_SUCCEED) {
726 : MT_lock_unset(&pyapiLock);
727 : return msg;
728 : }
729 10 : pyapiInitialized = true;
730 10 : fprintf(stdout, "# MonetDB/Python%d module loaded\n", 3);
731 : }
732 10 : MT_lock_unset(&pyapiLock);
733 10 : return MAL_SUCCEED;
734 : }
735 :
736 18 : char *PyError_CreateException(char *error_text, char *pycall)
737 : {
738 18 : PyObject *py_error_type = NULL, *py_error_value = NULL,
739 18 : *py_error_traceback = NULL;
740 18 : const char *py_error_string = NULL;
741 18 : lng line_number = -1;
742 :
743 18 : PyErr_Fetch(&py_error_type, &py_error_value, &py_error_traceback);
744 18 : if (py_error_value) {
745 18 : PyObject *error;
746 18 : PyErr_NormalizeException(&py_error_type, &py_error_value,
747 : &py_error_traceback);
748 18 : error = PyObject_Str(py_error_value);
749 :
750 18 : py_error_string = PyUnicode_AsUTF8(error);
751 18 : Py_XDECREF(error);
752 18 : if (pycall != NULL && strlen(pycall) > 0) {
753 18 : if (py_error_traceback == NULL) {
754 : // no traceback info, this means we are dealing with a parsing
755 : // error
756 : // line information should be in the error message
757 2 : sscanf(py_error_string, "%*[^0-9]" LLSCN, &line_number);
758 2 : if (line_number < 0)
759 0 : goto finally;
760 : } else {
761 16 : line_number =
762 16 : ((PyTracebackObject *)py_error_traceback)->tb_lineno;
763 : }
764 :
765 : // Now only display the line numbers around the error message, we
766 : // display 5 lines around the error message
767 : {
768 : char linenr[32];
769 : size_t nrpos, pos, i, j;
770 : char lineinformation[5000]; // we only support 5000 characters
771 : // for 5 lines of the program,
772 : // should be enough
773 : nrpos = 0; // Current line number
774 : pos = 0; // Current position in the lineinformation result array
775 2690 : for (i = 0; i < strlen(pycall); i++) {
776 2672 : if (pycall[i] == '\n' || i == 0) {
777 : // Check if we have arrived at a new line, if we have
778 : // increment the line count
779 151 : nrpos++;
780 : // Now check if we should display this line
781 151 : if (nrpos >= ((size_t)line_number - 2) &&
782 14 : nrpos <= ((size_t)line_number + 2) && pos < 4997) {
783 : // We shouldn't put a newline on the first line we
784 : // encounter, only on subsequent lines
785 8 : if (nrpos > ((size_t)line_number - 2))
786 7 : lineinformation[pos++] = '\n';
787 8 : if ((size_t)line_number == nrpos) {
788 : // If this line is the 'error' line, add an
789 : // arrow before it, otherwise just add spaces
790 2 : lineinformation[pos++] = '>';
791 2 : lineinformation[pos++] = ' ';
792 : } else {
793 6 : lineinformation[pos++] = ' ';
794 6 : lineinformation[pos++] = ' ';
795 : }
796 8 : snprintf(linenr, 32, "%zu", nrpos);
797 18 : for (j = 0; j < strlen(linenr); j++) {
798 10 : lineinformation[pos++] = linenr[j];
799 : }
800 8 : lineinformation[pos++] = '.';
801 8 : lineinformation[pos++] = ' ';
802 : }
803 : }
804 2672 : if (pycall[i] != '\n' && nrpos >= (size_t)line_number - 2 &&
805 150 : nrpos <= (size_t)line_number + 2 && pos < 4999) {
806 : // If we are on a line number that we have to display,
807 : // copy the text from this line for display
808 94 : lineinformation[pos++] = pycall[i];
809 : }
810 : }
811 18 : lineinformation[pos] = '\0';
812 18 : return createException(MAL, "pyapi3.eval", SQLSTATE(PY000) "%s\n%s\n%s",
813 : error_text, lineinformation,
814 : py_error_string);
815 : }
816 : }
817 : } else {
818 : py_error_string = "";
819 : }
820 0 : finally:
821 0 : if (pycall == NULL)
822 0 : return createException(MAL, "pyapi3.eval", SQLSTATE(PY000) "%s\n%s", error_text,
823 : py_error_string);
824 0 : return createException(MAL, "pyapi3.eval", SQLSTATE(PY000) "%s\n%s\n%s", error_text, pycall,
825 : py_error_string);
826 : }
827 :
828 3 : static str CreateEmptyReturn(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci,
829 : size_t retcols, oid seqbase)
830 : {
831 3 : str msg = MAL_SUCCEED;
832 3 : void **res = GDKzalloc(retcols * sizeof(void*));
833 :
834 3 : if (!res) {
835 0 : msg = createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
836 0 : goto bailout;
837 : }
838 :
839 6 : for (size_t i = 0; i < retcols; i++) {
840 3 : if (isaBatType(getArgType(mb, pci, i))) {
841 2 : BAT *b = COLnew(seqbase, getBatType(getArgType(mb, pci, i)), 0, TRANSIENT);
842 2 : if (!b) {
843 0 : msg = createException(MAL, "pyapi3.eval", GDK_EXCEPTION);
844 0 : goto bailout;
845 : }
846 2 : ((BAT**)res)[i] = b;
847 : } else { // single value return, only for non-grouped aggregations
848 : // return NULL to conform to SQL aggregates
849 1 : int tpe = getArgType(mb, pci, i);
850 1 : if (!VALinit(&stk->stk[pci->argv[i]], tpe, ATOMnilptr(tpe))) {
851 0 : msg = createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
852 0 : goto bailout;
853 : }
854 1 : ((ValPtr*)res)[i] = &stk->stk[pci->argv[i]];
855 : }
856 : }
857 :
858 3 : bailout:
859 3 : if (res) {
860 6 : for (size_t i = 0; i < retcols; i++) {
861 3 : if (isaBatType(getArgType(mb, pci, i))) {
862 2 : BAT *b = ((BAT**)res)[i];
863 :
864 2 : if (b && msg) {
865 0 : BBPreclaim(b);
866 2 : } else if (b) {
867 2 : *getArgReference_bat(stk, pci, i) = b->batCacheid;
868 2 : BBPkeepref(b);
869 : }
870 1 : } else if (msg) {
871 0 : ValPtr pt = ((ValPtr*)res)[i];
872 :
873 0 : if (pt)
874 0 : VALclear(pt);
875 : }
876 : }
877 3 : GDKfree(res);
878 : }
879 3 : return msg;
880 : }
881 :
882 : static str
883 10 : PyAPI3prelude(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
884 : {
885 10 : (void)cntxt; (void)mb; (void)stk; (void)pci;
886 10 : return PYAPI3PyAPIprelude();
887 : }
888 :
889 : static str
890 10 : PyAPI3epilogue(void *ret)
891 : {
892 10 : (void)ret;
893 10 : MT_lock_set(&pyapiLock);
894 10 : if (pyapiInitialized) {
895 10 : PyGILState_STATE gstate;
896 10 : gstate = PyGILState_Ensure();
897 :
898 : /* now exit/cleanup */
899 10 : if (0) Py_FinalizeEx();
900 10 : (void)gstate;
901 : }
902 10 : MT_lock_unset(&pyapiLock);
903 10 : return MAL_SUCCEED;
904 : }
905 :
906 : #include "mel.h"
907 : static mel_func pyapi3_init_funcs[] = {
908 : pattern("pyapi3", "eval", PYAPI3PyAPIevalStd, true, "Execute a simple Python script returning a single value", args(1,3, argany("",1),arg("fptr",ptr),arg("expr",str))),
909 : pattern("pyapi3", "eval", PYAPI3PyAPIevalStd, true, "Execute a simple Python script value", args(1,4, varargany("",0),arg("fptr",ptr),arg("expr",str),varargany("arg",0))),
910 : pattern("pyapi3", "subeval_aggr", PYAPI3PyAPIevalAggr, true, "grouped aggregates through Python", args(1,4, varargany("",0),arg("fptr",ptr),arg("expr",str),varargany("arg",0))),
911 : pattern("pyapi3", "eval_aggr", PYAPI3PyAPIevalAggr, true, "grouped aggregates through Python", args(1,4, varargany("",0),arg("fptr",ptr),arg("expr",str),varargany("arg",0))),
912 : pattern("pyapi3", "eval_loader", PYAPI3PyAPIevalLoader, true, "loader functions through Python", args(1,3, varargany("",0),arg("fptr",ptr),arg("expr",str))),
913 : pattern("pyapi3", "eval_loader", PYAPI3PyAPIevalLoader, true, "loader functions through Python", args(1,4, varargany("",0),arg("fptr",ptr),arg("expr",str),varargany("arg",0))),
914 : pattern("batpyapi3", "eval", PYAPI3PyAPIevalStd, true, "Execute a simple Python script value", args(1,4, batvarargany("",0),arg("fptr", ptr), arg("expr",str),varargany("arg",0))),
915 : pattern("batpyapi3", "eval", PYAPI3PyAPIevalStd, true, "Execute a simple Python script value", args(1,4, batargany("",1),arg("card", lng), arg("fptr",ptr),arg("expr",str))),
916 : pattern("batpyapi3", "subeval_aggr", PYAPI3PyAPIevalAggr, true, "grouped aggregates through Python", args(1,4, varargany("",0),arg("fptr",ptr),arg("expr",str),varargany("arg",0))),
917 : pattern("batpyapi3", "eval_aggr", PYAPI3PyAPIevalAggr, true, "grouped aggregates through Python", args(1,4, varargany("",0),arg("fptr",ptr),arg("expr",str),varargany("arg",0))),
918 : pattern("batpyapi3", "eval_loader", PYAPI3PyAPIevalLoader, true, "loader functions through Python", args(1,3, varargany("",0),arg("fptr",ptr),arg("expr",str))),
919 : pattern("batpyapi3", "eval_loader", PYAPI3PyAPIevalLoader, true, "loader functions through Python", args(1,4, varargany("",0),arg("fptr",ptr),arg("expr",str),varargany("arg",0))),
920 : pattern("pyapi3", "prelude", PyAPI3prelude, false, "", noargs),
921 : command("pyapi3", "epilogue", PyAPI3epilogue, false, "", noargs),
922 : { .imp=NULL }
923 : };
924 : #include "mal_import.h"
925 : #ifdef _MSC_VER
926 : #undef read
927 : #pragma section(".CRT$XCU",read)
928 : #endif
929 10 : LIB_STARTUP_FUNC(init_pyapi3_mal)
930 10 : { mal_module("pyapi3", NULL, pyapi3_init_funcs); }
|