Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "emit.h"
15 : #include "conversion.h"
16 : #include "convert_loops.h"
17 : #include "type_conversion.h"
18 : #include "gdk_time.h"
19 :
20 : #include "unicode.h"
21 :
/*
 * scalar_convert(tpe): convert one scalar Python object to a C value of type
 * `tpe` (via pyobject_to_<tpe>) and append it to the current column.
 *
 * This macro expands in the caller's scope and relies on these names being
 * visible there:
 *   dictEntry - PyObject * holding the value to convert
 *   self, i   - the emitter and the index of the column being filled
 *   msg       - receives the conversion error, or a "BUNappend failed."
 *               exception when the conversion worked but the append did not
 *   wrapup    - label jumped to on any failure
 *
 * The value starts out as <tpe>_nil. The body is wrapped in
 * do { ... } while (0) so that `scalar_convert(x);` is exactly one statement.
 */
#define scalar_convert(tpe) \
	do { \
		tpe val = tpe##_nil; \
		msg = pyobject_to_##tpe(&dictEntry, 42, &val); \
		if (msg != MAL_SUCCEED || \
			BUNappend(self->cols[i].b, &val, false) != GDK_SUCCEED) { \
			if (msg == MAL_SUCCEED) \
				msg = createException(MAL, "pyapi3.emit", SQLSTATE(HY013) "BUNappend failed."); \
			goto wrapup; \
		} \
	} while (0)
33 :
34 76 : PyObject *PyEmit_Emit(PyEmitObject *self, PyObject *args)
35 : {
36 76 : size_t i, ai; // iterators
37 76 : ssize_t el_count =
38 : -1; // the amount of elements this emit call will write to the table
39 76 : size_t dict_elements, matched_elements;
40 76 : str msg = MAL_SUCCEED; // return message
41 76 : bool error = false;
42 :
43 76 : if (!PyDict_Check(args)) {
44 0 : PyErr_SetString(PyExc_TypeError, "need dict");
45 0 : return NULL;
46 : }
47 :
48 76 : matched_elements = 0;
49 76 : dict_elements = PyDict_Size(args);
50 76 : if (dict_elements == 0) {
51 0 : PyErr_SetString(PyExc_TypeError,
52 : "dict must contain at least one element");
53 0 : return NULL;
54 : }
55 : {
56 76 : PyObject *items = PyDict_Items(args);
57 376 : for (i = 0; i < dict_elements; i++) {
58 224 : PyObject *tuple = PyList_GetItem(items, i);
59 224 : PyObject *key = PyTuple_GetItem(tuple, 0);
60 224 : PyObject *dictEntry = PyTuple_GetItem(tuple, 1);
61 224 : ssize_t this_size = 1;
62 224 : this_size = PyType_Size(dictEntry);
63 224 : if (this_size < 0) {
64 0 : PyErr_Format(
65 : PyExc_TypeError, "Unsupported Python Object %s",
66 : PyUnicode_AsUTF8(PyObject_Str(PyObject_Type(dictEntry))));
67 0 : Py_DECREF(items);
68 0 : return NULL;
69 : }
70 224 : if (el_count < 0) {
71 : el_count = this_size;
72 148 : } else if (el_count != this_size) {
73 : /* don't use "%zu" since format given to Python */
74 0 : PyErr_Format(
75 : PyExc_TypeError, "Element %s has size %zd, but expected an "
76 : "element with size %zd",
77 : PyUnicode_AsUTF8(PyObject_Str(key)), this_size, el_count);
78 0 : Py_DECREF(items);
79 0 : return NULL;
80 : }
81 : }
82 76 : Py_DECREF(items);
83 : }
84 76 : if (el_count == 0) {
85 0 : PyErr_SetString(PyExc_TypeError, "Empty input values supplied");
86 0 : return NULL;
87 : }
88 :
89 76 : if (!self->create_table) {
90 324 : for (i = 0; i < self->ncols; i++) {
91 249 : PyObject *dictEntry =
92 249 : PyDict_GetItemString(args, self->cols[i].name);
93 249 : if (dictEntry) {
94 222 : matched_elements++;
95 : }
96 : }
97 75 : if (matched_elements != dict_elements) {
98 : // not all elements in the dictionary were matched, look for the
99 : // element that was not matched
100 0 : PyObject *keys = PyDict_Keys(args);
101 0 : if (!keys) {
102 0 : msg = createException(MAL, "pyapi3.emit", SQLSTATE(HY013) MAL_MALLOC_FAIL);
103 0 : goto wrapup;
104 : }
105 0 : for (i = 0; i < (size_t)PyList_Size(keys); i++) {
106 0 : PyObject *key = PyList_GetItem(keys, i);
107 0 : char *val = NULL;
108 0 : bool found = false;
109 :
110 0 : msg = pyobject_to_str(&key, 42, &val);
111 0 : if (msg != MAL_SUCCEED) {
112 : // one of the keys in the dictionary was not a string
113 0 : PyErr_Format(
114 : PyExc_TypeError,
115 : "Could not convert object type %s to a string: %s",
116 : PyUnicode_AsUTF8(PyObject_Str(PyObject_Type(key))),
117 : msg);
118 0 : free(val);
119 0 : goto loop_end;
120 : }
121 0 : for (ai = 0; ai < self->ncols; ai++) {
122 0 : if (strcmp(val, self->cols[ai].name) == 0) {
123 : found = true;
124 : break;
125 : }
126 : }
127 0 : if (!found) {
128 : // the current element was present in the dictionary, but it
129 : // has no matching column
130 0 : PyErr_Format(PyExc_TypeError,
131 : "Unmatched element \"%s\" in dict", val);
132 0 : error = true;
133 0 : free(val);
134 0 : goto loop_end;
135 : }
136 0 : free(val);
137 : }
138 0 : loop_end:
139 0 : Py_DECREF(keys);
140 0 : goto wrapup;
141 : }
142 : } else {
143 1 : size_t potential_size = self->ncols + PyDict_Size(args);
144 1 : PyObject *keys;
145 1 : if (potential_size > self->maxcols) {
146 : // allocate space for new columns (if any new columns show up)
147 1 : sql_emit_col *old = self->cols;
148 1 : self->cols = GDKzalloc(sizeof(sql_emit_col) * potential_size);
149 1 : if (self->cols == NULL) {
150 0 : PyErr_Format(PyExc_TypeError, "Out of memory error");
151 0 : error = true;
152 0 : goto wrapup;
153 : }
154 1 : if (old) {
155 0 : memcpy(self->cols, old, sizeof(sql_emit_col) * self->maxcols);
156 0 : GDKfree(old);
157 : }
158 1 : self->maxcols = potential_size;
159 : }
160 1 : keys = PyDict_Keys(args);
161 : // create new columns based on the entries in the dictionary
162 4 : for (i = 0; i < (size_t)PyList_Size(keys); i++) {
163 2 : PyObject *key = PyList_GetItem(keys, i);
164 2 : char *val = NULL;
165 2 : bool found = false;
166 :
167 2 : msg = pyobject_to_str(&key, 42, &val);
168 2 : if (msg != MAL_SUCCEED) {
169 : // one of the keys in the dictionary was not a string
170 0 : PyErr_Format(
171 : PyExc_TypeError,
172 : "Could not convert object type %s to a string: %s",
173 : PyUnicode_AsUTF8(PyObject_Str(PyObject_Type(key))), msg);
174 0 : error = true;
175 0 : Py_DECREF(keys);
176 0 : free(val);
177 0 : goto wrapup;
178 : }
179 3 : for (ai = 0; ai < self->ncols; ai++) {
180 1 : if (strcmp(val, self->cols[ai].name) == 0) {
181 : found = true;
182 : break;
183 : }
184 : }
185 2 : if (!found) {
186 : // unrecognized column, create the column in the table
187 : // first infer the type from the value
188 : // we use NumPy for this by creating an array from the object
189 : // without specifying the type
190 2 : PyObject *value = PyDict_GetItem(args, key);
191 2 : PyObject *array = PyArray_FromAny(
192 : value, NULL, 0, 0, NPY_ARRAY_CARRAY | NPY_ARRAY_FORCECAST,
193 : NULL);
194 2 : PyArray_Descr *array_type = NULL;
195 2 : int bat_type = TYPE_int;
196 2 : if (!array) {
197 0 : PyErr_Format(PyExc_TypeError,
198 : "Failed to create NumPy array.");
199 0 : error = true;
200 0 : free(val);
201 0 : goto wrapup;
202 : }
203 2 : array_type =
204 2 : (PyArray_Descr *)PyArray_DESCR((PyArrayObject *)array);
205 2 : bat_type = PyType_ToBat(array_type->type_num);
206 2 : Py_DECREF(array);
207 :
208 2 : if (!(self->cols[self->ncols].b = COLnew(0, bat_type, 0, TRANSIENT))) {
209 0 : msg = createException(MAL, "pyapi3.emit", SQLSTATE(HY013) MAL_MALLOC_FAIL);
210 0 : free(val);
211 0 : goto wrapup;
212 : }
213 2 : if (!(self->cols[self->ncols].name = GDKstrdup(val))) {
214 0 : msg = createException(MAL, "pyapi3.emit", SQLSTATE(HY013) MAL_MALLOC_FAIL);
215 0 : free(val);
216 0 : goto wrapup;
217 : }
218 2 : self->cols[self->ncols].def = NULL;
219 2 : if (self->nvals > 0) {
220 : // insert NULL values up until the current entry
221 0 : for (ai = 0; ai < self->nvals; ai++) {
222 0 : if (BUNappend(self->cols[self->ncols].b,
223 0 : ATOMnilptr(self->cols[self->ncols].b->ttype),
224 : false) != GDK_SUCCEED) {
225 0 : msg = createException(MAL, "pyapi3.emit", SQLSTATE(HY013) "BUNappend failed.");
226 0 : free(val);
227 0 : goto wrapup;
228 : }
229 : }
230 0 : self->cols[self->ncols].b->tnil = true;
231 0 : self->cols[self->ncols].b->tnonil = false;
232 0 : BATsetcount(self->cols[self->ncols].b, self->nvals);
233 : }
234 2 : self->ncols++;
235 : }
236 2 : free(val);
237 : }
238 : }
239 :
240 273 : for (i = 0; i < self->ncols; i++) {
241 209 : PyObject *dictEntry = PyDict_GetItemString(args, self->cols[i].name);
242 209 : if (dictEntry && dictEntry != Py_None) {
243 182 : if (PyType_IsPyScalar(dictEntry)) {
244 141 : if (self->cols[i].b->ttype == TYPE_blob) {
245 0 : blob s;
246 0 : blob* val = &s;
247 0 : val->nitems = ~(size_t) 0;
248 0 : msg = pyobject_to_blob(&dictEntry, 42, &val);
249 0 : if (msg != MAL_SUCCEED ||
250 0 : BUNappend(self->cols[i].b, val, false) != GDK_SUCCEED) {
251 0 : if (msg == MAL_SUCCEED)
252 0 : msg = createException(MAL, "pyapi3.emit", SQLSTATE(HY013) "BUNappend failed.");
253 0 : goto wrapup;
254 : }
255 0 : GDKfree(val);
256 : } else {
257 141 : switch (self->cols[i].b->ttype) {
258 0 : case TYPE_bit:
259 0 : scalar_convert(bit);
260 0 : break;
261 0 : case TYPE_bte:
262 0 : scalar_convert(bte);
263 0 : break;
264 0 : case TYPE_sht:
265 0 : scalar_convert(sht);
266 0 : break;
267 132 : case TYPE_int:
268 132 : scalar_convert(int);
269 132 : break;
270 0 : case TYPE_oid:
271 0 : scalar_convert(oid);
272 0 : break;
273 0 : case TYPE_lng:
274 0 : scalar_convert(lng);
275 0 : break;
276 0 : case TYPE_flt:
277 0 : scalar_convert(flt);
278 0 : break;
279 4 : case TYPE_dbl:
280 4 : scalar_convert(dbl);
281 4 : break;
282 : #ifdef HAVE_HGE
283 0 : case TYPE_hge:
284 0 : scalar_convert(hge);
285 0 : break;
286 : #endif
287 5 : default: {
288 5 : str val = NULL;
289 5 : gdk_return retval;
290 5 : msg = pyobject_to_str(&dictEntry, 42, &val);
291 5 : if (msg != MAL_SUCCEED) {
292 0 : free(val);
293 0 : goto wrapup;
294 : }
295 5 : assert(val);
296 5 : retval = convert_and_append(self->cols[i].b, val, 0);
297 5 : free(val);
298 5 : if (retval != GDK_SUCCEED) {
299 0 : msg = createException(MAL, "pyapi3.emit", SQLSTATE(HY013) "BUNappend failed.");
300 0 : goto wrapup;
301 : }
302 5 : break;
303 : }
304 : }
305 : }
306 : } else {
307 41 : bool *mask = NULL;
308 41 : char *data = NULL;
309 41 : PyReturn return_struct;
310 41 : PyReturn *ret = &return_struct;
311 41 : size_t index_offset = 0;
312 41 : size_t iu = 0;
313 41 : if (BATextend(self->cols[i].b, self->nvals + el_count) !=
314 : GDK_SUCCEED) {
315 0 : msg = createException(MAL, "pyapi3.emit", SQLSTATE(HY013) "Failed to allocate memory to extend BAT.");
316 0 : goto wrapup;
317 : }
318 41 : msg = PyObject_GetReturnValues(dictEntry, ret);
319 41 : if (msg != MAL_SUCCEED) {
320 0 : goto wrapup;
321 : }
322 41 : if (ret->array_data == NULL) {
323 0 : msg = createException(MAL, "pyapi3.emit", SQLSTATE(HY013) "No return value stored in the structure.");
324 0 : goto wrapup;
325 : }
326 41 : mask = (bool *)ret->mask_data;
327 41 : data = (char *)ret->array_data;
328 41 : assert((size_t)el_count == (size_t)ret->count);
329 :
330 : /* we're not maintaining properties */
331 41 : self->cols[i].b->tsorted = false;
332 41 : self->cols[i].b->trevsorted = false;
333 41 : self->cols[i].b->tkey = false;
334 :
335 41 : switch (self->cols[i].b->ttype) {
336 0 : case TYPE_bit:
337 0 : NP_INSERT_BAT(self->cols[i].b, bit, self->nvals);
338 0 : break;
339 0 : case TYPE_bte:
340 0 : NP_INSERT_BAT(self->cols[i].b, bte, self->nvals);
341 0 : break;
342 0 : case TYPE_sht:
343 0 : NP_INSERT_BAT(self->cols[i].b, sht, self->nvals);
344 0 : break;
345 28 : case TYPE_int:
346 84 : NP_INSERT_BAT(self->cols[i].b, int, self->nvals);
347 28 : break;
348 0 : case TYPE_oid:
349 0 : NP_INSERT_BAT(self->cols[i].b, oid, self->nvals);
350 0 : break;
351 0 : case TYPE_lng:
352 0 : NP_INSERT_BAT(self->cols[i].b, lng, self->nvals);
353 0 : break;
354 0 : case TYPE_flt:
355 0 : NP_INSERT_BAT(self->cols[i].b, flt, self->nvals);
356 0 : break;
357 0 : case TYPE_dbl:
358 0 : NP_INSERT_BAT(self->cols[i].b, dbl, self->nvals);
359 0 : break;
360 : #ifdef HAVE_HGE
361 0 : case TYPE_hge:
362 0 : NP_INSERT_BAT(self->cols[i].b, hge, self->nvals);
363 0 : break;
364 : #endif
365 13 : default: {
366 13 : char *utf8_string = NULL;
367 13 : if (ret->result_type != NPY_OBJECT) {
368 13 : utf8_string = GDKzalloc(utf8string_minlength +
369 : ret->memory_size + 1);
370 13 : utf8_string[utf8string_minlength +
371 13 : ret->memory_size] = '\0';
372 : }
373 48 : NP_INSERT_STRING_BAT(self->cols[i].b);
374 13 : GDKfree(utf8_string);
375 : }
376 : }
377 41 : self->cols[i].b->tnonil = !self->cols[i].b->tnil;
378 41 : if (ret->numpy_array) {
379 41 : Py_DECREF(ret->numpy_array);
380 : }
381 41 : if (ret->numpy_mask) {
382 0 : Py_DECREF(ret->numpy_mask);
383 : }
384 : }
385 : } else {
386 27 : if (self->cols[i].def != NULL) {
387 12 : msg = createException(MAL, "pyapi3.emit", "Inserting into columns with default values is not supported currently.");
388 12 : goto wrapup;
389 : }
390 31 : for (ai = 0; ai < (size_t)el_count; ai++) {
391 16 : if (BUNappend(self->cols[i].b,
392 16 : ATOMnilptr(self->cols[i].b->ttype),
393 : false) != GDK_SUCCEED) {
394 0 : goto wrapup;
395 : }
396 : }
397 15 : self->cols[i].b->tnil = true;
398 15 : self->cols[i].b->tnonil = false;
399 : }
400 197 : BATsetcount(self->cols[i].b, self->nvals + el_count);
401 : }
402 :
403 64 : self->nvals += el_count;
404 76 : wrapup:
405 76 : if (msg != MAL_SUCCEED) {
406 12 : PyErr_Format(PyExc_TypeError, "Failed conversion: %s", msg);
407 12 : freeException(msg);
408 64 : } else if (!error) {
409 64 : Py_RETURN_NONE;
410 : }
411 : return NULL;
412 : }
413 :
414 : static PyMethodDef _emitObject_methods[] = {
415 : {"emit", (PyCFunction)PyEmit_Emit, METH_O,
416 : "emit(dictionary) -> returns parsed values for table insertion"},
417 : {NULL, NULL, 0, NULL} /* Sentinel */
418 : };
419 :
420 : PyTypeObject PyEmitType = {
421 : .ob_base.ob_base.ob_refcnt = 1,
422 : .tp_name = "monetdb._emit",
423 : .tp_basicsize = sizeof(PyEmitObject),
424 : .tp_hash = (hashfunc)PyObject_HashNotImplemented,
425 : .tp_flags = Py_TPFLAGS_DEFAULT,
426 : .tp_doc = "Value Emitter",
427 : .tp_methods = _emitObject_methods,
428 : .tp_alloc = PyType_GenericAlloc,
429 : .tp_new = PyType_GenericNew,
430 : .tp_free = PyObject_Del,
431 : };
432 :
433 25 : PyObject *PyEmit_Create(sql_emit_col *cols, size_t ncols)
434 : {
435 25 : register PyEmitObject *op;
436 :
437 25 : op = (PyEmitObject *)PyObject_MALLOC(sizeof(PyEmitObject));
438 25 : if (op == NULL)
439 0 : return PyErr_NoMemory();
440 25 : PyObject_Init((PyObject *)op, &PyEmitType);
441 25 : op->cols = cols;
442 25 : op->ncols = ncols;
443 25 : op->maxcols = ncols;
444 25 : op->nvals = 0;
445 25 : op->create_table = cols == NULL;
446 25 : return (PyObject *)op;
447 : }
448 :
449 10 : str _emit_init(void)
450 : {
451 10 : _import_array();
452 10 : if (PyType_Ready(&PyEmitType) < 0)
453 0 : return createException(MAL, "pyapi3.eval",
454 : SQLSTATE(PY000) "Failed to initialize emit type.");
455 : return MAL_SUCCEED;
456 : }
|