LCOV - code coverage report
Current view: top level - sql/backends/monet5/UDF/pyapi3 - emit3.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 146 279 52.3 %
Date: 2024-12-20 20:06:10 Functions: 3 3 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include "emit.h"
      15             : #include "conversion.h"
      16             : #include "convert_loops.h"
      17             : #include "type_conversion.h"
      18             : #include "gdk_time.h"
      19             : 
      20             : #include "unicode.h"
      21             : 
      22             : #define scalar_convert(tpe)                                                    \
      23             :         {                                                                          \
      24             :                 tpe val = tpe##_nil;                                                   \
      25             :                 msg = pyobject_to_##tpe(&dictEntry, 42, &val);                         \
      26             :                 if (msg != MAL_SUCCEED ||                                              \
      27             :                         BUNappend(self->cols[i].b, &val, false) != GDK_SUCCEED) {          \
      28             :                         if (msg == MAL_SUCCEED)                                            \
      29             :                                 msg = createException(MAL, "pyapi3.emit", SQLSTATE(HY013) "BUNappend failed."); \
      30             :                         goto wrapup;                                                       \
      31             :                 }                                                                      \
      32             :         }
      33             : 
      34          76 : PyObject *PyEmit_Emit(PyEmitObject *self, PyObject *args)
      35             : {
      36          76 :         size_t i, ai; // iterators
      37          76 :         ssize_t el_count =
      38             :                 -1; // the amount of elements this emit call will write to the table
      39          76 :         size_t dict_elements, matched_elements;
      40          76 :         str msg = MAL_SUCCEED; // return message
      41          76 :         bool error = false;
      42             : 
      43          76 :         if (!PyDict_Check(args)) {
      44           0 :                 PyErr_SetString(PyExc_TypeError, "need dict");
      45           0 :                 return NULL;
      46             :         }
      47             : 
      48          76 :         matched_elements = 0;
      49          76 :         dict_elements = PyDict_Size(args);
      50          76 :         if (dict_elements == 0) {
      51           0 :                 PyErr_SetString(PyExc_TypeError,
      52             :                                                 "dict must contain at least one element");
      53           0 :                 return NULL;
      54             :         }
      55             :         {
      56          76 :                 PyObject *items = PyDict_Items(args);
      57         376 :                 for (i = 0; i < dict_elements; i++) {
      58         224 :                         PyObject *tuple = PyList_GetItem(items, i);
      59         224 :                         PyObject *key = PyTuple_GetItem(tuple, 0);
      60         224 :                         PyObject *dictEntry = PyTuple_GetItem(tuple, 1);
      61         224 :                         ssize_t this_size = 1;
      62         224 :                         this_size = PyType_Size(dictEntry);
      63         224 :                         if (this_size < 0) {
      64           0 :                                 PyErr_Format(
      65             :                                         PyExc_TypeError, "Unsupported Python Object %s",
      66             :                                         PyUnicode_AsUTF8(PyObject_Str(PyObject_Type(dictEntry))));
      67           0 :                                 Py_DECREF(items);
      68           0 :                                 return NULL;
      69             :                         }
      70         224 :                         if (el_count < 0) {
      71             :                                 el_count = this_size;
      72         148 :                         } else if (el_count != this_size) {
      73             :                                 /* don't use "%zu" since format given to Python */
      74           0 :                                 PyErr_Format(
      75             :                                         PyExc_TypeError, "Element %s has size %zd, but expected an "
      76             :                                                                          "element with size %zd",
      77             :                                         PyUnicode_AsUTF8(PyObject_Str(key)), this_size, el_count);
      78           0 :                                 Py_DECREF(items);
      79           0 :                                 return NULL;
      80             :                         }
      81             :                 }
      82          76 :                 Py_DECREF(items);
      83             :         }
      84          76 :         if (el_count == 0) {
      85           0 :                 PyErr_SetString(PyExc_TypeError, "Empty input values supplied");
      86           0 :                 return NULL;
      87             :         }
      88             : 
      89          76 :         if (!self->create_table) {
      90         324 :                 for (i = 0; i < self->ncols; i++) {
      91         249 :                         PyObject *dictEntry =
      92         249 :                                 PyDict_GetItemString(args, self->cols[i].name);
      93         249 :                         if (dictEntry) {
      94         222 :                                 matched_elements++;
      95             :                         }
      96             :                 }
      97          75 :                 if (matched_elements != dict_elements) {
      98             :                         // not all elements in the dictionary were matched, look for the
      99             :                         // element that was not matched
     100           0 :                         PyObject *keys = PyDict_Keys(args);
     101           0 :                         if (!keys) {
     102           0 :                                 msg = createException(MAL, "pyapi3.emit", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     103           0 :                                 goto wrapup;
     104             :                         }
     105           0 :                         for (i = 0; i < (size_t)PyList_Size(keys); i++) {
     106           0 :                                 PyObject *key = PyList_GetItem(keys, i);
     107           0 :                                 char *val = NULL;
     108           0 :                                 bool found = false;
     109             : 
     110           0 :                                 msg = pyobject_to_str(&key, 42, &val);
     111           0 :                                 if (msg != MAL_SUCCEED) {
     112             :                                         // one of the keys in the dictionary was not a string
     113           0 :                                         PyErr_Format(
     114             :                                                 PyExc_TypeError,
     115             :                                                 "Could not convert object type %s to a string: %s",
     116             :                                                 PyUnicode_AsUTF8(PyObject_Str(PyObject_Type(key))),
     117             :                                                 msg);
     118           0 :                                         free(val);
     119           0 :                                         goto loop_end;
     120             :                                 }
     121           0 :                                 for (ai = 0; ai < self->ncols; ai++) {
     122           0 :                                         if (strcmp(val, self->cols[ai].name) == 0) {
     123             :                                                 found = true;
     124             :                                                 break;
     125             :                                         }
     126             :                                 }
     127           0 :                                 if (!found) {
     128             :                                         // the current element was present in the dictionary, but it
     129             :                                         // has no matching column
     130           0 :                                         PyErr_Format(PyExc_TypeError,
     131             :                                                                  "Unmatched element \"%s\" in dict", val);
     132           0 :                                         error = true;
     133           0 :                                         free(val);
     134           0 :                                         goto loop_end;
     135             :                                 }
     136           0 :                                 free(val);
     137             :                         }
     138           0 :                 loop_end:
     139           0 :                         Py_DECREF(keys);
     140           0 :                         goto wrapup;
     141             :                 }
     142             :         } else {
     143           1 :                 size_t potential_size = self->ncols + PyDict_Size(args);
     144           1 :                 PyObject *keys;
     145           1 :                 if (potential_size > self->maxcols) {
     146             :                         // allocate space for new columns (if any new columns show up)
     147           1 :                         sql_emit_col *old = self->cols;
     148           1 :                         self->cols = GDKzalloc(sizeof(sql_emit_col) * potential_size);
     149           1 :                         if (self->cols == NULL) {
     150           0 :                                 PyErr_Format(PyExc_TypeError, "Out of memory error");
     151           0 :                                 error = true;
     152           0 :                                 goto wrapup;
     153             :                         }
     154           1 :                         if (old) {
     155           0 :                                 memcpy(self->cols, old, sizeof(sql_emit_col) * self->maxcols);
     156           0 :                                 GDKfree(old);
     157             :                         }
     158           1 :                         self->maxcols = potential_size;
     159             :                 }
     160           1 :                 keys = PyDict_Keys(args);
     161             :                 // create new columns based on the entries in the dictionary
     162           4 :                 for (i = 0; i < (size_t)PyList_Size(keys); i++) {
     163           2 :                         PyObject *key = PyList_GetItem(keys, i);
     164           2 :                         char *val = NULL;
     165           2 :                         bool found = false;
     166             : 
     167           2 :                         msg = pyobject_to_str(&key, 42, &val);
     168           2 :                         if (msg != MAL_SUCCEED) {
     169             :                                 // one of the keys in the dictionary was not a string
     170           0 :                                 PyErr_Format(
     171             :                                         PyExc_TypeError,
     172             :                                         "Could not convert object type %s to a string: %s",
     173             :                                         PyUnicode_AsUTF8(PyObject_Str(PyObject_Type(key))), msg);
     174           0 :                                 error = true;
     175           0 :                                 Py_DECREF(keys);
     176           0 :                                 free(val);
     177           0 :                                 goto wrapup;
     178             :                         }
     179           3 :                         for (ai = 0; ai < self->ncols; ai++) {
     180           1 :                                 if (strcmp(val, self->cols[ai].name) == 0) {
     181             :                                         found = true;
     182             :                                         break;
     183             :                                 }
     184             :                         }
     185           2 :                         if (!found) {
     186             :                                 // unrecognized column, create the column in the table
     187             :                                 // first infer the type from the value
     188             :                                 // we use NumPy for this by creating an array from the object
     189             :                                 // without specifying the type
     190           2 :                                 PyObject *value = PyDict_GetItem(args, key);
     191           2 :                                 PyObject *array = PyArray_FromAny(
     192             :                                         value, NULL, 0, 0, NPY_ARRAY_CARRAY | NPY_ARRAY_FORCECAST,
     193             :                                         NULL);
     194           2 :                                 PyArray_Descr *array_type = NULL;
     195           2 :                                 int bat_type = TYPE_int;
     196           2 :                                 if (!array) {
     197           0 :                                         PyErr_Format(PyExc_TypeError,
     198             :                                                                  "Failed to create NumPy array.");
     199           0 :                                         error = true;
     200           0 :                                         free(val);
     201           0 :                                         goto wrapup;
     202             :                                 }
     203           2 :                                 array_type =
     204           2 :                                         (PyArray_Descr *)PyArray_DESCR((PyArrayObject *)array);
     205           2 :                                 bat_type = PyType_ToBat(array_type->type_num);
     206           2 :                                 Py_DECREF(array);
     207             : 
     208           2 :                                 if (!(self->cols[self->ncols].b = COLnew(0, bat_type, 0, TRANSIENT))) {
     209           0 :                                         msg = createException(MAL, "pyapi3.emit", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     210           0 :                                         free(val);
     211           0 :                                         goto wrapup;
     212             :                                 }
     213           2 :                                 if (!(self->cols[self->ncols].name = GDKstrdup(val))) {
     214           0 :                                         msg = createException(MAL, "pyapi3.emit", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     215           0 :                                         free(val);
     216           0 :                                         goto wrapup;
     217             :                                 }
     218           2 :                                 self->cols[self->ncols].def = NULL;
     219           2 :                                 if (self->nvals > 0) {
     220             :                                         // insert NULL values up until the current entry
     221           0 :                                         for (ai = 0; ai < self->nvals; ai++) {
     222           0 :                                                 if (BUNappend(self->cols[self->ncols].b,
     223           0 :                                                                           ATOMnilptr(self->cols[self->ncols].b->ttype),
     224             :                                                                           false) != GDK_SUCCEED) {
     225           0 :                                                         msg = createException(MAL, "pyapi3.emit", SQLSTATE(HY013) "BUNappend failed.");
     226           0 :                                                         free(val);
     227           0 :                                                         goto wrapup;
     228             :                                                 }
     229             :                                         }
     230           0 :                                         self->cols[self->ncols].b->tnil = true;
     231           0 :                                         self->cols[self->ncols].b->tnonil = false;
     232           0 :                                         BATsetcount(self->cols[self->ncols].b, self->nvals);
     233             :                                 }
     234           2 :                                 self->ncols++;
     235             :                         }
     236           2 :                         free(val);
     237             :                 }
     238             :         }
     239             : 
     240         273 :         for (i = 0; i < self->ncols; i++) {
     241         209 :                 PyObject *dictEntry = PyDict_GetItemString(args, self->cols[i].name);
     242         209 :                 if (dictEntry && dictEntry != Py_None) {
     243         182 :                         if (PyType_IsPyScalar(dictEntry)) {
     244         141 :                                 if (self->cols[i].b->ttype == TYPE_blob) {
     245           0 :                                         blob s;
     246           0 :                                         blob* val = &s;
     247           0 :                                         val->nitems = ~(size_t) 0;
     248           0 :                                         msg = pyobject_to_blob(&dictEntry, 42, &val);
     249           0 :                                         if (msg != MAL_SUCCEED ||
     250           0 :                                                 BUNappend(self->cols[i].b, val, false) != GDK_SUCCEED) {
     251           0 :                                                 if (msg == MAL_SUCCEED)
     252           0 :                                                         msg = createException(MAL, "pyapi3.emit", SQLSTATE(HY013) "BUNappend failed.");
     253           0 :                                                 goto wrapup;
     254             :                                         }
     255           0 :                                 GDKfree(val);
     256             :                                 } else {
     257         141 :                                         switch (self->cols[i].b->ttype) {
     258           0 :                                                 case TYPE_bit:
     259           0 :                                                         scalar_convert(bit);
     260           0 :                                                         break;
     261           0 :                                                 case TYPE_bte:
     262           0 :                                                         scalar_convert(bte);
     263           0 :                                                         break;
     264           0 :                                                 case TYPE_sht:
     265           0 :                                                         scalar_convert(sht);
     266           0 :                                                         break;
     267         132 :                                                 case TYPE_int:
     268         132 :                                                         scalar_convert(int);
     269         132 :                                                         break;
     270           0 :                                                 case TYPE_oid:
     271           0 :                                                         scalar_convert(oid);
     272           0 :                                                         break;
     273           0 :                                                 case TYPE_lng:
     274           0 :                                                         scalar_convert(lng);
     275           0 :                                                         break;
     276           0 :                                                 case TYPE_flt:
     277           0 :                                                         scalar_convert(flt);
     278           0 :                                                         break;
     279           4 :                                                 case TYPE_dbl:
     280           4 :                                                         scalar_convert(dbl);
     281           4 :                                                         break;
     282             : #ifdef HAVE_HGE
     283           0 :                                                 case TYPE_hge:
     284           0 :                                                         scalar_convert(hge);
     285           0 :                                                         break;
     286             : #endif
     287           5 :                                                 default: {
     288           5 :                                                         str val = NULL;
     289           5 :                                                         gdk_return retval;
     290           5 :                                                         msg = pyobject_to_str(&dictEntry, 42, &val);
     291           5 :                                                         if (msg != MAL_SUCCEED) {
     292           0 :                                                                 free(val);
     293           0 :                                                                 goto wrapup;
     294             :                                                         }
     295           5 :                                                         assert(val);
     296           5 :                                                         retval = convert_and_append(self->cols[i].b, val, 0);
     297           5 :                                                         free(val);
     298           5 :                                                         if (retval != GDK_SUCCEED) {
     299           0 :                                                                 msg = createException(MAL, "pyapi3.emit", SQLSTATE(HY013) "BUNappend failed.");
     300           0 :                                                                 goto wrapup;
     301             :                                                         }
     302           5 :                                                         break;
     303             :                                                 }
     304             :                                         }
     305             :                                 }
     306             :                         } else {
     307          41 :                                 bool *mask = NULL;
     308          41 :                                 char *data = NULL;
     309          41 :                                 PyReturn return_struct;
     310          41 :                                 PyReturn *ret = &return_struct;
     311          41 :                                 size_t index_offset = 0;
     312          41 :                                 size_t iu = 0;
     313          41 :                                 if (BATextend(self->cols[i].b, self->nvals + el_count) !=
     314             :                                         GDK_SUCCEED) {
     315           0 :                                         msg = createException(MAL, "pyapi3.emit", SQLSTATE(HY013) "Failed to allocate memory to extend BAT.");
     316           0 :                                         goto wrapup;
     317             :                                 }
     318          41 :                                 msg = PyObject_GetReturnValues(dictEntry, ret);
     319          41 :                                 if (msg != MAL_SUCCEED) {
     320           0 :                                         goto wrapup;
     321             :                                 }
     322          41 :                                 if (ret->array_data == NULL) {
     323           0 :                                         msg = createException(MAL, "pyapi3.emit", SQLSTATE(HY013) "No return value stored in the structure.");
     324           0 :                                         goto wrapup;
     325             :                                 }
     326          41 :                                 mask = (bool *)ret->mask_data;
     327          41 :                                 data = (char *)ret->array_data;
     328          41 :                                 assert((size_t)el_count == (size_t)ret->count);
     329             : 
     330             :                                 /* we're not maintaining properties */
     331          41 :                                 self->cols[i].b->tsorted = false;
     332          41 :                                 self->cols[i].b->trevsorted = false;
     333          41 :                                 self->cols[i].b->tkey = false;
     334             : 
     335          41 :                                 switch (self->cols[i].b->ttype) {
     336           0 :                                         case TYPE_bit:
     337           0 :                                                 NP_INSERT_BAT(self->cols[i].b, bit, self->nvals);
     338           0 :                                                 break;
     339           0 :                                         case TYPE_bte:
     340           0 :                                                 NP_INSERT_BAT(self->cols[i].b, bte, self->nvals);
     341           0 :                                                 break;
     342           0 :                                         case TYPE_sht:
     343           0 :                                                 NP_INSERT_BAT(self->cols[i].b, sht, self->nvals);
     344           0 :                                                 break;
     345          28 :                                         case TYPE_int:
     346          84 :                                                 NP_INSERT_BAT(self->cols[i].b, int, self->nvals);
     347          28 :                                                 break;
     348           0 :                                         case TYPE_oid:
     349           0 :                                                 NP_INSERT_BAT(self->cols[i].b, oid, self->nvals);
     350           0 :                                                 break;
     351           0 :                                         case TYPE_lng:
     352           0 :                                                 NP_INSERT_BAT(self->cols[i].b, lng, self->nvals);
     353           0 :                                                 break;
     354           0 :                                         case TYPE_flt:
     355           0 :                                                 NP_INSERT_BAT(self->cols[i].b, flt, self->nvals);
     356           0 :                                                 break;
     357           0 :                                         case TYPE_dbl:
     358           0 :                                                 NP_INSERT_BAT(self->cols[i].b, dbl, self->nvals);
     359           0 :                                                 break;
     360             : #ifdef HAVE_HGE
     361           0 :                                         case TYPE_hge:
     362           0 :                                                 NP_INSERT_BAT(self->cols[i].b, hge, self->nvals);
     363           0 :                                                 break;
     364             : #endif
     365          13 :                                         default: {
     366          13 :                                                 char *utf8_string = NULL;
     367          13 :                                                 if (ret->result_type != NPY_OBJECT) {
     368          13 :                                                         utf8_string = GDKzalloc(utf8string_minlength +
     369             :                                                                                                         ret->memory_size + 1);
     370          13 :                                                         utf8_string[utf8string_minlength +
     371          13 :                                                                                 ret->memory_size] = '\0';
     372             :                                                 }
     373          48 :                                                 NP_INSERT_STRING_BAT(self->cols[i].b);
     374          13 :                                                 GDKfree(utf8_string);
     375             :                                         }
     376             :                                 }
     377          41 :                                 self->cols[i].b->tnonil = !self->cols[i].b->tnil;
     378          41 :                                 if (ret->numpy_array) {
     379          41 :                                         Py_DECREF(ret->numpy_array);
     380             :                                 }
     381          41 :                                 if (ret->numpy_mask) {
     382           0 :                                         Py_DECREF(ret->numpy_mask);
     383             :                                 }
     384             :                         }
     385             :                 } else {
     386          27 :                         if (self->cols[i].def != NULL) {
     387          12 :                                 msg = createException(MAL, "pyapi3.emit", "Inserting into columns with default values is not supported currently.");
     388          12 :                                 goto wrapup;
     389             :                         }
     390          31 :                         for (ai = 0; ai < (size_t)el_count; ai++) {
     391          16 :                                 if (BUNappend(self->cols[i].b,
     392          16 :                                                           ATOMnilptr(self->cols[i].b->ttype),
     393             :                                                           false) != GDK_SUCCEED) {
     394           0 :                                         goto wrapup;
     395             :                                 }
     396             :                         }
     397          15 :                         self->cols[i].b->tnil = true;
     398          15 :                         self->cols[i].b->tnonil = false;
     399             :                 }
     400         197 :                 BATsetcount(self->cols[i].b, self->nvals + el_count);
     401             :         }
     402             : 
     403          64 :         self->nvals += el_count;
     404          76 : wrapup:
     405          76 :         if (msg != MAL_SUCCEED) {
     406          12 :                 PyErr_Format(PyExc_TypeError, "Failed conversion: %s", msg);
     407          12 :                 freeException(msg);
     408          64 :         } else if (!error) {
     409          64 :                 Py_RETURN_NONE;
     410             :         }
     411             :         return NULL;
     412             : }
     413             : 
     414             : static PyMethodDef _emitObject_methods[] = {
     415             :         {"emit", (PyCFunction)PyEmit_Emit, METH_O,
     416             :          "emit(dictionary) -> returns parsed values for table insertion"},
     417             :         {NULL, NULL, 0, NULL} /* Sentinel */
     418             : };
     419             : 
     420             : PyTypeObject PyEmitType = {
     421             :         .ob_base.ob_base.ob_refcnt = 1,
     422             :         .tp_name = "monetdb._emit",
     423             :         .tp_basicsize = sizeof(PyEmitObject),
     424             :         .tp_hash = (hashfunc)PyObject_HashNotImplemented,
     425             :         .tp_flags = Py_TPFLAGS_DEFAULT,
     426             :         .tp_doc = "Value Emitter",
     427             :         .tp_methods = _emitObject_methods,
     428             :         .tp_alloc = PyType_GenericAlloc,
     429             :         .tp_new = PyType_GenericNew,
     430             :         .tp_free = PyObject_Del,
     431             : };
     432             : 
     433          25 : PyObject *PyEmit_Create(sql_emit_col *cols, size_t ncols)
     434             : {
     435          25 :         register PyEmitObject *op;
     436             : 
     437          25 :         op = (PyEmitObject *)PyObject_MALLOC(sizeof(PyEmitObject));
     438          25 :         if (op == NULL)
     439           0 :                 return PyErr_NoMemory();
     440          25 :         PyObject_Init((PyObject *)op, &PyEmitType);
     441          25 :         op->cols = cols;
     442          25 :         op->ncols = ncols;
     443          25 :         op->maxcols = ncols;
     444          25 :         op->nvals = 0;
     445          25 :         op->create_table = cols == NULL;
     446          25 :         return (PyObject *)op;
     447             : }
     448             : 
     449          10 : str _emit_init(void)
     450             : {
     451          10 :         _import_array();
     452          10 :         if (PyType_Ready(&PyEmitType) < 0)
     453           0 :                 return createException(MAL, "pyapi3.eval",
     454             :                                                            SQLSTATE(PY000) "Failed to initialize emit type.");
     455             :         return MAL_SUCCEED;
     456             : }

Generated by: LCOV version 1.14