LCOV - code coverage report
Current view: top level - sql/backends/monet5/UDF/pyapi3 - formatinput3.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 137 215 63.7 %
Date: 2024-12-19 23:10:26 Functions: 1 3 33.3 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include "formatinput.h"
      15             : #include "type_conversion.h"
      16             : 
      17             : //! Parse a PyCodeObject from a string, the string is expected to be in the
      18             : //! format {@<encoded_function>};, where <encoded_function> is the Marshalled
      19             : //! code object
      20             : PyObject *PyCodeObject_ParseString(char *string, char **msg);
      21           0 : PyObject *PyCodeObject_ParseString(char *string, char **msg)
      22             : {
      23           0 :         size_t length = strlen(string);
      24           0 :         PyObject *code_object, *tuple, *mystr;
      25           0 :         char *code_copy = GDKmalloc(length * sizeof(char));
      26           0 :         char hex[3];
      27           0 :         size_t i, j;
      28           0 :         hex[2] = '\0';
      29           0 :         if (code_copy == NULL) {
      30           0 :                 *msg = createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
      31           0 :                 return NULL;
      32             :         }
      33             :         // decode hex codes (e.g. \x00) in the string to the actual numeric
      34             :         // representation
      35           0 :         for (i = 2, j = 0; i < length - 2; i++) {
      36           0 :                 if (string[i] == '\\' && string[i + 1] == '\\')
      37           0 :                         i++;
      38           0 :                 if (string[i] == '\\' && string[i + 1] == 't') {
      39           0 :                         code_copy[j++] = '\t';
      40           0 :                         i++;
      41           0 :                 } else if (string[i] == '\\' && string[i + 1] == 'n') {
      42           0 :                         code_copy[j++] = '\n';
      43           0 :                         i++;
      44           0 :                 } else if (string[i] == '\\' && string[i + 1] == 'x') {
      45           0 :                         hex[0] = string[i + 2];
      46           0 :                         hex[1] = string[i + 3];
      47           0 :                         code_copy[j++] = (char)strtol(hex, NULL, 16);
      48           0 :                         i += 3;
      49             :                 } else {
      50           0 :                         code_copy[j++] = string[i];
      51             :                 }
      52             :         }
      53           0 :         code_copy[j] = '\0';
      54           0 :         tuple = PyTuple_New(1);
      55           0 :         mystr = PyUnicode_FromStringAndSize(
      56             :                 code_copy,
      57             :                 j); // use FromStringAndSize because the string is not null-terminated
      58           0 :         PyTuple_SetItem(tuple, 0, mystr);
      59           0 :         code_object = PyObject_CallObject(marshal_loads, tuple);
      60           0 :         Py_DECREF(tuple);
      61           0 :         GDKfree(code_copy);
      62           0 :         if (code_object == NULL) {
      63           0 :                 PyErr_Print();
      64           0 :                 *msg = createException(MAL, "pyapi3.eval",
      65             :                                                            SQLSTATE(PY000) "Failed to marshal.loads() encoded object");
      66           0 :                 return NULL;
      67             :         }
      68           0 :         *msg = MAL_SUCCEED;
      69           0 :         return code_object;
      70             : }
      71             : 
      72         184 : char *FormatCode(char *code, char **args, size_t argcount, size_t tabwidth,
      73             :                                  PyObject **code_object, char **msg, char **additional_args,
      74             :                                  size_t additional_argcount)
      75             : {
      76             :         // Format the python code by fixing the indentation levels
      77             :         // We do two passes, first we get the length of the resulting formatted code
      78             :         // and then we actually create the resulting code
      79         184 :         size_t i = 0, j = 0, k = 0;
      80         184 :         size_t length = strlen(code);
      81         184 :         size_t size = 0;
      82         184 :         size_t spaces_per_level = 2;
      83             : 
      84         184 :         size_t code_location = 0;
      85         184 :         char *newcode = NULL;
      86             : 
      87         184 :         size_t indentation_count = 0;
      88         184 :         size_t max_indentation = 100;
      89             :         // This keeps track of the different indentation levels
      90             :         // indentation_levels is a sorted array with how many spaces of indentation
      91             :         // that specific array has
      92             :         // so indentation_levels[0] = 4 means that the first level (level 0) has 4
      93             :         // spaces in the source code
      94             :         // after this array is constructed we can count the amount of spaces before
      95             :         // a statement and look in this
      96             :         // array to immediately find the indentation level of the statement
      97         184 :         size_t *indentation_levels;
      98             :         // statements_per_level keeps track of how many statements are at the
      99             :         // specified indentation level
     100             :         // this is needed to compute the size of the resulting formatted code
     101             :         // for every indentation level i, we add statements_per_level[i] * (i + 1) *
     102             :         // spaces_per_level spaces
     103         184 :         size_t *statements_per_level;
     104             : 
     105         184 :         size_t initial_spaces = 0;
     106         184 :         size_t statement_size = 0;
     107         184 :         bool seen_statement = false;
     108         184 :         bool multiline_statement = false;
     109         184 :         int multiline_quotes = 0;
     110             : 
     111         184 :         char base_start[] = "def pyfun(";
     112         184 :         char base_end[] = "):\n";
     113         184 :         *msg = NULL;
     114         184 :         (void)code_object;
     115             : 
     116         184 :         indentation_levels = (size_t *)GDKzalloc(max_indentation * sizeof(size_t));
     117         368 :         statements_per_level =
     118         184 :                 (size_t *)GDKzalloc(max_indentation * sizeof(size_t));
     119         184 :         if (indentation_levels == NULL || statements_per_level == NULL) {
     120           0 :                 *msg = createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     121           0 :                 goto finally;
     122             :         }
     123             : 
     124             :         // Base function definition size
     125             :         // For every argument, add a comma, and add another entry for the '\0'
     126         184 :         size += strlen(base_start) + strlen(base_end) + argcount + 1;
     127        1211 :         for (i = 0; i < argcount; i++) {
     128        1027 :                 if (args[i] != NULL) {
     129         405 :                         size += strlen(args[i]) + 1;
     130             :                 }
     131             :         }
     132             :         // Additional parameters
     133         711 :         for (i = 0; i < additional_argcount; i++)
     134         527 :                 size += strlen(additional_args[i]) + 1;
     135             : 
     136             :         // First remove the "{" at the start and the "};" at the end of the
     137             :         // function, this is added when we have a function created through SQL and
     138             :         // python doesn't like them
     139             :         // We need to be careful to only remove ones at the start/end, otherwise we
     140             :         // might invalidate some otherwise valid python code containing them
     141         547 :         for (i = length - 1, j = 0; i > 0; i--) {
     142         547 :                 if (code[i] != '\n' && code[i] != ' ' && code[i] != '\t' &&
     143         184 :                         code[i] != ';' && code[i] != '}')
     144             :                         break;
     145         541 :                 if (j == 0) {
     146         190 :                         if (code[i] == ';') {
     147         178 :                                 code[i] = ' ';
     148         178 :                                 j = 1;
     149             :                         }
     150         351 :                 } else if (j == 1) {
     151         351 :                         if (code[i] == '}') {
     152         178 :                                 code[i] = ' ';
     153         178 :                                 break;
     154             :                         }
     155             :                 }
     156             :         }
     157         990 :         for (i = 0; i < length; i++) {
     158         990 :                 if (code[i] != '\n' && code[i] != ' ' && code[i] != '\t' &&
     159             :                         code[i] != '{')
     160             :                         break;
     161         806 :                 if (code[i] == '{') {
     162         178 :                         code[i] = ' ';
     163             :                 }
     164             :         }
     165             :         // We indent using spaces, four spaces per level
     166             :         // We also erase empty lines
     167       23778 :         for (i = 0; i < length; i++) {
     168             :                 // handle multiline strings (strings that start with """)
     169       23594 :                 if (code[i] == '\"') {
     170          56 :                         if (!multiline_statement) {
     171          49 :                                 multiline_quotes++;
     172          49 :                                 multiline_statement = multiline_quotes == 3;
     173             :                         } else {
     174           7 :                                 multiline_quotes--;
     175           7 :                                 multiline_statement = multiline_quotes != 0;
     176             :                         }
     177             :                 } else {
     178       23538 :                         multiline_quotes = multiline_statement ? 3 : 0;
     179             :                 }
     180             : 
     181       23594 :                 if (!seen_statement) {
     182             :                         // We have not seen a statement on this line yet
     183        4264 :                         if (code[i] == '\n') {
     184             :                                 // Empty line, skip to the next one
     185             :                                 initial_spaces = 0;
     186        3939 :                         } else if (code[i] == ' ') {
     187        2767 :                                 initial_spaces++;
     188        1172 :                         } else if (code[i] == '\t') {
     189         434 :                                 initial_spaces += tabwidth;
     190             :                         } else {
     191             :                                 // Statement starts here
     192             :                                 seen_statement = true;
     193             :                         }
     194             :                 }
     195             :                 if (seen_statement) {
     196             :                         // We have seen a statement on this line, check the indentation
     197             :                         // level
     198       20068 :                         statement_size++;
     199             : 
     200       20068 :                         if (code[i] == '\n' || i == length - 1) {
     201             :                                 // Statement ends here
     202         742 :                                 bool placed = false;
     203         742 :                                 size_t level = 0;
     204             : 
     205         742 :                                 if (multiline_statement) {
     206             :                                         // if we are in a multiline statement, we don't want to mess
     207             :                                         // with the indentation
     208           4 :                                         size += statement_size;
     209           4 :                                         initial_spaces = 0;
     210           4 :                                         statement_size = 0;
     211           4 :                                         continue;
     212             :                                 }
     213             :                                 // First put the indentation in the indentation table
     214         738 :                                 if (indentation_count >= max_indentation) {
     215             :                                         // If there is no room in the indentation arrays we will
     216             :                                         // extend them
     217             :                                         // This probably will never happen unless in really extreme
     218             :                                         // code (or if max_indentation is set very low)
     219           0 :                                         size_t *new_indentation =
     220           0 :                                                 GDKzalloc(2 * max_indentation * sizeof(size_t));
     221           0 :                                         size_t *new_statements_per_level;
     222           0 :                                         if (new_indentation == NULL) {
     223           0 :                                                 *msg =
     224           0 :                                                         createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     225           0 :                                                 goto finally;
     226             :                                         }
     227           0 :                                         new_statements_per_level =
     228           0 :                                                 GDKzalloc(2 * max_indentation * sizeof(size_t));
     229           0 :                                         if (new_statements_per_level == NULL) {
     230           0 :                                                 *msg =
     231           0 :                                                         createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     232           0 :                                                 goto finally;
     233             :                                         }
     234             : 
     235           0 :                                         for (i = 0; i < max_indentation; i++) {
     236           0 :                                                 new_indentation[i] = indentation_levels[i];
     237           0 :                                                 new_statements_per_level[i] = statements_per_level[i];
     238             :                                         }
     239           0 :                                         GDKfree(indentation_levels);
     240           0 :                                         GDKfree(statements_per_level);
     241           0 :                                         indentation_levels = new_indentation;
     242           0 :                                         statements_per_level = new_statements_per_level;
     243           0 :                                         max_indentation *= 2;
     244             :                                 }
     245             : 
     246        1012 :                                 for (j = 0; j < indentation_count; j++) {
     247         725 :                                         if (initial_spaces == indentation_levels[j]) {
     248             :                                                 // The exact space count is already in the array, so we
     249             :                                                 // can stop
     250             :                                                 level = j;
     251             :                                                 placed = true;
     252             :                                                 break;
     253             :                                         }
     254             : 
     255         274 :                                         if (initial_spaces < indentation_levels[j]) {
     256             :                                                 // The indentation level is smaller than this level (but
     257             :                                                 // bigger than the previous level)
     258             :                                                 // So the indentation level belongs here, so we move
     259             :                                                 // every level past this one upward one level
     260             :                                                 // and put the indentation level here
     261           0 :                                                 for (k = indentation_count; k > j; k--) {
     262           0 :                                                         indentation_levels[k] = indentation_levels[k - 1];
     263           0 :                                                         statements_per_level[k] =
     264           0 :                                                                 statements_per_level[k - 1];
     265             :                                                 }
     266           0 :                                                 indentation_count++;
     267           0 :                                                 statements_per_level[j] = 0;
     268           0 :                                                 indentation_levels[j] = initial_spaces;
     269           0 :                                                 level = j;
     270           0 :                                                 placed = true;
     271           0 :                                                 break;
     272             :                                         }
     273             :                                 }
     274         738 :                                 if (!placed) {
     275             :                                         // The space count is the biggest we have seen, so we add it
     276             :                                         // to the end of the array
     277         287 :                                         level = indentation_count;
     278         287 :                                         indentation_levels[indentation_count++] = initial_spaces;
     279             :                                 }
     280         738 :                                 statements_per_level[level]++;
     281         738 :                                 size += statement_size;
     282         738 :                                 seen_statement = false;
     283         738 :                                 initial_spaces = 0;
     284         738 :                                 statement_size = 0;
     285             :                         }
     286             :                 }
     287             :         }
     288             :         // Add the amount of spaces we will add to the size
     289         471 :         for (i = 0; i < indentation_count; i++) {
     290         287 :                 size += (i + 1) * spaces_per_level * statements_per_level[i];
     291             :         }
     292             : 
     293             :         // Allocate space for the function
     294         184 :         newcode = GDKzalloc(size);
     295         184 :         if (newcode == NULL) {
     296           0 :                 *msg = createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     297           0 :                 goto finally;
     298             :         }
     299        2024 :         initial_spaces = 0;
     300        2024 :         seen_statement = false;
     301             : 
     302             :         // First print in the function definition and arguments
     303        2024 :         for (i = 0; i < strlen(base_start); i++) {
     304        1840 :                 newcode[code_location++] = base_start[i];
     305             :         }
     306             :         // Add user-defined parameters
     307        1211 :         for (i = 0; i < argcount; i++) {
     308        1027 :                 if (args[i] != NULL) {
     309        1983 :                         for (j = 0; j < strlen(args[i]); j++) {
     310        1578 :                                 newcode[code_location++] = args[i][j];
     311             :                         }
     312         405 :                         if (i != argcount - 1 || additional_argcount > 0) {
     313         405 :                                 newcode[code_location++] = ',';
     314             :                         }
     315             :                 }
     316             :         }
     317             :         // Add additional parameters
     318         711 :         for (i = 0; i < additional_argcount; i++) {
     319         527 :                 if (additional_args[i] != NULL) {
     320        4911 :                         for (j = 0; j < strlen(additional_args[i]); j++) {
     321        4384 :                                 newcode[code_location++] = additional_args[i][j];
     322             :                         }
     323         527 :                         if (i != additional_argcount - 1) {
     324         343 :                                 newcode[code_location++] = ',';
     325             :                         }
     326             :                 }
     327             :         }
     328         736 :         for (i = 0; i < strlen(base_end); i++) {
     329         552 :                 newcode[code_location++] = base_end[i];
     330             :         }
     331             : 
     332             :         // Now the second pass, actually construct the code
     333       23778 :         for (i = 0; i < length; i++) {
     334             :                 // handle multiline statements
     335       23594 :                 if (code[i] == '\"') {
     336          56 :                         if (!multiline_statement) {
     337          49 :                                 multiline_quotes++;
     338          49 :                                 multiline_statement = multiline_quotes == 3;
     339             :                         } else {
     340           7 :                                 multiline_quotes--;
     341           7 :                                 multiline_statement = multiline_quotes != 0;
     342             :                         }
     343             :                 } else {
     344       23538 :                         multiline_quotes = multiline_statement ? 3 : 0;
     345             :                 }
     346             : 
     347       23594 :                 if (!seen_statement) {
     348        4268 :                         if (multiline_statement)
     349             :                                 seen_statement = true; // if we are in a multiline string, we
     350             :                                                                            // simply want to copy everything
     351             :                                                                            // (including indentation)
     352             :                         // We have not seen a statement on this line yet
     353        4264 :                         else if (code[i] == '\n') {
     354             :                                 // Empty line, skip to the next one
     355             :                                 initial_spaces = 0;
     356        3939 :                         } else if (code[i] == ' ') {
     357        2767 :                                 initial_spaces++;
     358        1172 :                         } else if (code[i] == '\t') {
     359         434 :                                 initial_spaces += tabwidth;
     360             :                         } else {
     361             :                                 // Look through the indentation_levels array to find the level
     362             :                                 // of the statement
     363             :                                 // from the amount of initial spaces
     364        1012 :                                 bool placed = false;
     365        1012 :                                 size_t level = 0;
     366             :                                 // Statement starts here
     367        1012 :                                 seen_statement = true;
     368        1012 :                                 for (j = 0; j < indentation_count; j++) {
     369        1012 :                                         if (initial_spaces == indentation_levels[j]) {
     370             :                                                 level = j;
     371             :                                                 placed = true;
     372             :                                                 break;
     373             :                                         }
     374             :                                 }
     375         738 :                                 if (!placed) {
     376             :                                         // This should never happen, because it means the initial
     377             :                                         // spaces was not present in the array
     378             :                                         // When we just did exactly the same loop over the array, we
     379             :                                         // should have encountered this statement
     380             :                                         // This means that something happened to either the
     381             :                                         // indentation_levels array or something happened to the
     382             :                                         // code
     383           0 :                                         *msg = createException(MAL, "pyapi3.eval",
     384             :                                                                                    SQLSTATE(PY000) "If you see this error something "
     385             :                                                                                    "went wrong in the code. Sorry.");
     386           0 :                                         goto finally;
     387             :                                 }
     388        2762 :                                 for (j = 0; j < (level + 1) * spaces_per_level; j++) {
     389             :                                         // Add spaces to the code
     390        2024 :                                         newcode[code_location++] = ' ';
     391             :                                 }
     392             :                         }
     393             :                 }
     394       23594 :                 if (seen_statement) {
     395             :                         // We have seen a statement on this line, copy it
     396       20068 :                         newcode[code_location++] = code[i];
     397       20068 :                         if (code[i] == '\n') {
     398             :                                 // The statement has ended, move on to the next line
     399         733 :                                 seen_statement = false;
     400         733 :                                 initial_spaces = 0;
     401         733 :                                 statement_size = 0;
     402             :                         }
     403             :                 }
     404             :         }
     405         184 :         newcode[code_location] = '\0';
     406         184 :         if (code_location >= size) {
     407             :                 // Something went wrong with our size computation, this also should
     408             :                 // never happen
     409           0 :                 *msg = createException(MAL, "pyapi3.eval",
     410             :                                                            SQLSTATE(PY000) "If you see this error something went wrong in "
     411             :                                                            "the code (size computation). Sorry.");
     412           0 :                 goto finally;
     413             :         }
     414         184 : finally:
     415         184 :         GDKfree(indentation_levels);
     416         184 :         GDKfree(statements_per_level);
     417         184 :         return newcode;
     418             : }
     419             : 
     420           0 : void _formatinput_init(void) { _import_array(); }

Generated by: LCOV version 1.14