Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "formatinput.h"
15 : #include "type_conversion.h"
16 :
17 : //! Parse a PyCodeObject from a string, the string is expected to be in the
18 : //! format {@<encoded_function>};, where <encoded_function> is the Marshalled
19 : //! code object
20 : PyObject *PyCodeObject_ParseString(char *string, char **msg);
21 0 : PyObject *PyCodeObject_ParseString(char *string, char **msg)
22 : {
23 0 : size_t length = strlen(string);
24 0 : PyObject *code_object, *tuple, *mystr;
25 0 : char *code_copy = GDKmalloc(length * sizeof(char));
26 0 : char hex[3];
27 0 : size_t i, j;
28 0 : hex[2] = '\0';
29 0 : if (code_copy == NULL) {
30 0 : *msg = createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
31 0 : return NULL;
32 : }
33 : // decode hex codes (e.g. \x00) in the string to the actual numeric
34 : // representation
35 0 : for (i = 2, j = 0; i < length - 2; i++) {
36 0 : if (string[i] == '\\' && string[i + 1] == '\\')
37 0 : i++;
38 0 : if (string[i] == '\\' && string[i + 1] == 't') {
39 0 : code_copy[j++] = '\t';
40 0 : i++;
41 0 : } else if (string[i] == '\\' && string[i + 1] == 'n') {
42 0 : code_copy[j++] = '\n';
43 0 : i++;
44 0 : } else if (string[i] == '\\' && string[i + 1] == 'x') {
45 0 : hex[0] = string[i + 2];
46 0 : hex[1] = string[i + 3];
47 0 : code_copy[j++] = (char)strtol(hex, NULL, 16);
48 0 : i += 3;
49 : } else {
50 0 : code_copy[j++] = string[i];
51 : }
52 : }
53 0 : code_copy[j] = '\0';
54 0 : tuple = PyTuple_New(1);
55 0 : mystr = PyUnicode_FromStringAndSize(
56 : code_copy,
57 : j); // use FromStringAndSize because the string is not null-terminated
58 0 : PyTuple_SetItem(tuple, 0, mystr);
59 0 : code_object = PyObject_CallObject(marshal_loads, tuple);
60 0 : Py_DECREF(tuple);
61 0 : GDKfree(code_copy);
62 0 : if (code_object == NULL) {
63 0 : PyErr_Print();
64 0 : *msg = createException(MAL, "pyapi3.eval",
65 : SQLSTATE(PY000) "Failed to marshal.loads() encoded object");
66 0 : return NULL;
67 : }
68 0 : *msg = MAL_SUCCEED;
69 0 : return code_object;
70 : }
71 :
72 184 : char *FormatCode(char *code, char **args, size_t argcount, size_t tabwidth,
73 : PyObject **code_object, char **msg, char **additional_args,
74 : size_t additional_argcount)
75 : {
76 : // Format the python code by fixing the indentation levels
77 : // We do two passes, first we get the length of the resulting formatted code
78 : // and then we actually create the resulting code
79 184 : size_t i = 0, j = 0, k = 0;
80 184 : size_t length = strlen(code);
81 184 : size_t size = 0;
82 184 : size_t spaces_per_level = 2;
83 :
84 184 : size_t code_location = 0;
85 184 : char *newcode = NULL;
86 :
87 184 : size_t indentation_count = 0;
88 184 : size_t max_indentation = 100;
89 : // This keeps track of the different indentation levels
90 : // indentation_levels is a sorted array with how many spaces of indentation
91 : // that specific array has
92 : // so indentation_levels[0] = 4 means that the first level (level 0) has 4
93 : // spaces in the source code
94 : // after this array is constructed we can count the amount of spaces before
95 : // a statement and look in this
96 : // array to immediately find the indentation level of the statement
97 184 : size_t *indentation_levels;
98 : // statements_per_level keeps track of how many statements are at the
99 : // specified indentation level
100 : // this is needed to compute the size of the resulting formatted code
101 : // for every indentation level i, we add statements_per_level[i] * (i + 1) *
102 : // spaces_per_level spaces
103 184 : size_t *statements_per_level;
104 :
105 184 : size_t initial_spaces = 0;
106 184 : size_t statement_size = 0;
107 184 : bool seen_statement = false;
108 184 : bool multiline_statement = false;
109 184 : int multiline_quotes = 0;
110 :
111 184 : char base_start[] = "def pyfun(";
112 184 : char base_end[] = "):\n";
113 184 : *msg = NULL;
114 184 : (void)code_object;
115 :
116 184 : indentation_levels = (size_t *)GDKzalloc(max_indentation * sizeof(size_t));
117 368 : statements_per_level =
118 184 : (size_t *)GDKzalloc(max_indentation * sizeof(size_t));
119 184 : if (indentation_levels == NULL || statements_per_level == NULL) {
120 0 : *msg = createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
121 0 : goto finally;
122 : }
123 :
124 : // Base function definition size
125 : // For every argument, add a comma, and add another entry for the '\0'
126 184 : size += strlen(base_start) + strlen(base_end) + argcount + 1;
127 1211 : for (i = 0; i < argcount; i++) {
128 1027 : if (args[i] != NULL) {
129 405 : size += strlen(args[i]) + 1;
130 : }
131 : }
132 : // Additional parameters
133 711 : for (i = 0; i < additional_argcount; i++)
134 527 : size += strlen(additional_args[i]) + 1;
135 :
136 : // First remove the "{" at the start and the "};" at the end of the
137 : // function, this is added when we have a function created through SQL and
138 : // python doesn't like them
139 : // We need to be careful to only remove ones at the start/end, otherwise we
140 : // might invalidate some otherwise valid python code containing them
141 547 : for (i = length - 1, j = 0; i > 0; i--) {
142 547 : if (code[i] != '\n' && code[i] != ' ' && code[i] != '\t' &&
143 184 : code[i] != ';' && code[i] != '}')
144 : break;
145 541 : if (j == 0) {
146 190 : if (code[i] == ';') {
147 178 : code[i] = ' ';
148 178 : j = 1;
149 : }
150 351 : } else if (j == 1) {
151 351 : if (code[i] == '}') {
152 178 : code[i] = ' ';
153 178 : break;
154 : }
155 : }
156 : }
157 990 : for (i = 0; i < length; i++) {
158 990 : if (code[i] != '\n' && code[i] != ' ' && code[i] != '\t' &&
159 : code[i] != '{')
160 : break;
161 806 : if (code[i] == '{') {
162 178 : code[i] = ' ';
163 : }
164 : }
165 : // We indent using spaces, four spaces per level
166 : // We also erase empty lines
167 23778 : for (i = 0; i < length; i++) {
168 : // handle multiline strings (strings that start with """)
169 23594 : if (code[i] == '\"') {
170 56 : if (!multiline_statement) {
171 49 : multiline_quotes++;
172 49 : multiline_statement = multiline_quotes == 3;
173 : } else {
174 7 : multiline_quotes--;
175 7 : multiline_statement = multiline_quotes != 0;
176 : }
177 : } else {
178 23538 : multiline_quotes = multiline_statement ? 3 : 0;
179 : }
180 :
181 23594 : if (!seen_statement) {
182 : // We have not seen a statement on this line yet
183 4264 : if (code[i] == '\n') {
184 : // Empty line, skip to the next one
185 : initial_spaces = 0;
186 3939 : } else if (code[i] == ' ') {
187 2767 : initial_spaces++;
188 1172 : } else if (code[i] == '\t') {
189 434 : initial_spaces += tabwidth;
190 : } else {
191 : // Statement starts here
192 : seen_statement = true;
193 : }
194 : }
195 : if (seen_statement) {
196 : // We have seen a statement on this line, check the indentation
197 : // level
198 20068 : statement_size++;
199 :
200 20068 : if (code[i] == '\n' || i == length - 1) {
201 : // Statement ends here
202 742 : bool placed = false;
203 742 : size_t level = 0;
204 :
205 742 : if (multiline_statement) {
206 : // if we are in a multiline statement, we don't want to mess
207 : // with the indentation
208 4 : size += statement_size;
209 4 : initial_spaces = 0;
210 4 : statement_size = 0;
211 4 : continue;
212 : }
213 : // First put the indentation in the indentation table
214 738 : if (indentation_count >= max_indentation) {
215 : // If there is no room in the indentation arrays we will
216 : // extend them
217 : // This probably will never happen unless in really extreme
218 : // code (or if max_indentation is set very low)
219 0 : size_t *new_indentation =
220 0 : GDKzalloc(2 * max_indentation * sizeof(size_t));
221 0 : size_t *new_statements_per_level;
222 0 : if (new_indentation == NULL) {
223 0 : *msg =
224 0 : createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
225 0 : goto finally;
226 : }
227 0 : new_statements_per_level =
228 0 : GDKzalloc(2 * max_indentation * sizeof(size_t));
229 0 : if (new_statements_per_level == NULL) {
230 0 : *msg =
231 0 : createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
232 0 : goto finally;
233 : }
234 :
235 0 : for (i = 0; i < max_indentation; i++) {
236 0 : new_indentation[i] = indentation_levels[i];
237 0 : new_statements_per_level[i] = statements_per_level[i];
238 : }
239 0 : GDKfree(indentation_levels);
240 0 : GDKfree(statements_per_level);
241 0 : indentation_levels = new_indentation;
242 0 : statements_per_level = new_statements_per_level;
243 0 : max_indentation *= 2;
244 : }
245 :
246 1012 : for (j = 0; j < indentation_count; j++) {
247 725 : if (initial_spaces == indentation_levels[j]) {
248 : // The exact space count is already in the array, so we
249 : // can stop
250 : level = j;
251 : placed = true;
252 : break;
253 : }
254 :
255 274 : if (initial_spaces < indentation_levels[j]) {
256 : // The indentation level is smaller than this level (but
257 : // bigger than the previous level)
258 : // So the indentation level belongs here, so we move
259 : // every level past this one upward one level
260 : // and put the indentation level here
261 0 : for (k = indentation_count; k > j; k--) {
262 0 : indentation_levels[k] = indentation_levels[k - 1];
263 0 : statements_per_level[k] =
264 0 : statements_per_level[k - 1];
265 : }
266 0 : indentation_count++;
267 0 : statements_per_level[j] = 0;
268 0 : indentation_levels[j] = initial_spaces;
269 0 : level = j;
270 0 : placed = true;
271 0 : break;
272 : }
273 : }
274 738 : if (!placed) {
275 : // The space count is the biggest we have seen, so we add it
276 : // to the end of the array
277 287 : level = indentation_count;
278 287 : indentation_levels[indentation_count++] = initial_spaces;
279 : }
280 738 : statements_per_level[level]++;
281 738 : size += statement_size;
282 738 : seen_statement = false;
283 738 : initial_spaces = 0;
284 738 : statement_size = 0;
285 : }
286 : }
287 : }
288 : // Add the amount of spaces we will add to the size
289 471 : for (i = 0; i < indentation_count; i++) {
290 287 : size += (i + 1) * spaces_per_level * statements_per_level[i];
291 : }
292 :
293 : // Allocate space for the function
294 184 : newcode = GDKzalloc(size);
295 184 : if (newcode == NULL) {
296 0 : *msg = createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
297 0 : goto finally;
298 : }
299 2024 : initial_spaces = 0;
300 2024 : seen_statement = false;
301 :
302 : // First print in the function definition and arguments
303 2024 : for (i = 0; i < strlen(base_start); i++) {
304 1840 : newcode[code_location++] = base_start[i];
305 : }
306 : // Add user-defined parameters
307 1211 : for (i = 0; i < argcount; i++) {
308 1027 : if (args[i] != NULL) {
309 1983 : for (j = 0; j < strlen(args[i]); j++) {
310 1578 : newcode[code_location++] = args[i][j];
311 : }
312 405 : if (i != argcount - 1 || additional_argcount > 0) {
313 405 : newcode[code_location++] = ',';
314 : }
315 : }
316 : }
317 : // Add additional parameters
318 711 : for (i = 0; i < additional_argcount; i++) {
319 527 : if (additional_args[i] != NULL) {
320 4911 : for (j = 0; j < strlen(additional_args[i]); j++) {
321 4384 : newcode[code_location++] = additional_args[i][j];
322 : }
323 527 : if (i != additional_argcount - 1) {
324 343 : newcode[code_location++] = ',';
325 : }
326 : }
327 : }
328 736 : for (i = 0; i < strlen(base_end); i++) {
329 552 : newcode[code_location++] = base_end[i];
330 : }
331 :
332 : // Now the second pass, actually construct the code
333 23778 : for (i = 0; i < length; i++) {
334 : // handle multiline statements
335 23594 : if (code[i] == '\"') {
336 56 : if (!multiline_statement) {
337 49 : multiline_quotes++;
338 49 : multiline_statement = multiline_quotes == 3;
339 : } else {
340 7 : multiline_quotes--;
341 7 : multiline_statement = multiline_quotes != 0;
342 : }
343 : } else {
344 23538 : multiline_quotes = multiline_statement ? 3 : 0;
345 : }
346 :
347 23594 : if (!seen_statement) {
348 4268 : if (multiline_statement)
349 : seen_statement = true; // if we are in a multiline string, we
350 : // simply want to copy everything
351 : // (including indentation)
352 : // We have not seen a statement on this line yet
353 4264 : else if (code[i] == '\n') {
354 : // Empty line, skip to the next one
355 : initial_spaces = 0;
356 3939 : } else if (code[i] == ' ') {
357 2767 : initial_spaces++;
358 1172 : } else if (code[i] == '\t') {
359 434 : initial_spaces += tabwidth;
360 : } else {
361 : // Look through the indentation_levels array to find the level
362 : // of the statement
363 : // from the amount of initial spaces
364 1012 : bool placed = false;
365 1012 : size_t level = 0;
366 : // Statement starts here
367 1012 : seen_statement = true;
368 1012 : for (j = 0; j < indentation_count; j++) {
369 1012 : if (initial_spaces == indentation_levels[j]) {
370 : level = j;
371 : placed = true;
372 : break;
373 : }
374 : }
375 738 : if (!placed) {
376 : // This should never happen, because it means the initial
377 : // spaces was not present in the array
378 : // When we just did exactly the same loop over the array, we
379 : // should have encountered this statement
380 : // This means that something happened to either the
381 : // indentation_levels array or something happened to the
382 : // code
383 0 : *msg = createException(MAL, "pyapi3.eval",
384 : SQLSTATE(PY000) "If you see this error something "
385 : "went wrong in the code. Sorry.");
386 0 : goto finally;
387 : }
388 2762 : for (j = 0; j < (level + 1) * spaces_per_level; j++) {
389 : // Add spaces to the code
390 2024 : newcode[code_location++] = ' ';
391 : }
392 : }
393 : }
394 23594 : if (seen_statement) {
395 : // We have seen a statement on this line, copy it
396 20068 : newcode[code_location++] = code[i];
397 20068 : if (code[i] == '\n') {
398 : // The statement has ended, move on to the next line
399 733 : seen_statement = false;
400 733 : initial_spaces = 0;
401 733 : statement_size = 0;
402 : }
403 : }
404 : }
405 184 : newcode[code_location] = '\0';
406 184 : if (code_location >= size) {
407 : // Something went wrong with our size computation, this also should
408 : // never happen
409 0 : *msg = createException(MAL, "pyapi3.eval",
410 : SQLSTATE(PY000) "If you see this error something went wrong in "
411 : "the code (size computation). Sorry.");
412 0 : goto finally;
413 : }
414 184 : finally:
415 184 : GDKfree(indentation_levels);
416 184 : GDKfree(statements_per_level);
417 184 : return newcode;
418 : }
419 :
//! Module initialisation hook: calls _import_array() and ignores its result.
//! NOTE(review): _import_array is presumably NumPy's C-API import routine,
//! which has to run in each file that uses the array API -- confirm.
void _formatinput_init(void)
{
	_import_array();
}
|