Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * (author) Author M. Kersten
15 : * For documentation see website
16 : */
17 : #include "monetdb_config.h"
18 : #include "mal_instruction.h"
19 : #include "mal_function.h" /* for getPC() */
20 : #include "mal_utils.h"
21 : #include "mal_exception.h"
22 : #include "mal_private.h"
23 :
24 : /* to avoid memory fragmentation stmt and var blocks are allocated in chunks */
25 : #define MALCHUNK 256
26 :
27 : /* If we encounter an error it can be left behind in the MalBlk
28 : * for the upper layers to abandon the track
29 : */
30 : void
31 0 : addMalException(MalBlkPtr mb, str msg)
32 : {
33 0 : if (msg == NULL)
34 : return;
35 0 : if (mb->errors) {
36 0 : mb->errors = concatErrors(mb->errors, msg);
37 : } else {
38 0 : mb->errors = dupError(msg);
39 : }
40 : }
41 :
42 : Symbol
43 3486636 : newSymbol(const char *nme, int kind)
44 : {
45 3486636 : Symbol cur;
46 :
47 3486636 : if (nme == NULL)
48 : return NULL;
49 3486636 : cur = (Symbol) GDKzalloc(sizeof(SymRecord));
50 3486634 : if (cur == NULL)
51 : return NULL;
52 3486634 : cur->name = putName(nme);
53 3486636 : if (cur->name == NULL) {
54 0 : GDKfree(cur);
55 0 : return NULL;
56 : }
57 3486636 : cur->kind = kind;
58 3486636 : cur->peer = NULL;
59 6922724 : cur->def = newMalBlk(kind == FUNCTIONsymbol ? STMT_INCREMENT : 2);
60 3486634 : if (cur->def == NULL) {
61 0 : GDKfree(cur);
62 0 : return NULL;
63 : }
64 : return cur;
65 : }
66 :
67 : void
68 3466505 : freeSymbol(Symbol s)
69 : {
70 3466505 : if (s == NULL)
71 : return;
72 3466505 : if (s->def) {
73 3466505 : freeMalBlk(s->def);
74 3466505 : s->def = NULL;
75 : }
76 3466505 : GDKfree(s);
77 : }
78 :
79 : void
80 200317 : freeSymbolList(Symbol s)
81 : {
82 200317 : Symbol t = s;
83 :
84 3666261 : while (s) {
85 3465944 : t = s->peer;
86 3465944 : s->peer = NULL;
87 3465944 : freeSymbol(s);
88 3465944 : s = t;
89 : }
90 200317 : }
91 :
92 : int
93 7252677 : newMalBlkStmt(MalBlkPtr mb, int maxstmts)
94 : {
95 7252677 : InstrPtr *p;
96 7252677 : maxstmts = maxstmts % MALCHUNK == 0 ? maxstmts : ((maxstmts / MALCHUNK) + 1) * MALCHUNK;
97 :
98 7252677 : p = (InstrPtr *) GDKzalloc(sizeof(InstrPtr) * maxstmts);
99 7252655 : if (p == NULL)
100 : return -1;
101 7252655 : mb->stmt = p;
102 7252655 : mb->stop = 0;
103 7252655 : mb->ssize = maxstmts;
104 7252655 : return 0;
105 : }
106 :
107 : MalBlkPtr
108 3491049 : newMalBlk(int elements)
109 : {
110 3491049 : MalBlkPtr mb;
111 3491049 : VarRecord *v;
112 :
113 3491049 : mb = (MalBlkPtr) GDKmalloc(sizeof(MalBlkRecord));
114 3491049 : if (mb == NULL)
115 : return NULL;
116 :
117 : /* each MAL instruction implies at least one variable
118 : * we reserve some extra for constants */
119 3491049 : assert(elements >= 0);
120 3491049 : elements += 8;
121 3491049 : if (elements % MALCHUNK != 0)
122 3491049 : elements = (elements / MALCHUNK + 1) * MALCHUNK;
123 3491049 : v = (VarRecord *) GDKzalloc(sizeof(VarRecord) * elements);
124 3491048 : if (v == NULL) {
125 0 : GDKfree(mb);
126 0 : return NULL;
127 : }
128 3491048 : *mb = (MalBlkRecord) {
129 : .var = v,
130 : .vsize = elements,
131 : .maxarg = MAXARG, /* the minimum for each instruction */
132 : };
133 3491048 : if (newMalBlkStmt(mb, elements) < 0) {
134 0 : GDKfree(mb->var);
135 0 : GDKfree(mb);
136 0 : return NULL;
137 : }
138 3491047 : ATOMIC_INIT(&mb->workers, 1);
139 3491047 : return mb;
140 : }
141 :
142 : int
143 41302 : resizeMalBlk(MalBlkPtr mb, int elements)
144 : {
145 41302 : int i;
146 41302 : assert(elements >= 0);
147 41302 : if (elements % MALCHUNK != 0)
148 10461 : elements = (elements / MALCHUNK + 1) * MALCHUNK;
149 :
150 41302 : if (elements > mb->ssize) {
151 30841 : InstrPtr *ostmt = mb->stmt;
152 30841 : mb->stmt = GDKrealloc(mb->stmt, elements * sizeof(InstrPtr));
153 30841 : if (mb->stmt) {
154 7921398 : for (i = mb->ssize; i < elements; i++)
155 7890557 : mb->stmt[i] = 0;
156 30841 : mb->ssize = elements;
157 : } else {
158 0 : mb->stmt = ostmt; /* reinstate old pointer */
159 0 : mb->errors = createMalException(mb, 0, TYPE,
160 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
161 0 : return -1;
162 : }
163 : }
164 :
165 :
166 41302 : if (elements > mb->vsize) {
167 0 : VarRecord *ovar = mb->var;
168 0 : mb->var = GDKrealloc(mb->var, elements * sizeof(VarRecord));
169 0 : if (mb->var) {
170 0 : memset(((char *) mb->var) +sizeof(VarRecord) * mb->vsize, 0,
171 0 : (elements - mb->vsize) * sizeof(VarRecord));
172 0 : mb->vsize = elements;
173 : } else {
174 0 : mb->var = ovar;
175 0 : mb->errors = createMalException(mb, 0, TYPE,
176 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
177 0 : return -1;
178 : }
179 : }
180 : return 0;
181 : }
182 :
183 : /* For a MAL session we have to keep the variables around
184 : * and only need to reset the instruction pointer
185 : */
186 : void
187 558569 : resetMalTypes(MalBlkPtr mb, int stop)
188 : {
189 558569 : int i;
190 :
191 26836627 : for (i = 0; i < stop; i++)
192 26278058 : mb->stmt[i]->typechk = TYPE_UNKNOWN;
193 558569 : mb->stop = stop;
194 558569 : mb->errors = NULL;
195 558569 : }
196 :
197 : /* For SQL operations we have to cleanup variables and trim the space
198 : * A portion is retained for the next query */
199 : void
200 543996 : resetMalBlk(MalBlkPtr mb)
201 : {
202 543996 : int i;
203 543996 : InstrPtr *new;
204 543996 : VarRecord *vnew;
205 :
206 4056316 : for (i = MALCHUNK; i < mb->ssize; i++) {
207 3512320 : freeInstruction(mb->stmt[i]);
208 3512320 : mb->stmt[i] = NULL;
209 : }
210 543996 : if (mb->ssize != MALCHUNK) {
211 8330 : new = GDKrealloc(mb->stmt, sizeof(InstrPtr) * MALCHUNK);
212 8330 : if (new == NULL) {
213 : /* the only place to return an error signal at this stage. */
214 : /* The Client context should be passed around more deeply */
215 0 : mb->errors = createMalException(mb, 0, TYPE,
216 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
217 0 : return;
218 : }
219 8330 : mb->stmt = new;
220 8330 : mb->ssize = MALCHUNK;
221 : }
222 : /* Reuse the initial function statement */
223 543996 : mb->stop = 0;
224 :
225 54299522 : for (i = 0; i < mb->vtop; i++) {
226 53755545 : if (isVarConstant(mb, i))
227 17493334 : VALclear(&getVarConstant(mb, i));
228 : }
229 :
230 543977 : if (mb->vsize != MALCHUNK) {
231 22018 : vnew = GDKrealloc(mb->var, sizeof(VarRecord) * MALCHUNK);
232 22018 : if (vnew == NULL) {
233 : /* the only place to return an error signal at this stage. */
234 : /* The Client context should be passed around more deeply */
235 0 : mb->errors = createMalException(mb, 0, TYPE,
236 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
237 0 : return;
238 : }
239 22018 : mb->var = vnew;
240 22018 : mb->vsize = MALCHUNK;
241 : }
242 543977 : mb->vtop = 0;
243 543977 : mb->vid = 0;
244 : }
245 :
246 :
247 : /* The freeMalBlk code is quite defensive. It is used to localize an
248 : * illegal re-use of a MAL blk. */
249 : void
250 3471162 : freeMalBlk(MalBlkPtr mb)
251 : {
252 3471162 : int i;
253 :
254 892205183 : for (i = 0; i < mb->ssize; i++)
255 888734021 : if (mb->stmt[i]) {
256 3600950 : freeInstruction(mb->stmt[i]);
257 3600950 : mb->stmt[i] = NULL;
258 : }
259 3471162 : mb->stop = 0;
260 16942331 : for (i = 0; i < mb->vtop; i++)
261 13471170 : if (isVarConstant(mb, i))
262 67330 : VALclear(&getVarConstant(mb, i));
263 3471161 : mb->vtop = 0;
264 3471161 : mb->vid = 0;
265 3471161 : GDKfree(mb->stmt);
266 3471162 : mb->stmt = 0;
267 3471162 : GDKfree(mb->var);
268 3471162 : mb->var = 0;
269 :
270 3471162 : mb->binding[0] = 0;
271 3471162 : mb->tag = 0;
272 3471162 : mb->memory = 0;
273 3471162 : if (mb->help && mb->statichelp != mb->help)
274 2 : GDKfree(mb->help);
275 3471162 : mb->help = 0;
276 3471162 : mb->statichelp = 0;
277 3471162 : mb->inlineProp = 0;
278 3471162 : mb->unsafeProp = 0;
279 3471162 : freeException(mb->errors);
280 3471162 : ATOMIC_DESTROY(&mb->workers);
281 3471162 : GDKfree(mb);
282 3471162 : }
283 :
284 : /* The routine below should assure that all referenced structures are
285 : * private. The copying is memory conservative. */
286 : MalBlkPtr
287 244 : copyMalBlk(MalBlkPtr old)
288 : {
289 244 : MalBlkPtr mb;
290 244 : int i;
291 :
292 244 : mb = (MalBlkPtr) GDKzalloc(sizeof(MalBlkRecord));
293 244 : if (mb == NULL)
294 : return NULL;
295 244 : mb->alternative = old->alternative;
296 :
297 244 : mb->var = (VarRecord *) GDKzalloc(sizeof(VarRecord) * old->vsize);
298 244 : if (mb->var == NULL) {
299 0 : GDKfree(mb);
300 0 : return NULL;
301 : }
302 :
303 244 : mb->vsize = old->vsize;
304 244 : mb->vid = old->vid;
305 :
306 : /* copy all variable records */
307 19106 : for (i = 0; i < old->vtop; i++) {
308 18862 : mb->var[i] = old->var[i];
309 18862 : if (VALcopy(&(mb->var[i].value), &(old->var[i].value)) == NULL) {
310 0 : mb->vtop = i;
311 0 : goto bailout;
312 : }
313 : }
314 244 : mb->vtop = old->vtop;
315 :
316 244 : mb->stmt = (InstrPtr *) GDKzalloc(sizeof(InstrPtr) * old->ssize);
317 244 : if (mb->stmt == NULL) {
318 0 : goto bailout;
319 : }
320 :
321 244 : mb->ssize = old->ssize;
322 244 : assert(old->stop < old->ssize);
323 17633 : for (i = 0; i < old->stop; i++) {
324 17389 : mb->stmt[i] = copyInstruction(old->stmt[i]);
325 17389 : if (mb->stmt[i] == NULL) {
326 0 : mb->stop = i;
327 0 : goto bailout;
328 : }
329 : }
330 244 : mb->stop = old->stop;
331 244 : if (old->help && (mb->help = GDKstrdup(old->help)) == NULL) {
332 0 : goto bailout;
333 : }
334 :
335 244 : strcpy_len(mb->binding, old->binding, sizeof(mb->binding));
336 244 : mb->errors = old->errors ? GDKstrdup(old->errors) : 0;
337 244 : mb->tag = old->tag;
338 244 : mb->runtime = old->runtime;
339 244 : mb->calls = old->calls;
340 244 : mb->optimize = old->optimize;
341 244 : mb->maxarg = old->maxarg;
342 244 : mb->inlineProp = old->inlineProp;
343 244 : mb->unsafeProp = old->unsafeProp;
344 244 : return mb;
345 :
346 : bailout:
347 0 : for (i = 0; i < old->stop; i++)
348 0 : freeInstruction(mb->stmt[i]);
349 0 : for (i = 0; i < old->vtop; i++)
350 0 : VALclear(&mb->var[i].value);
351 0 : GDKfree(mb->var);
352 0 : GDKfree(mb->stmt);
353 0 : GDKfree(mb);
354 0 : return NULL;
355 : }
356 :
357 : /* The MAL records should be managed from a pool to
358 : * avoid repeated alloc/free and reduce probability of
359 : * memory fragmentation. (todo)
360 : * The complicating factor is their variable size,
361 : * which leads to growing records as a result of pushArguments
362 : * Allocation of an instruction should always succeed.
363 : */
364 : InstrPtr
365 34050436 : newInstructionArgs(MalBlkPtr mb, const char *modnme, const char *fcnnme,
366 : int args)
367 : {
368 34050436 : InstrPtr p;
369 :
370 34050436 : if (mb && mb->errors)
371 : return NULL;
372 34050421 : if (args <= 0)
373 : args = 1;
374 34050421 : p = GDKmalloc(args * sizeof(p->argv[0]) + offsetof(InstrRecord, argv));
375 34048497 : if (p == NULL) {
376 0 : if (mb)
377 0 : mb->errors = createMalException(mb, 0, TYPE,
378 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
379 0 : return NULL;
380 : }
381 34048497 : *p = (InstrRecord) {
382 : .maxarg = args,
383 : .typechk = TYPE_UNKNOWN,
384 : .modname = modnme,
385 : .fcnname = fcnnme,
386 : .argc = 1,
387 : .retc = 1,
388 : /* Flow of control instructions are always marked as an assignment
389 : * with modifier */
390 : .token = ASSIGNsymbol,
391 : };
392 34048497 : memset(p->argv, 0, args * sizeof(p->argv[0]));
393 34048497 : p->argv[0] = -1;
394 34048497 : return p;
395 : }
396 :
397 : InstrPtr
398 2655562 : newInstruction(MalBlkPtr mb, const char *modnme, const char *fcnnme)
399 : {
400 2655562 : return newInstructionArgs(mb, modnme, fcnnme, MAXARG);
401 : }
402 :
403 : InstrPtr
404 13756883 : copyInstructionArgs(const InstrRecord *p, int args)
405 : {
406 13756883 : if (args < p->maxarg)
407 : args = p->maxarg;
408 13756883 : InstrPtr new = (InstrPtr) GDKmalloc(offsetof(InstrRecord, argv) +
409 : args * sizeof(p->argv[0]));
410 13757121 : if (new == NULL)
411 : return new;
412 13757121 : memcpy(new, p,
413 13757121 : offsetof(InstrRecord, argv) + p->maxarg * sizeof(p->argv[0]));
414 13757121 : if (args > p->maxarg)
415 1648656 : memset(new->argv + p->maxarg, 0,
416 1648656 : (args - p->maxarg) * sizeof(new->argv[0]));
417 13757121 : new->typechk = TYPE_UNKNOWN;
418 13757121 : new->maxarg = args;
419 13757121 : return new;
420 : }
421 :
422 : InstrPtr
423 10060388 : copyInstruction(const InstrRecord *p)
424 : {
425 10060388 : return copyInstructionArgs(p, p->maxarg);
426 : }
427 :
428 : void
429 571670 : clrFunction(InstrPtr p)
430 : {
431 571670 : p->token = ASSIGNsymbol;
432 571670 : p->fcn = 0;
433 571670 : p->blk = 0;
434 571670 : p->typechk = TYPE_UNKNOWN;
435 571670 : setModuleId(p, NULL);
436 571671 : setFunctionId(p, NULL);
437 571670 : }
438 :
439 : void
440 0 : clrInstruction(InstrPtr p)
441 : {
442 0 : clrFunction(p);
443 0 : memset(p, 0, offsetof(InstrRecord, argv) + p->maxarg * sizeof(p->argv[0]));
444 0 : }
445 :
446 : void
447 49252567 : freeInstruction(InstrPtr p)
448 : {
449 49252567 : GDKfree(p);
450 49253274 : }
451 :
452 : /* Query optimizers walk their way through a MAL program block. They
453 : * require some primitives to move instructions around and to remove
454 : * superflous instructions. The removal is based on the assumption
455 : * that indeed the instruction belonged to the block. */
456 : void
457 0 : removeInstruction(MalBlkPtr mb, InstrPtr p)
458 : {
459 0 : int i;
460 0 : for (i = 0; i < mb->stop - 1; i++)
461 0 : if (mb->stmt[i] == p)
462 : break;
463 0 : if (i == mb->stop)
464 : return;
465 0 : for (; i < mb->stop - 1; i++)
466 0 : mb->stmt[i] = mb->stmt[i + 1];
467 0 : mb->stmt[i] = 0;
468 0 : mb->stop--;
469 0 : assert(i == mb->stop); /* move statement after stop */
470 0 : mb->stmt[i] = p;
471 : }
472 :
473 : void
474 0 : removeInstructionBlock(MalBlkPtr mb, int pc, int cnt)
475 : {
476 0 : int i;
477 0 : InstrPtr p;
478 0 : for (i = pc; i < pc + cnt; i++) {
479 0 : p = getInstrPtr(mb, i);
480 0 : freeInstruction(p);
481 0 : mb->stmt[i] = NULL;
482 0 : } for (i = pc; i < mb->stop - cnt; i++)
483 0 : mb->stmt[i] = mb->stmt[i + cnt];
484 0 : mb->stop -= cnt;
485 0 : for (; i < mb->stop; i++)
486 : mb->stmt[i] = 0;
487 0 : }
488 :
489 : void
490 0 : moveInstruction(MalBlkPtr mb, int pc, int target)
491 : {
492 0 : InstrPtr p;
493 0 : int i;
494 0 : p = getInstrPtr(mb, pc);
495 0 : if (pc > target) {
496 0 : for (i = pc; i > target; i--)
497 0 : mb->stmt[i] = mb->stmt[i - 1];
498 0 : mb->stmt[i] = p;
499 : } else {
500 0 : for (i = target; i > pc; i--)
501 0 : mb->stmt[i] = mb->stmt[i - 1];
502 0 : mb->stmt[i] = p;
503 : }
504 0 : }
505 :
506 : /* Beware that the first argument of a signature is reserved for the
507 : * function return type, which should be equal to the destination
508 : * variable type.
509 : */
510 : int
511 621181 : findVariable(MalBlkPtr mb, const char *name)
512 : {
513 621181 : int i;
514 621181 : if (name == NULL)
515 : return -1;
516 3498291 : for (i = mb->vtop - 1; i >= 0; i--)
517 3486634 : if (idcmp(name, getVarName(mb, i)) == 0)
518 609524 : return i;
519 : return -1;
520 : }
521 :
522 : /* The second version of findVariable assumes you have not yet
523 : * allocated a private structure. This is particularly useful during
524 : * parsing, because most variables are already defined. This way we
525 : * safe GDKmalloc/GDKfree. */
526 : int
527 54347 : findVariableLength(MalBlkPtr mb, const char *name, int len)
528 : {
529 54347 : int i;
530 2565338 : for (i = mb->vtop - 1; i >= 0; i--) {
531 2522273 : const char *s = mb->var[i].name;
532 2522273 : if (s && strncmp(name, s, len) == 0 && s[len] == 0)
533 11282 : return i;
534 : }
535 : return -1;
536 : }
537 :
538 : str
539 174 : getArgDefault(MalBlkPtr mb, InstrPtr p, int idx)
540 : {
541 174 : ValPtr v = &getVarConstant(mb, getArg(p, idx));
542 174 : if (v->vtype == TYPE_str)
543 174 : return v->val.sval;
544 : return NULL;
545 : }
546 :
547 : /* All variables are implicitly declared upon their first assignment.
548 : *
549 : * Lexical constants require some care. They typically appear as
550 : * arguments in operator/function calls. To simplify program analysis
551 : * later on, we stick to the situation that function/operator
552 : * arguments are always references to by variables.
553 : *
554 : * Reserved words
555 : * Although MAL has been designed as a minimal language, several
556 : * identifiers are not eligible as variables. The encoding below is
557 : * geared toward simplicity and speed. */
#if 0
/*
 * Disabled code: tell whether `nme` is one of the reserved MAL words.
 * The first character selects the candidates to compare against; both
 * the upper and lower case initial lead to the same candidates.
 * Returns 1 for a reserved word and 0 otherwise.
 */
int
isReserved(str nme)
{
	switch (*nme) {
	case 'A':
	case 'a':
		return idcmp("atom", nme) == 0;
	case 'B':
	case 'b':
		return idcmp("barrier", nme) == 0;
	case 'C':
	case 'c':
		return idcmp("command", nme) == 0;
	case 'E':
	case 'e':
		return idcmp("exit", nme) == 0 || idcmp("end", nme) == 0;
	case 'F':
	case 'f':
		return idcmp("false", nme) == 0 || idcmp("function", nme) == 0;
	case 'I':
	case 'i':
		return idcmp("include", nme) == 0;
	case 'M':
	case 'm':
		return idcmp("module", nme) == 0 || idcmp("macro", nme) == 0;
	case 'O':
	case 'o':
		return idcmp("orcam", nme) == 0;
	case 'P':
	case 'p':
		return idcmp("pattern", nme) == 0;
	case 'T':
	case 't':
		return idcmp("thread", nme) == 0 || idcmp("true", nme) == 0;
	default:
		return 0;
	}
}
#endif
625 :
626 : /* Beware, the symbol table structure assumes that it is relatively
627 : * cheap to perform a linear search to a variable or constant. */
628 : static int
629 66814930 : makeVarSpace(MalBlkPtr mb)
630 : {
631 66814930 : if (mb->vtop >= mb->vsize) {
632 59317 : VarRecord *new;
633 59317 : int s = (mb->vtop / MALCHUNK + 1) * MALCHUNK;
634 59317 : new = (VarRecord *) GDKrealloc(mb->var, s * sizeof(VarRecord));
635 59317 : if (new == NULL) {
636 : /* the only place to return an error signal at this stage. */
637 : /* The Client context should be passed around more deeply */
638 0 : mb->errors = createMalException(mb, 0, TYPE, SQLSTATE(HY013) MAL_MALLOC_FAIL);
639 0 : return -1;
640 : }
641 59317 : memset(new + mb->vsize, 0, (s - mb->vsize) * sizeof(VarRecord));
642 59317 : mb->vsize = s;
643 59317 : mb->var = new;
644 : }
645 : return 0;
646 : }
647 :
648 : /* create and initialize a variable record*/
649 : void
650 66800081 : setVariableType(MalBlkPtr mb, const int n, malType type)
651 : {
652 66800081 : assert(n >= 0 && n < mb->vtop);
653 66800081 : setVarType(mb, n, type);
654 66800081 : setRowCnt(mb, n, 0);
655 66800081 : clrVarFixed(mb, n);
656 66800081 : clrVarUsed(mb, n);
657 66800081 : clrVarInit(mb, n);
658 66800081 : clrVarDisabled(mb, n);
659 66800081 : clrVarConstant(mb, n);
660 66800081 : clrVarCleanup(mb, n);
661 66800081 : }
662 :
663 : char *
664 3898221 : getVarName(MalBlkPtr mb, int idx)
665 : {
666 3898221 : char *s = mb->var[idx].name;
667 3898221 : if (getVarKind(mb, idx) == 0)
668 0 : setVarKind(mb, idx, REFMARKER);
669 3898221 : if (*s == 0)
670 442027 : (void) snprintf(s, IDLENGTH, "%c_%d", getVarKind(mb, idx), mb->vid++);
671 3898221 : return s;
672 : }
673 :
674 : int
675 66815178 : newVariable(MalBlkPtr mb, const char *name, size_t len, malType type)
676 : {
677 66815178 : int n;
678 66815178 : int kind = REFMARKER;
679 66815178 : if (mb->errors)
680 : return -1;
681 66815178 : if (len >= IDLENGTH) {
682 1 : mb->errors = createMalException(mb, 0, TYPE, "newVariable: id too long");
683 1 : return -1;
684 : }
685 66815177 : if (makeVarSpace(mb)) { /* no space for a new variable */
686 : return -1;
687 : }
688 66813866 : n = mb->vtop;
689 66813866 : if (name == 0 || len == 0) {
690 66755173 : mb->var[n].name[0] = 0;
691 : } else { /* avoid calling strcpy_len since we're not interested in the * source length, and that may be very large */
692 58693 : char *nme = mb->var[n].name;
693 376734 : for (size_t i = 0; i < len; i++)
694 318041 : nme[i] = name[i];
695 58693 : nme[len] = 0;
696 58693 : kind = nme[0];
697 66813866 : } mb->vtop++;
698 66813866 : setVarKind(mb, n, kind);
699 66813866 : setVariableType(mb, n, type);
700 66813866 : return n;
701 : }
702 :
703 : /* Simplified cloning. */
704 : int
705 0 : cloneVariable(MalBlkPtr tm, MalBlkPtr mb, int x)
706 : {
707 0 : int res;
708 0 : if (isVarConstant(mb, x))
709 0 : res = cpyConstant(tm, getVar(mb, x));
710 : else {
711 0 : res = newTmpVariable(tm, getVarType(mb, x));
712 0 : if (*mb->var[x].name)
713 0 : strcpy(tm->var[x].name, mb->var[x].name); /* res = newVariable(tm, getVarName(mb, x), strlen(getVarName(mb,x)), getVarType(mb, x)); */
714 : }
715 0 : if (res < 0)
716 : return res;
717 0 : if (isVarFixed(mb, x))
718 0 : setVarFixed(tm, res);
719 0 : if (isVarUsed(mb, x))
720 0 : setVarUsed(tm, res);
721 0 : if (isVarInit(mb, x))
722 0 : setVarInit(tm, res);
723 0 : if (isVarDisabled(mb, x))
724 0 : setVarDisabled(tm, res);
725 0 : if (isVarCleanup(mb, x))
726 0 : setVarCleanup(tm, res);
727 0 : getVarSTC(tm, x) = getVarSTC(mb, x);
728 0 : setVarKind(tm, x, getVarKind(mb, x));
729 0 : return res;
730 : }
731 :
732 : int
733 66756078 : newTmpVariable(MalBlkPtr mb, malType type)
734 : {
735 66756078 : return newVariable(mb, 0, 0, type);
736 : }
737 :
738 : int
739 275 : newTypeVariable(MalBlkPtr mb, malType type)
740 : {
741 275 : int n, i;
742 1288 : for (i = 0; i < mb->vtop; i++)
743 1056 : if (isVarTypedef(mb, i) && getVarType(mb, i) == type)
744 : break;
745 275 : if (i < mb->vtop)
746 : return i;
747 232 : n = newTmpVariable(mb, type);
748 232 : if (n >= 0)
749 232 : setVarTypedef(mb, n);
750 : return n;
751 : }
752 :
753 : void
754 68832 : clearVariable(MalBlkPtr mb, int varid)
755 : {
756 68832 : VarPtr v;
757 68832 : v = getVar(mb, varid);
758 68832 : if (isVarConstant(mb, varid) || isVarDisabled(mb, varid))
759 26531 : VALclear(&v->value);
760 68832 : v->type = 0;
761 68832 : v->constant = 0;
762 68832 : v->typevar = 0;
763 68832 : v->fixedtype = 0;
764 68832 : v->cleanup = 0;
765 68832 : v->initialized = 0;
766 68832 : v->used = 0;
767 68832 : v->rowcnt = 0;
768 68832 : v->eolife = 0;
769 68832 : v->stc = 0;
770 68832 : }
771 :
772 : void
773 55 : freeVariable(MalBlkPtr mb, int varid)
774 : {
775 55 : clearVariable(mb, varid);
776 55 : }
777 :
778 : /* A special action is to reduce the variable space by removing all
779 : * that do not contribute.
780 : * All temporary variables are renamed in the process to trim the varid.
781 : */
782 : void
783 3 : trimMalVariables_(MalBlkPtr mb, MalStkPtr glb)
784 : {
785 3 : int *alias, cnt = 0, i, j;
786 3 : InstrPtr q;
787 3 : if (mb->vtop == 0)
788 : return;
789 3 : alias = (int *) GDKzalloc(mb->vtop * sizeof(int));
790 3 : if (alias == NULL)
791 : return; /* forget it if we run out of memory *//* build the alias table */
792 472 : for (i = 0; i < mb->vtop; i++) {
793 469 : if (isVarUsed(mb, i) == 0) {
794 55 : if (glb && i < glb->stktop && isVarConstant(mb, i))
795 0 : VALclear(&glb->stk[i]);
796 55 : freeVariable(mb, i);
797 55 : continue;
798 : }
799 414 : if (i > cnt) { /* remap temporary variables */
800 354 : VarRecord t = mb->var[cnt];
801 354 : mb->var[cnt] = mb->var[i];
802 354 : mb->var[i] = t;
803 : } /* valgrind finds a leak when we move these variable record * pointers around. */
804 414 : alias[i] = cnt;
805 414 : if (glb && i < glb->stktop && i != cnt) {
806 0 : glb->stk[cnt] = glb->stk[i];
807 0 : VALempty(&glb->stk[i]);
808 : }
809 414 : cnt++;
810 : } /* remap all variable references to their new position. */
811 3 : if (cnt < mb->vtop) {
812 277 : for (i = 0; i < mb->stop; i++) {
813 274 : q = getInstrPtr(mb, i);
814 1523 : for (j = 0; j < q->argc; j++) {
815 1249 : getArg(q, j) = alias[getArg(q, j)];
816 : }
817 : }
818 3 : mb->vtop = cnt;
819 : }
820 3 : mb->vid = 0;
821 3 : GDKfree(alias);
822 : }
823 :
824 : void
825 3 : trimMalVariables(MalBlkPtr mb, MalStkPtr stk)
826 : {
827 3 : int i, j;
828 3 : InstrPtr q; /* reset the use bit for all non-signature arguments */
829 472 : for (i = 0; i < mb->vtop; i++)
830 469 : clrVarUsed(mb, i); /* build the use table */
831 277 : for (i = 0; i < mb->stop; i++) {
832 274 : q = getInstrPtr(mb, i);
833 1523 : for (j = 0; j < q->argc; j++)
834 1249 : setVarUsed(mb, getArg(q, j));
835 : }
836 3 : trimMalVariables_(mb, stk);
837 3 : }
838 :
839 : /* MAL constants
840 : * Constants are stored in the symbol table and referenced by a
841 : * variable identifier. This means that per MAL instruction, we may
842 : * end up with MAXARG entries in the symbol table. This may lead to
843 : * long searches for variables. An optimization strategy deployed in
844 : * the current implementation is to look around for a similar
845 : * (constant) definition and to reuse its identifier. This avoids an
846 : * exploding symbol table with a lot of temporary variables (as in
847 : * tst400cHuge)
848 : *
849 : * But then the question becomes how far to search? Searching through
850 : * all variables is only useful when the list remains short or when
851 : * the constant-variable-name is easily derivable from its literal
852 : * value and a hash-based index leads you quickly to it.
853 : *
854 : * For the time being, we use a MAL system parameter, MAL_VAR_WINDOW,
855 : * to indicate the number of symbol table entries to consider. Setting
856 : * it to >= MAXARG will at least capture repeated use of a constant
857 : * within a single function call or repeated use within a small block
858 : * of code.
859 : *
860 : * The final step is to prepare a GDK value record, from which the
861 : * internal representation can be obtained during MAL interpretation.
862 : *
863 : * The constant values are linked together to improve searching
864 : * them. The start of the constant list is kept in the MalBlk.
865 : *
866 : * Conversion of a constant to another type is limited to well-known
867 : * coercion rules. Errors are reported and the nil value is set. */
868 :
869 : /* Converts the constant in vr to the MAL type type. Conversion is
870 : * done in the vr struct. */
871 : str
872 294489 : convertConstant(int type, ValPtr vr)
873 : {
874 294489 : if (type > GDKatomcnt)
875 0 : throw(SYNTAX, "convertConstant", "type index out of bound");
876 294489 : if (vr->vtype == type)
877 : return MAL_SUCCEED;
878 294485 : if (type == TYPE_bat || isaBatType(type)) { /* BAT variables can only be set to nil */
879 147 : if (vr->vtype != TYPE_void)
880 0 : throw(SYNTAX, "convertConstant", "BAT conversion error");
881 147 : VALclear(vr);
882 147 : vr->vtype = type;
883 147 : vr->val.bval = bat_nil;
884 147 : return MAL_SUCCEED;
885 : }
886 294338 : if (type == TYPE_ptr) { /* all coercions should be avoided to protect against memory probing */
887 32 : if (vr->vtype == TYPE_void) {
888 32 : VALclear(vr);
889 32 : vr->vtype = type;
890 32 : vr->val.pval = NULL;
891 32 : return MAL_SUCCEED;
892 : }
893 0 : if (vr->vtype != type)
894 0 : throw(SYNTAX, "convertConstant", "pointer conversion error");
895 : return MAL_SUCCEED;
896 : }
897 294306 : if (type == TYPE_any) {
898 : #ifndef DEBUG_MAL_INSTR
899 : assert(0);
900 : #endif
901 0 : throw(SYNTAX, "convertConstant", "missing type");
902 : }
903 294306 : if (VALconvert(type, vr) == NULL) {
904 3 : if (vr->vtype == TYPE_str)
905 0 : throw(SYNTAX, "convertConstant", "parse error in '%s'", vr->val.sval);
906 3 : throw(SYNTAX, "convertConstant", "coercion failed");
907 : }
908 : return MAL_SUCCEED;
909 : }
910 :
911 : int
912 46848718 : fndConstant(MalBlkPtr mb, const ValRecord *cst, int depth)
913 : {
914 46848718 : int i, k;
915 46848718 : const void *p; /* pointers never match */
916 46848718 : if (ATOMstorage(cst->vtype) == TYPE_ptr)
917 : return -1;
918 46699472 : p = VALptr(cst);
919 46699472 : k = mb->vtop - depth;
920 46699472 : if (k < 0)
921 : k = 0;
922 511898863 : for (i = k; i < mb->vtop - 1; i++) {
923 493006670 : VarPtr v = getVar(mb, i);
924 493006670 : if (v->constant) {
925 200704825 : if (v && v->type == cst->vtype && v->value.len == cst->len
926 95903784 : && ATOMcmp(cst->vtype, VALptr(&v->value), p) == 0)
927 27794722 : return i;
928 : }
929 : }
930 : return -1;
931 : }
932 :
933 : int
934 3005 : cpyConstant(MalBlkPtr mb, VarPtr vr)
935 : {
936 3005 : int i;
937 3005 : ValRecord cst;
938 3005 : if (VALcopy(&cst, &vr->value) == NULL)
939 : return -1;
940 3005 : i = defConstant(mb, vr->type, &cst);
941 3005 : if (i < 0)
942 : return -1;
943 : return i;
944 : }
945 :
946 : int
947 42017321 : defConstant(MalBlkPtr mb, int type, ValPtr cst)
948 : {
949 42017321 : int k;
950 42017321 : str msg;
951 42017321 : if (isaBatType(type)) {
952 263 : if (cst->vtype == TYPE_void) {
953 262 : cst->vtype = TYPE_bat;
954 262 : cst->val.bval = bat_nil;
955 : } else {
956 1 : mb->errors = createMalException(mb, 0, TYPE, "BAT coercion error");
957 1 : VALclear(cst); /* it could contain allocated space */
958 1 : return -1;
959 : }
960 42017058 : } else if (cst->vtype != type && !isPolyType(type)) {
961 2065 : int otype = cst->vtype;
962 2065 : assert(type != TYPE_any); /* help Coverity */
963 2065 : msg = convertConstant(getBatType(type), cst);
964 2065 : if (msg) {
965 3 : str ft, tt; /* free old value */
966 3 : ft = getTypeName(otype);
967 3 : tt = getTypeName(type);
968 3 : if (ft && tt)
969 3 : mb->errors = createMalException(mb, 0, TYPE,
970 : "constant coercion error from %s to %s",
971 : ft, tt);
972 : else
973 0 : mb->errors = createMalException(mb, 0, TYPE,
974 : "constant coercion error");
975 3 : GDKfree(ft);
976 3 : GDKfree(tt);
977 3 : freeException(msg);
978 3 : VALclear(cst); /* it could contain allocated space */
979 3 : return -1;
980 : } else {
981 2062 : assert(cst->vtype == type);
982 : }
983 : }
984 42017317 : k = fndConstant(mb, cst, MAL_VAR_WINDOW);
985 42016807 : if (k >= 0) { /* protect against leaks coming from constant reuse */
986 24426828 : VALclear(cst);
987 24426828 : return k;
988 : }
989 17589979 : k = newTmpVariable(mb, type);
990 17586510 : if (k < 0) {
991 0 : VALclear(cst);
992 0 : return -1;
993 : }
994 17586510 : setVarConstant(mb, k);
995 17586510 : setVarFixed(mb, k);
996 17586510 : if (type >= 0 && type < GDKatomcnt && ATOMextern(type))
997 4436857 : setVarCleanup(mb, k);
998 : else
999 13149653 : clrVarCleanup(mb, k); /* if cst is external, we give its allocated buffer away, so clear * it to avoid confusion */
1000 17586510 : getVarConstant(mb, k) = *cst;
1001 17586510 : VALempty(cst);
1002 17586510 : return k;
1003 : }
1004 :
1005 : /* Argument handling
1006 : * The number of arguments for procedures is currently
1007 : * limited. Furthermore, we should assure that no variable is
1008 : * referenced before being assigned. Failure to obey should mark the
1009 : * instruction as type-error. */
1010 : static InstrPtr
1011 319 : extendInstruction(MalBlkPtr mb, InstrPtr p)
1012 : {
1013 319 : InstrPtr pn = p;
1014 319 : if (p->argc == p->maxarg) {
1015 319 : int space = p->maxarg * sizeof(p->argv[0]) + offsetof(InstrRecord, argv);
1016 319 : pn = (InstrPtr) GDKrealloc(p, space + MAXARG * sizeof(p->argv[0]));
1017 319 : if (pn == NULL) { /* In the exceptional case we can not allocate more space * then we show an exception, mark the block as erroneous * and leave the instruction as is. */
1018 0 : mb->errors = createMalException(mb, 0, TYPE,
1019 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
1020 0 : return p;
1021 : }
1022 319 : memset(((char *) pn) + space, 0, MAXARG * sizeof(pn->argv[0]));
1023 319 : pn->maxarg += MAXARG;
1024 : }
1025 : return pn;
1026 : }
1027 :
1028 : InstrPtr
1029 128450492 : pushArgument(MalBlkPtr mb, InstrPtr p, int varid)
1030 : {
1031 128450492 : if (p == NULL || mb->errors)
1032 : return p;
1033 128450492 : if (varid < 0) { /* leave everything as is in this exceptional programming error */
1034 0 : mb->errors = createMalException(mb, 0, TYPE, "improper variable id");
1035 0 : return p;
1036 : }
1037 128450492 : if (p->argc == p->maxarg) {
1038 : #ifndef NDEBUG
1039 1623 : for (int i = 0; i < mb->stop; i++)
1040 1304 : assert(mb->stmt[i] != p);
1041 : #endif
1042 319 : p = extendInstruction(mb, p);
1043 319 : if (mb->errors)
1044 : return p;
1045 : } /* protect against the case that the instruction is malloced in isolation */
1046 128450492 : if (mb->maxarg < p->maxarg)
1047 36969 : mb->maxarg = p->maxarg;
1048 128450492 : p->argv[p->argc++] = varid;
1049 128450492 : return p;
1050 : }
1051 :
1052 : InstrPtr
1053 4364697 : setArgument(MalBlkPtr mb, InstrPtr p, int idx, int varid)
1054 : {
1055 4364697 : int i;
1056 4364697 : if (p == NULL || mb->errors)
1057 : return p;
1058 4364697 : p = pushArgument(mb, p, varid); /* make space */
1059 4368641 : for (i = p->argc - 1; i > idx; i--)
1060 3946 : getArg(p, i) = getArg(p, i - 1);
1061 4364695 : getArg(p, i) = varid;
1062 4364695 : return p;
1063 : }
1064 :
1065 : InstrPtr
1066 4365620 : pushReturn(MalBlkPtr mb, InstrPtr p, int varid)
1067 : {
1068 4365620 : if (p == NULL || mb->errors)
1069 : return p;
1070 4365620 : if (p->retc == 1 && p->argv[0] == -1) {
1071 926 : p->argv[0] = varid;
1072 926 : return p;
1073 : }
1074 4364694 : p = setArgument(mb, p, p->retc, varid);
1075 4364694 : p->retc++;
1076 4364694 : return p;
1077 : }
1078 :
1079 : /* Store the information of a destination variable in the signature
1080 : * structure of each instruction. This code is largely equivalent to
1081 : * pushArgument, but it is more efficient in searching and collecting
1082 : * the information.
1083 : * TODO */
1084 : /* swallows name argument */
1085 : InstrPtr
1086 5384 : pushArgumentId(MalBlkPtr mb, InstrPtr p, const char *name)
1087 : {
1088 5384 : int v;
1089 5384 : if (p == NULL || mb->errors)
1090 : return p;
1091 5384 : v = findVariable(mb, name);
1092 5384 : if (v < 0) {
1093 523 : size_t namelen = strlen(name);
1094 523 : if ((v = newVariable(mb, name, namelen, getAtomIndex(name, namelen, TYPE_any))) < 0) {
1095 : /* set the MAL block to erroneous and simply return without
1096 : * doing anything */
1097 : /* mb->errors already set */
1098 : return p;
1099 : }
1100 : }
1101 5383 : return pushArgument(mb, p, v);
1102 : }
1103 :
1104 : /* The alternative is to remove arguments from an instruction
1105 : * record. This is typically part of instruction constructions. */
1106 : void
1107 1396613 : delArgument(InstrPtr p, int idx)
1108 : {
1109 1396613 : int i;
1110 1745560 : for (i = idx; i < p->argc - 1; i++)
1111 348947 : p->argv[i] = p->argv[i + 1];
1112 1396613 : p->argc--;
1113 1396613 : if (idx < p->retc)
1114 93129 : p->retc--;
1115 1396613 : }
1116 :
1117 : void
1118 31907 : setArgType(MalBlkPtr mb, InstrPtr p, int i, int tpe)
1119 : {
1120 31907 : assert(p->argv[i] < mb->vsize);
1121 31907 : setVarType(mb, getArg(p, i), tpe);
1122 31907 : }
1123 :
1124 : void
1125 0 : setReturnArgument(InstrPtr p, int i)
1126 : {
1127 0 : setDestVar(p, i);
1128 0 : }
1129 :
1130 : malType
1131 0 : destinationType(MalBlkPtr mb, InstrPtr p)
1132 : {
1133 0 : if (p->argc > 0)
1134 0 : return getVarType(mb, getDestVar(p));
1135 : return TYPE_any;
1136 : }
1137 :
1138 : /* For polymorphic instructions we should keep around the maximal
1139 : * index to later allocate sufficient space for type resolutions maps.
1140 : * Beware, that we should only consider the instruction polymorphic if
1141 : * it has a positive index or belongs to the signature.
1142 : * BATs can only have a polymorphic type at the tail.
1143 : */
1144 : inline void
1145 653227 : setPolymorphic(InstrPtr p, int tpe, int force)
1146 : {
1147 653227 : int c1 = 0, c2 = 0;
1148 653227 : if (force == FALSE && tpe == TYPE_any)
1149 : return;
1150 653227 : if (isaBatType(tpe))
1151 469111 : c1 = TYPE_oid;
1152 653227 : if (getTypeIndex(tpe) > 0)
1153 : c2 = getTypeIndex(tpe);
1154 653190 : else if (getBatType(tpe) == TYPE_any)
1155 643749 : c2 = 1;
1156 653227 : c1 = c1 > c2 ? c1 : c2;
1157 653227 : if (c1 > 0 && c1 >= p->polymorphic)
1158 433327 : p->polymorphic = c1 + 1;
1159 : }
1160 :
1161 : /* Instructions are simply appended to a MAL block. It should always succeed.
1162 : * The assumption is to push it when you are completely done with its preparation.
1163 : */
1164 : void
1165 242820413 : pushInstruction(MalBlkPtr mb, InstrPtr p)
1166 : {
1167 242820413 : int i;
1168 242820413 : int extra;
1169 242820413 : InstrPtr q;
1170 242820413 : if (p == NULL)
1171 : return;
1172 242820413 : extra = mb->vsize - mb->vtop; /* the extra variables already known */
1173 242820413 : if (mb->stop + 1 >= mb->ssize) {
1174 30841 : int s = ((mb->ssize + extra) / MALCHUNK + 1) * MALCHUNK;
1175 30841 : if (resizeMalBlk(mb, s) < 0) {
1176 : /* we are now left with the situation that the new
1177 : * instruction is dangling. The hack is to take an
1178 : * instruction out of the block that is likely not
1179 : * referenced independently. The last resort is to take the
1180 : * first, which should always be there. This assumes that
1181 : * no references are kept elsewhere to the statement. */
1182 0 : assert(mb->errors != NULL);
1183 0 : for (i = 1; i < mb->stop; i++) {
1184 0 : q = getInstrPtr(mb, i);
1185 0 : if (q->token == REMsymbol) {
1186 0 : freeInstruction(q);
1187 0 : mb->stmt[i] = p;
1188 0 : return;
1189 : }
1190 : }
1191 0 : freeInstruction(getInstrPtr(mb, 0));
1192 0 : mb->stmt[0] = p;
1193 0 : return;
1194 : }
1195 : }
1196 242820413 : if (mb->stmt[mb->stop])
1197 19082 : freeInstruction(mb->stmt[mb->stop]);
1198 242812411 : p->pc = mb->stop;
1199 242812411 : mb->stmt[mb->stop++] = p;
1200 : }
|