Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * author M.L. Kersten
15 : * The default SQL optimizer pipeline can be set per server. See the
16 : * optpipe setting in monetdb(1) when using merovingian. During SQL
17 : * initialization, the optimizer pipeline is checked against the
18 : * dependency information maintained in the optimizer library to ensure
19 : * there are no conflicts and at least the pre-requisite optimizers are
20 : * used. The setting of sql_optimizer can be either the list of
21 : * optimizers to run, or one or more variables containing the optimizer
22 : * pipeline to run. The latter is provided for readability purposes
23 : * only.
24 : */
25 : #include "monetdb_config.h"
26 : #include "opt_pipes.h"
27 : #include "mal_import.h"
28 : #include "opt_support.h"
29 : #include "mal_client.h"
30 : #include "mal_instruction.h"
31 : #include "mal_function.h"
32 : #include "mal_listing.h"
33 : #include "mal_linker.h"
34 :
35 : #define MAXOPTPIPES 64 /* number of slots in pipes[]: built-in plus user-defined pipelines */
36 :
37 : static struct pipeline { /* one named optimizer pipeline */
38 : char *name; /* pipeline name; the scans over pipes[] stop at the first NULL name */
39 : char **def; /* NULL terminated list of optimizers */
40 : bool builtin; /* true for the entries of this table: they cannot be redefined and survive opt_pipes_reset() */
41 : } pipes[MAXOPTPIPES] = {
42 : /* The minimal pipeline necessary by the server to operate correctly
43 : *
44 : * NOTE:
45 : * If you change the minimal pipe, please also update the man page
46 : * (see tools/mserver/mserver5.1) accordingly!
47 : */
48 : {"minimal_pipe",
49 : (char *[]) {
50 : "inline",
51 : "remap",
52 : "emptybind",
53 : "deadcode",
54 : "for",
55 : "dict",
56 : "multiplex",
57 : "generator",
58 : "profiler",
59 : "garbageCollector",
60 : NULL,
61 : },
62 : true,
63 : },
64 : {"minimal_fast",
65 : (char *[]) {
66 : "minimalfast",
67 : NULL,
68 : },
69 : true,
70 : },
71 : /* NOTE:
72 : * If you change the default pipe, please also update the no_mitosis
73 : * pipe and sequential pipe (see below), as well as the man page (see
74 : * tools/mserver/mserver5.1) accordingly!
75 : */
76 : {"default_pipe",
77 : (char *[]) {
78 : "inline",
79 : "remap",
80 : "costModel",
81 : "coercions",
82 : "aliases",
83 : "evaluate",
84 : "emptybind",
85 : "deadcode",
86 : "pushselect",
87 : "aliases",
88 : "for",
89 : "dict",
90 : "mitosis",
91 : "mergetable",
92 : "aliases",
93 : "constants",
94 : "commonTerms",
95 : "projectionpath",
96 : "deadcode",
97 : "matpack",
98 : "reorder",
99 : "dataflow",
100 : "querylog",
101 : "multiplex",
102 : "generator",
103 : "candidates",
104 : "deadcode",
105 : "postfix",
106 : "profiler",
107 : "garbageCollector",
108 : NULL,
109 : },
110 : true,
111 : },
112 : {"default_fast",
113 : (char *[]) {
114 : "defaultfast",
115 : NULL,
116 : },
117 : true,
118 : },
119 : /* The no_mitosis pipe line is (and should be kept!) identical to the
120 : * default pipeline, except that optimizer mitosis is omitted. It is
121 : * used mainly to make some tests work deterministically, and to check
122 : * / debug whether "unexpected" problems are related to mitosis
123 : * (and/or mergetable).
124 : * NOTE(review): the list below also lacks "for" and "dict" -- confirm that this is intended.
125 : * NOTE:
126 : * If you change the no_mitosis pipe, please also update the man page
127 : * (see tools/mserver/mserver5.1) accordingly!
128 : */
129 : {"no_mitosis_pipe",
130 : (char *[]) {
131 : "inline",
132 : "remap",
133 : "costModel",
134 : "coercions",
135 : "aliases",
136 : "evaluate",
137 : "emptybind",
138 : "deadcode",
139 : "pushselect",
140 : "aliases",
141 : "mergetable",
142 : "aliases",
143 : "constants",
144 : "commonTerms",
145 : "projectionpath",
146 : "deadcode",
147 : "matpack",
148 : "reorder",
149 : "dataflow",
150 : "querylog",
151 : "multiplex",
152 : "generator",
153 : "candidates",
154 : "deadcode",
155 : "postfix",
156 : "profiler",
157 : "garbageCollector",
158 : NULL,
159 : },
160 : true,
161 : },
162 : /* The sequential pipe line is (and should be kept!) identical to the
163 : * default pipeline, except that optimizers mitosis & dataflow are
164 : * omitted. It is used mainly to make some tests work
165 : * deterministically, i.e., avoid ambiguous output, by avoiding
166 : * parallelism.
167 : *
168 : * NOTE:
169 : * If you change the sequential pipe, please also update the man page
170 : * (see tools/mserver/mserver5.1) accordingly!
171 : */
172 : {"sequential_pipe",
173 : (char *[]) {
174 : "inline",
175 : "remap",
176 : "costModel",
177 : "coercions",
178 : "aliases",
179 : "evaluate",
180 : "emptybind",
181 : "deadcode",
182 : "pushselect",
183 : "aliases",
184 : "for",
185 : "dict",
186 : "mergetable",
187 : "aliases",
188 : "constants",
189 : "commonTerms",
190 : "projectionpath",
191 : "deadcode",
192 : "matpack",
193 : "reorder",
194 : "querylog",
195 : "multiplex",
196 : "generator",
197 : "candidates",
198 : "deadcode",
199 : "postfix",
200 : "profiler",
201 : "garbageCollector",
202 : NULL,
203 : },
204 : true,
205 : },
206 : {"recursive_pipe", /* NOTE(review): compared with sequential_pipe this list lacks emptybind, commonTerms and reorder -- confirm */
207 : (char *[]) {
208 : "inline",
209 : "remap",
210 : "costModel",
211 : "coercions",
212 : "aliases",
213 : "evaluate",
214 : "deadcode",
215 : "pushselect",
216 : "aliases",
217 : "for",
218 : "dict",
219 : "mergetable",
220 : "aliases",
221 : "constants",
222 : "projectionpath",
223 : "deadcode",
224 : "matpack",
225 : "querylog",
226 : "multiplex",
227 : "generator",
228 : "candidates",
229 : "deadcode",
230 : "postfix",
231 : "profiler",
232 : "garbageCollector",
233 : NULL,
234 : },
235 : true,
236 : },
237 : /* Experimental pipelines stressing various components under
238 : * development. Do not use any of these pipelines in production
239 : * settings!
240 : */
241 : /* sentinel: the NULL name ends the list of defined pipelines */
242 : {NULL, NULL, false,},
243 : };
244 :
245 : #include "optimizer_private.h"
246 :
247 : static MT_Lock pipeLock = MT_LOCK_INITIALIZER(pipeLock); /* taken by addPipeDefinition() while it changes pipes[] */
248 :
249 : static str
250 23 : validatePipe(struct pipeline *pipe)
251 : {
252 23 : bool mitosis = false, deadcode = false, mergetable = false;
253 23 : bool multiplex = false, garbage = false, generator = false, remap = false;
254 23 : int i;
255 :
256 23 : if (pipe->def == NULL || pipe->def[0] == NULL)
257 0 : throw(MAL, "optimizer.validate", SQLSTATE(42000) "missing optimizers");
258 :
259 23 : if (strcmp(pipe->def[0], "defaultfast") == 0
260 23 : || strcmp(pipe->def[0], "minimalfast") == 0)
261 : return MAL_SUCCEED;
262 :
263 23 : if (strcmp(pipe->def[0], "inline") != 0)
264 15 : throw(MAL, "optimizer.validate",
265 : SQLSTATE(42000) "'inline' should be the first\n");
266 :
267 48 : for (i = 0; pipe->def[i]; i++) {
268 41 : const char *fname = pipe->def[i];
269 41 : if (garbage)
270 1 : throw(MAL, "optimizer.validate",
271 : SQLSTATE(42000)
272 : "'garbageCollector' should be used as the last one\n");
273 40 : if (strcmp(fname, "deadcode") == 0)
274 : deadcode = true;
275 33 : else if (strcmp(fname, "remap") == 0)
276 : remap = true;
277 32 : else if (strcmp(fname, "mitosis") == 0)
278 : mitosis = true;
279 31 : else if (strcmp(fname, "mergetable") == 0)
280 : mergetable = true;
281 30 : else if (strcmp(fname, "multiplex") == 0)
282 : multiplex = true;
283 24 : else if (strcmp(fname, "generator") == 0)
284 : generator = true;
285 23 : else if (strcmp(fname, "garbageCollector") == 0)
286 4 : garbage = true;
287 : }
288 :
289 7 : if (mitosis && !mergetable)
290 0 : throw(MAL, "optimizer.validate",
291 : SQLSTATE(42000) "'mitosis' needs 'mergetable'\n");
292 :
293 : /* several optimizer should be used */
294 7 : if (!multiplex)
295 1 : throw(MAL, "optimizer.validate",
296 : SQLSTATE(42000) "'multiplex' should be used\n");
297 6 : if (!deadcode)
298 1 : throw(MAL, "optimizer.validate",
299 : SQLSTATE(42000) "'deadcode' should be used at least once\n");
300 5 : if (!garbage)
301 2 : throw(MAL, "optimizer.validate",
302 : SQLSTATE(42000)
303 : "'garbageCollector' should be used as the last one\n");
304 3 : if (!remap)
305 2 : throw(MAL, "optimizer.validate",
306 : SQLSTATE(42000) "'remap' should be used\n");
307 1 : if (!generator)
308 0 : throw(MAL, "optimizer.validate",
309 : SQLSTATE(42000) "'generator' should be used\n");
310 :
311 : return MAL_SUCCEED;
312 : }
313 :
314 : /* the session_pipe is the one defined by the user */
315 : str
316 23 : addPipeDefinition(Client cntxt, const char *name, const char *pipe)
317 : {
318 23 : int i, n;
319 23 : str msg = MAL_SUCCEED;
320 23 : struct pipeline oldpipe;
321 23 : const char *p;
322 :
323 23 : (void) cntxt;
324 23 : MT_lock_set(&pipeLock);
325 207 : for (i = 0; i < MAXOPTPIPES && pipes[i].name; i++)
326 161 : if (strcmp(name, pipes[i].name) == 0)
327 : break;
328 :
329 23 : if (i == MAXOPTPIPES) {
330 0 : MT_lock_unset(&pipeLock);
331 0 : throw(MAL, "optimizer.addPipeDefinition",
332 : SQLSTATE(HY013) "Out of slots");
333 : }
334 23 : if (pipes[i].name && pipes[i].builtin) {
335 0 : MT_lock_unset(&pipeLock);
336 0 : throw(MAL, "optimizer.addPipeDefinition",
337 : SQLSTATE(42000) "No overwrite of built in allowed");
338 : }
339 :
340 : /* save old value */
341 23 : oldpipe = pipes[i];
342 46 : pipes[i] = (struct pipeline) {
343 23 : .name = GDKstrdup(name),
344 : };
345 23 : if (pipes[i].name == NULL)
346 0 : goto bailout;
347 : n = 1;
348 173 : for (p = pipe; p; p = strchr(p, ';')) {
349 150 : p++;
350 150 : n++;
351 : }
352 23 : if ((pipes[i].def = GDKmalloc(n * sizeof(char *))) == NULL)
353 0 : goto bailout;
354 : n = 0;
355 150 : while ((p = strchr(pipe, ';')) != NULL) {
356 127 : if (strncmp(pipe, "optimizer.", 10) == 0)
357 126 : pipe += 10;
358 127 : const char *q = pipe;
359 1349 : while (q < p && *q != '(' && *q != '.' && !GDKisspace(*q))
360 1222 : q++;
361 127 : if (*q == '.') {
362 0 : msg = createException(MAL, "optimizer.addPipeDefinition",
363 : SQLSTATE(42000) "Bad pipeline definition");
364 0 : goto bailout;
365 : }
366 127 : if (q > pipe) {
367 126 : if ((pipes[i].def[n++] = GDKstrndup(pipe, q - pipe)) == NULL)
368 0 : goto bailout;
369 : }
370 127 : pipe = p + 1;
371 143 : while (*pipe && GDKisspace(*pipe))
372 16 : pipe++;
373 : }
374 23 : pipes[i].def[n] = NULL;
375 23 : msg = validatePipe(&pipes[i]);
376 23 : if (msg != MAL_SUCCEED) {
377 : /* failed: restore old value */
378 22 : goto bailout;
379 : }
380 1 : MT_lock_unset(&pipeLock);
381 : /* succeeded: destroy old value */
382 1 : GDKfree(oldpipe.name);
383 1 : if (oldpipe.def)
384 0 : for (n = 0; oldpipe.def[n]; n++)
385 0 : GDKfree(oldpipe.def[n]);
386 1 : GDKfree(oldpipe.def);
387 1 : return msg;
388 :
389 22 : bailout:
390 22 : GDKfree(pipes[i].name);
391 22 : if (pipes[i].def)
392 128 : for (n = 0; pipes[i].def[n]; n++)
393 106 : GDKfree(pipes[i].def[n]);
394 22 : GDKfree(pipes[i].def);
395 22 : pipes[i] = oldpipe;
396 22 : MT_lock_unset(&pipeLock);
397 22 : if (msg)
398 : return msg;
399 0 : throw(MAL, "optimizer.addPipeDefinition", SQLSTATE(HY013) MAL_MALLOC_FAIL);
400 : }
401 :
402 : bool
403 685 : isOptimizerPipe(const char *name)
404 : {
405 685 : int i;
406 :
407 2490 : for (i = 0; i < MAXOPTPIPES && pipes[i].name; i++)
408 2438 : if (strcmp(name, pipes[i].name) == 0)
409 : return true;
410 : return false;
411 : }
412 :
413 : str
414 87 : getPipeCatalog(bat *nme, bat *def, bat *stat)
415 : {
416 87 : BAT *b, *bn, *bs;
417 87 : int i;
418 87 : size_t l = 2048;
419 87 : char *buf = GDKmalloc(l);
420 :
421 87 : b = COLnew(0, TYPE_str, 20, TRANSIENT);
422 87 : bn = COLnew(0, TYPE_str, 20, TRANSIENT);
423 87 : bs = COLnew(0, TYPE_str, 20, TRANSIENT);
424 87 : if (buf == NULL || b == NULL || bn == NULL || bs == NULL) {
425 0 : BBPreclaim(b);
426 0 : BBPreclaim(bn);
427 0 : BBPreclaim(bs);
428 0 : GDKfree(buf);
429 0 : throw(MAL, "optimizer.getpipeDefinition",
430 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
431 : }
432 :
433 696 : for (i = 0; i < MAXOPTPIPES && pipes[i].name; i++) {
434 : size_t n = 1;
435 11223 : for (int j = 0; pipes[i].def[j]; j++)
436 10614 : n += strlen(pipes[i].def[j]) + 13;
437 609 : if (n > l) {
438 0 : GDKfree(buf);
439 0 : buf = GDKmalloc(n);
440 0 : l = n;
441 0 : if (buf == NULL) {
442 0 : BBPreclaim(b);
443 0 : BBPreclaim(bn);
444 0 : BBPreclaim(bs);
445 0 : GDKfree(buf);
446 0 : throw(MAL, "optimizer.getpipeDefinition",
447 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
448 : }
449 : }
450 609 : char *p = buf;
451 11223 : for (int j = 0; pipes[i].def[j]; j++) {
452 10614 : p = stpcpy(p, "optimizer.");
453 10614 : p = stpcpy(p, pipes[i].def[j]);
454 10614 : p = stpcpy(p, "();");
455 : }
456 609 : if (BUNappend(b, pipes[i].name, false) != GDK_SUCCEED
457 609 : || BUNappend(bn, buf, false) != GDK_SUCCEED
458 609 : || BUNappend(bs, pipes[i].builtin ? "stable" : "experimental",
459 : false) != GDK_SUCCEED) {
460 0 : BBPreclaim(b);
461 0 : BBPreclaim(bn);
462 0 : BBPreclaim(bs);
463 0 : GDKfree(buf);
464 0 : throw(MAL, "optimizer.getpipeDefinition",
465 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
466 : }
467 : }
468 87 : GDKfree(buf);
469 :
470 87 : *nme = b->batCacheid;
471 87 : BBPkeepref(b);
472 87 : *def = bn->batCacheid;
473 87 : BBPkeepref(bn);
474 87 : *stat = bs->batCacheid;
475 87 : BBPkeepref(bs);
476 87 : return MAL_SUCCEED;
477 : }
478 :
479 : /*
480 : * Add a new components of the optimizer pipe to the plan
481 : */
482 : str
483 569400 : addOptimizerPipe(Client cntxt, MalBlkPtr mb, const char *name)
484 : {
485 569400 : int i, j;
486 569400 : InstrPtr p;
487 569400 : str msg = MAL_SUCCEED;
488 :
489 569400 : (void) cntxt;
490 569400 : if (strcmp(name, "default_fast") == 0 && isSimpleSQL(mb)) {
491 2 : for (i = 0; i < MAXOPTPIPES && pipes[i].name; i++)
492 2 : if (strcmp(pipes[i].name, "minimal_fast") == 0)
493 : break;
494 : } else {
495 1671678 : for (i = 0; i < MAXOPTPIPES && pipes[i].name; i++)
496 1671678 : if (strcmp(pipes[i].name, name) == 0)
497 : break;
498 : }
499 :
500 569427 : if (i == MAXOPTPIPES || pipes[i].name == NULL)
501 0 : throw(MAL, "optimizer.addOptimizerPipe",
502 : SQLSTATE(22023) "Unknown optimizer");
503 :
504 14720869 : for (j = 0; pipes[i].def[j]; j++) {
505 14150556 : p = newFcnCall(mb, optimizerRef, pipes[i].def[j]);
506 14151495 : if (p == NULL)
507 0 : throw(MAL, "optimizer.addOptimizerPipe",
508 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
509 14151495 : p->fcn = (MALfcn) OPTwrapper;
510 14151495 : p->token = PATcall;
511 14151495 : pushInstruction(mb, p);
512 : }
513 : return msg;
514 : }
515 :
516 : void
517 350 : opt_pipes_reset(void)
518 : {
519 22750 : for (int i = 0; i < MAXOPTPIPES; i++)
520 22400 : if (pipes[i].name && !pipes[i].builtin) {
521 1 : GDKfree(pipes[i].name);
522 1 : if (pipes[i].def)
523 21 : for (int n = 0; pipes[i].def[n]; n++)
524 20 : GDKfree(pipes[i].def[n]);
525 1 : GDKfree(pipes[i].def);
526 1 : pipes[i] = (struct pipeline) {
527 : .name = NULL,
528 : };
529 : }
530 350 : }
|