Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * author M.L. Kersten
15 : * The default SQL optimizer pipeline can be set per server. See the
16 : * optpipe setting in monetdb(1) when using merovingian. During SQL
17 : * initialization, the optimizer pipeline is checked against the
18 : * dependency information maintained in the optimizer library to ensure
19 : * there are no conflicts and at least the pre-requisite optimizers are
20 : * used. The setting of sql_optimizer can be either the list of
21 : * optimizers to run, or one or more variables containing the optimizer
22 : * pipeline to run. The latter is provided for readability purposes
23 : * only.
24 : */
25 : #include "monetdb_config.h"
26 : #include "opt_pipes.h"
27 : #include "mal_import.h"
28 : #include "opt_support.h"
29 : #include "mal_client.h"
30 : #include "mal_instruction.h"
31 : #include "mal_function.h"
32 : #include "mal_listing.h"
33 : #include "mal_linker.h"
34 :
#define MAXOPTPIPES 64

/*
 * Table of all known optimizer pipelines.
 *
 * The pipelines compiled into the server come first and are flagged as
 * builtin.  The remaining slots are zero-initialized; the first entry
 * whose name is NULL marks the end of the slots in use.
 */
static struct pipeline {
	char *name;		/* pipeline name, NULL for an unused slot */
	char **def;		/* NULL terminated list of optimizers */
	bool builtin;	/* true for the pipelines defined right here */
} pipes[MAXOPTPIPES] = {
/* The minimal pipeline necessary by the server to operate correctly
 *
 * NOTE:
 * If you change the minimal pipe, please also update the man page
 * (see tools/mserver/mserver5.1) accordingly!
 */
	{"minimal_pipe",
	 (char *[]) {
		 "inline",
		 "remap",
		 "emptybind",
		 "deadcode",
		 "for",
		 "dict",
		 "multiplex",
		 "generator",
		 "profiler",
		 "garbageCollector",
		 NULL,
	 },
	 true,
	},
	{"minimal_fast",
	 (char *[]) {
		 "minimalfast",
		 NULL,
	 },
	 true,
	},
/* NOTE:
 * If you change the default pipe, please also update the no_mitosis
 * pipe and the sequential pipe (see below), as well as the man page
 * (see tools/mserver/mserver5.1) accordingly!
 */
	{"default_pipe",
	 (char *[]) {
		 "inline",
		 "remap",
		 "costModel",
		 "coercions",
		 "aliases",
		 "evaluate",
		 "emptybind",
		 "deadcode",
		 "pushselect",
		 "aliases",
		 "for",
		 "dict",
		 "mitosis",
		 "mergetable",
		 "aliases",
		 "constants",
		 "commonTerms",
		 "projectionpath",
		 "deadcode",
		 "matpack",
		 "reorder",
		 "dataflow",
		 "querylog",
		 "multiplex",
		 "generator",
		 "candidates",
		 "deadcode",
		 "postfix",
		 "profiler",
		 "garbageCollector",
		 NULL,
	 },
	 true,
	},
	{"default_fast",
	 (char *[]) {
		 "defaultfast",
		 NULL,
	 },
	 true,
	},
/* The no_mitosis pipe line is (and should be kept!) identical to the
 * default pipeline, except that optimizer mitosis is omitted.  It is
 * used mainly to make some tests work deterministically, and to check
 * / debug whether "unexpected" problems are related to mitosis
 * (and/or mergetable).
 *
 * NOTE:
 * If you change the no_mitosis pipe, please also update the man page
 * (see tools/mserver/mserver5.1) accordingly!
 */
	{"no_mitosis_pipe",
	 (char *[]) {
		 "inline",
		 "remap",
		 "costModel",
		 "coercions",
		 "aliases",
		 "evaluate",
		 "emptybind",
		 "deadcode",
		 "pushselect",
		 "aliases",
		 /* "for" and "dict" are part of the default (and even of the
		  * minimal) pipe, so they belong here as well */
		 "for",
		 "dict",
		 "mergetable",
		 "aliases",
		 "constants",
		 "commonTerms",
		 "projectionpath",
		 "deadcode",
		 "matpack",
		 "reorder",
		 "dataflow",
		 "querylog",
		 "multiplex",
		 "generator",
		 "candidates",
		 "deadcode",
		 "postfix",
		 "profiler",
		 "garbageCollector",
		 NULL,
	 },
	 true,
	},
/* The sequential pipe line is (and should be kept!) identical to the
 * default pipeline, except that optimizers mitosis & dataflow are
 * omitted.  It is used mainly to make some tests work
 * deterministically, i.e., avoid ambiguous output, by avoiding
 * parallelism.
 *
 * NOTE:
 * If you change the sequential pipe, please also update the man page
 * (see tools/mserver/mserver5.1) accordingly!
 */
	{"sequential_pipe",
	 (char *[]) {
		 "inline",
		 "remap",
		 "costModel",
		 "coercions",
		 "aliases",
		 "evaluate",
		 "emptybind",
		 "deadcode",
		 "pushselect",
		 "aliases",
		 "for",
		 "dict",
		 "mergetable",
		 "aliases",
		 "constants",
		 "commonTerms",
		 "projectionpath",
		 "deadcode",
		 "matpack",
		 "reorder",
		 "querylog",
		 "multiplex",
		 "generator",
		 "candidates",
		 "deadcode",
		 "postfix",
		 "profiler",
		 "garbageCollector",
		 NULL,
	 },
	 true,
	},
/* Experimental pipelines stressing various components under
 * development.  Do not use any of these pipelines in production
 * settings!
 */
/* sentinel */
	{NULL, NULL, false,},
};
213 :
214 : #include "optimizer_private.h"
215 :
/* Held by addPipeDefinition() while it looks up and replaces an entry
 * of the pipes table. */
static MT_Lock pipeLock = MT_LOCK_INITIALIZER(pipeLock);
217 :
218 : static str
219 23 : validatePipe(struct pipeline *pipe)
220 : {
221 23 : bool mitosis = false, deadcode = false, mergetable = false;
222 23 : bool multiplex = false, garbage = false, generator = false, remap = false;
223 23 : int i;
224 :
225 23 : if (pipe->def == NULL || pipe->def[0] == NULL)
226 0 : throw(MAL, "optimizer.validate", SQLSTATE(42000) "missing optimizers");
227 :
228 23 : if (strcmp(pipe->def[0], "defaultfast") == 0
229 23 : || strcmp(pipe->def[0], "minimalfast") == 0)
230 : return MAL_SUCCEED;
231 :
232 23 : if (strcmp(pipe->def[0], "inline") != 0)
233 15 : throw(MAL, "optimizer.validate",
234 : SQLSTATE(42000) "'inline' should be the first\n");
235 :
236 48 : for (i = 0; pipe->def[i]; i++) {
237 41 : const char *fname = pipe->def[i];
238 41 : if (garbage)
239 1 : throw(MAL, "optimizer.validate",
240 : SQLSTATE(42000)
241 : "'garbageCollector' should be used as the last one\n");
242 40 : if (strcmp(fname, "deadcode") == 0)
243 : deadcode = true;
244 33 : else if (strcmp(fname, "remap") == 0)
245 : remap = true;
246 32 : else if (strcmp(fname, "mitosis") == 0)
247 : mitosis = true;
248 31 : else if (strcmp(fname, "mergetable") == 0)
249 : mergetable = true;
250 30 : else if (strcmp(fname, "multiplex") == 0)
251 : multiplex = true;
252 24 : else if (strcmp(fname, "generator") == 0)
253 : generator = true;
254 23 : else if (strcmp(fname, "garbageCollector") == 0)
255 4 : garbage = true;
256 : }
257 :
258 7 : if (mitosis && !mergetable)
259 0 : throw(MAL, "optimizer.validate",
260 : SQLSTATE(42000) "'mitosis' needs 'mergetable'\n");
261 :
262 : /* several optimizer should be used */
263 7 : if (!multiplex)
264 1 : throw(MAL, "optimizer.validate",
265 : SQLSTATE(42000) "'multiplex' should be used\n");
266 6 : if (!deadcode)
267 1 : throw(MAL, "optimizer.validate",
268 : SQLSTATE(42000) "'deadcode' should be used at least once\n");
269 5 : if (!garbage)
270 2 : throw(MAL, "optimizer.validate",
271 : SQLSTATE(42000)
272 : "'garbageCollector' should be used as the last one\n");
273 3 : if (!remap)
274 2 : throw(MAL, "optimizer.validate",
275 : SQLSTATE(42000) "'remap' should be used\n");
276 1 : if (!generator)
277 0 : throw(MAL, "optimizer.validate",
278 : SQLSTATE(42000) "'generator' should be used\n");
279 :
280 : return MAL_SUCCEED;
281 : }
282 :
283 : /* the session_pipe is the one defined by the user */
284 : str
285 23 : addPipeDefinition(Client cntxt, const char *name, const char *pipe)
286 : {
287 23 : int i, n;
288 23 : str msg = MAL_SUCCEED;
289 23 : struct pipeline oldpipe;
290 23 : const char *p;
291 :
292 23 : (void) cntxt;
293 23 : MT_lock_set(&pipeLock);
294 184 : for (i = 0; i < MAXOPTPIPES && pipes[i].name; i++)
295 138 : if (strcmp(name, pipes[i].name) == 0)
296 : break;
297 :
298 23 : if (i == MAXOPTPIPES) {
299 0 : MT_lock_unset(&pipeLock);
300 0 : throw(MAL, "optimizer.addPipeDefinition",
301 : SQLSTATE(HY013) "Out of slots");
302 : }
303 23 : if (pipes[i].name && pipes[i].builtin) {
304 0 : MT_lock_unset(&pipeLock);
305 0 : throw(MAL, "optimizer.addPipeDefinition",
306 : SQLSTATE(42000) "No overwrite of built in allowed");
307 : }
308 :
309 : /* save old value */
310 23 : oldpipe = pipes[i];
311 46 : pipes[i] = (struct pipeline) {
312 23 : .name = GDKstrdup(name),
313 : };
314 23 : if (pipes[i].name == NULL)
315 0 : goto bailout;
316 : n = 1;
317 173 : for (p = pipe; p; p = strchr(p, ';')) {
318 150 : p++;
319 150 : n++;
320 : }
321 23 : if ((pipes[i].def = GDKmalloc(n * sizeof(char *))) == NULL)
322 0 : goto bailout;
323 : n = 0;
324 150 : while ((p = strchr(pipe, ';')) != NULL) {
325 127 : if (strncmp(pipe, "optimizer.", 10) == 0)
326 126 : pipe += 10;
327 127 : const char *q = pipe;
328 1349 : while (q < p && *q != '(' && *q != '.' && !GDKisspace(*q))
329 1222 : q++;
330 127 : if (*q == '.') {
331 0 : msg = createException(MAL, "optimizer.addPipeDefinition",
332 : SQLSTATE(42000) "Bad pipeline definition");
333 0 : goto bailout;
334 : }
335 127 : if (q > pipe) {
336 126 : if ((pipes[i].def[n++] = GDKstrndup(pipe, q - pipe)) == NULL)
337 0 : goto bailout;
338 : }
339 127 : pipe = p + 1;
340 143 : while (*pipe && GDKisspace(*pipe))
341 16 : pipe++;
342 : }
343 23 : pipes[i].def[n] = NULL;
344 23 : msg = validatePipe(&pipes[i]);
345 23 : if (msg != MAL_SUCCEED) {
346 : /* failed: restore old value */
347 22 : goto bailout;
348 : }
349 1 : MT_lock_unset(&pipeLock);
350 : /* succeeded: destroy old value */
351 1 : GDKfree(oldpipe.name);
352 1 : if (oldpipe.def)
353 0 : for (n = 0; oldpipe.def[n]; n++)
354 0 : GDKfree(oldpipe.def[n]);
355 1 : GDKfree(oldpipe.def);
356 1 : return msg;
357 :
358 22 : bailout:
359 22 : GDKfree(pipes[i].name);
360 22 : if (pipes[i].def)
361 128 : for (n = 0; pipes[i].def[n]; n++)
362 106 : GDKfree(pipes[i].def[n]);
363 22 : GDKfree(pipes[i].def);
364 22 : pipes[i] = oldpipe;
365 22 : MT_lock_unset(&pipeLock);
366 22 : if (msg)
367 : return msg;
368 0 : throw(MAL, "optimizer.addPipeDefinition", SQLSTATE(HY013) MAL_MALLOC_FAIL);
369 : }
370 :
371 : bool
372 669 : isOptimizerPipe(const char *name)
373 : {
374 669 : int i;
375 :
376 2390 : for (i = 0; i < MAXOPTPIPES && pipes[i].name; i++)
377 2338 : if (strcmp(name, pipes[i].name) == 0)
378 : return true;
379 : return false;
380 : }
381 :
382 : str
383 55 : getPipeCatalog(bat *nme, bat *def, bat *stat)
384 : {
385 55 : BAT *b, *bn, *bs;
386 55 : int i;
387 55 : size_t l = 2048;
388 55 : char *buf = GDKmalloc(l);
389 :
390 55 : b = COLnew(0, TYPE_str, 20, TRANSIENT);
391 55 : bn = COLnew(0, TYPE_str, 20, TRANSIENT);
392 55 : bs = COLnew(0, TYPE_str, 20, TRANSIENT);
393 55 : if (buf == NULL || b == NULL || bn == NULL || bs == NULL) {
394 0 : BBPreclaim(b);
395 0 : BBPreclaim(bn);
396 0 : BBPreclaim(bs);
397 0 : GDKfree(buf);
398 0 : throw(MAL, "optimizer.getpipeDefinition",
399 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
400 : }
401 :
402 385 : for (i = 0; i < MAXOPTPIPES && pipes[i].name; i++) {
403 : size_t n = 1;
404 5665 : for (int j = 0; pipes[i].def[j]; j++)
405 5335 : n += strlen(pipes[i].def[j]) + 13;
406 330 : if (n > l) {
407 0 : GDKfree(buf);
408 0 : buf = GDKmalloc(n);
409 0 : l = n;
410 0 : if (buf == NULL) {
411 0 : BBPreclaim(b);
412 0 : BBPreclaim(bn);
413 0 : BBPreclaim(bs);
414 0 : GDKfree(buf);
415 0 : throw(MAL, "optimizer.getpipeDefinition",
416 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
417 : }
418 : }
419 330 : char *p = buf;
420 5665 : for (int j = 0; pipes[i].def[j]; j++) {
421 5335 : p = stpcpy(p, "optimizer.");
422 5335 : p = stpcpy(p, pipes[i].def[j]);
423 5335 : p = stpcpy(p, "();");
424 : }
425 330 : if (BUNappend(b, pipes[i].name, false) != GDK_SUCCEED
426 330 : || BUNappend(bn, buf, false) != GDK_SUCCEED
427 330 : || BUNappend(bs, pipes[i].builtin ? "stable" : "experimental",
428 : false) != GDK_SUCCEED) {
429 0 : BBPreclaim(b);
430 0 : BBPreclaim(bn);
431 0 : BBPreclaim(bs);
432 0 : GDKfree(buf);
433 0 : throw(MAL, "optimizer.getpipeDefinition",
434 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
435 : }
436 : }
437 55 : GDKfree(buf);
438 :
439 55 : *nme = b->batCacheid;
440 55 : BBPkeepref(b);
441 55 : *def = bn->batCacheid;
442 55 : BBPkeepref(bn);
443 55 : *stat = bs->batCacheid;
444 55 : BBPkeepref(bs);
445 55 : return MAL_SUCCEED;
446 : }
447 :
448 : /*
449 : * Add a new components of the optimizer pipe to the plan
450 : */
451 : str
452 538846 : addOptimizerPipe(Client cntxt, MalBlkPtr mb, const char *name)
453 : {
454 538846 : int i, j;
455 538846 : InstrPtr p;
456 538846 : str msg = MAL_SUCCEED;
457 :
458 538846 : (void) cntxt;
459 538846 : if (strcmp(name, "default_fast") == 0 && isSimpleSQL(mb)) {
460 2 : for (i = 0; i < MAXOPTPIPES && pipes[i].name; i++)
461 2 : if (strcmp(pipes[i].name, "minimal_fast") == 0)
462 : break;
463 : } else {
464 1579243 : for (i = 0; i < MAXOPTPIPES && pipes[i].name; i++)
465 1579243 : if (strcmp(pipes[i].name, name) == 0)
466 : break;
467 : }
468 :
469 538846 : if (i == MAXOPTPIPES || pipes[i].name == NULL)
470 0 : throw(MAL, "optimizer.addOptimizerPipe",
471 : SQLSTATE(22023) "Unknown optimizer");
472 :
473 13745747 : for (j = 0; pipes[i].def[j]; j++) {
474 13206895 : p = newFcnCall(mb, optimizerRef, pipes[i].def[j]);
475 13206910 : if (p == NULL)
476 0 : throw(MAL, "optimizer.addOptimizerPipe",
477 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
478 13206910 : p->fcn = (MALfcn) OPTwrapper;
479 13206910 : p->token = PATcall;
480 13206910 : pushInstruction(mb, p);
481 : }
482 : return msg;
483 : }
484 :
485 : void
486 334 : opt_pipes_reset(void)
487 : {
488 21710 : for (int i = 0; i < MAXOPTPIPES; i++)
489 21376 : if (pipes[i].name && !pipes[i].builtin) {
490 1 : GDKfree(pipes[i].name);
491 1 : if (pipes[i].def)
492 21 : for (int n = 0; pipes[i].def[n]; n++)
493 20 : GDKfree(pipes[i].def[n]);
494 1 : GDKfree(pipes[i].def);
495 1 : pipes[i] = (struct pipeline) {
496 : .name = NULL,
497 : };
498 : }
499 334 : }
|