Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer.h"
15 : #include "rel_optimizer_private.h"
16 : #include "rel_rel.h"
17 : #include "rel_basetable.h"
18 : #include "rel_exp.h"
19 : #include "rel_propagate.h"
20 : #include "rel_statistics.h"
21 : #include "sql_privileges.h"
22 :
23 : static sql_rel *
24 5499554 : rel_properties(visitor *v, sql_rel *rel)
25 : {
26 5499554 : global_props *gp = (global_props*)v->data;
27 :
28 : /* Don't flag any changes here! */
29 5499554 : gp->cnt[(int)rel->op]++;
30 5499554 : gp->needs_distinct |= need_distinct(rel);
31 5499554 : if (gp->instantiate && is_basetable(rel->op)) {
32 663977 : mvc *sql = v->sql;
33 663977 : sql_table *t = (sql_table *) rel->l;
34 663977 : sql_part *pt;
35 :
36 : /* If the plan has a merge table or a child of one, then rel_merge_table_rewrite has to run */
37 663977 : gp->needs_mergetable_rewrite |= (isMergeTable(t) || (t->s && t->s->parts && (pt = partition_find_part(sql->session->tr, t, NULL))));
38 663984 : gp->needs_remote_replica_rewrite |= (isRemote(t) || isReplicaTable(t));
39 : }
40 5499561 : return rel;
41 : }
42 :
43 : typedef struct {
44 : atom *lval;
45 : atom *hval;
46 : bte anti:1,
47 : semantics:1;
48 : int flag;
49 : list *values;
50 : } range_limit;
51 :
52 : typedef struct {
53 : list *cols;
54 : list *ranges;
55 : sql_rel *sel;
56 : } merge_table_prune_info;
57 :
58 : static sql_rel *
59 322 : merge_table_prune_and_unionize(visitor *v, sql_rel *mt_rel, merge_table_prune_info *info)
60 : {
61 322 : if (mvc_highwater(v->sql))
62 0 : return sql_error(v->sql, 10, SQLSTATE(42000) "Query too complex: running out of stack space");
63 :
64 322 : sql_rel *nrel = NULL;
65 322 : sql_table *mt = (sql_table*) mt_rel->l;
66 322 : const char *mtalias = exp_relname(mt_rel->exps->h->data);
67 322 : list *tables = sa_list(v->sql->sa);
68 :
69 933 : for (node *nt = mt->members->h; nt; nt = nt->next) {
70 613 : sql_part *pd = nt->data;
71 613 : sql_table *pt = find_sql_table_id(v->sql->session->tr, mt->s, pd->member);
72 613 : sqlstore *store = v->sql->session->tr->store;
73 613 : int skip = 0;
74 :
75 : /* At the moment we throw an error in the optimizer, but later this rewriter should move out from the optimizers */
76 613 : if ((isMergeTable(pt) || isReplicaTable(pt)) && list_empty(pt->members))
77 2 : return sql_error(v->sql, 02, SQLSTATE(42000) "%s '%s'.'%s' should have at least one table associated",
78 2 : TABLE_TYPE_DESCRIPTION(pt->type, pt->properties), pt->s->base.name, pt->base.name);
79 : /* Do not include empty partitions */
80 611 : if (isTable(pt) && pt->access == TABLE_READONLY && !store->storage_api.count_col(v->sql->session->tr, ol_first_node(pt->columns)->data, 10)) /* count active rows only */
81 1 : continue;
82 :
83 1626 : for (node *n = mt_rel->exps->h; n && !skip; n = n->next) { /* for each column of the child table */
84 1016 : sql_exp *e = n->data;
85 1016 : int i = 0;
86 1016 : bool first_attempt = true;
87 1016 : atom *cmin = NULL, *cmax = NULL, *rmin = NULL, *rmax = NULL;
88 1016 : list *inlist = NULL;
89 1016 : const char *cname = e->r;
90 1016 : sql_column *mt_col = NULL, *col = NULL;
91 :
92 1016 : if (cname[0] == '%') /* Ignore TID and indexes here */
93 73 : continue;
94 :
95 943 : mt_col = ol_find_name(mt->columns, cname)->data;
96 943 : col = ol_fetch(pt->columns, mt_col->colnr);
97 943 : assert(e && e->type == e_column && col);
98 :
99 943 : if (isTable(pt) && info && !list_empty(info->cols) && ATOMlinear(exp_subtype(e)->type->localtype)) {
100 715 : for (node *nn = info->cols->h ; nn && !skip; nn = nn->next) { /* test if it passes all predicates around it */
101 372 : if (nn->data == e) {
102 254 : range_limit *next = list_fetch(info->ranges, i);
103 254 : atom *lval = next->lval, *hval = next->hval;
104 254 : list *values = next->values;
105 :
106 : /* I don't handle cmp_in or cmp_notin cases with anti or null semantics yet */
107 254 : if (next->flag == cmp_in && (next->anti || next->semantics))
108 0 : continue;
109 :
110 254 : assert(col && (lval || values));
111 254 : if (!skip && pt->access == TABLE_READONLY) {
112 : /* check if the part falls within the bounds of the select expression else skip this (keep at least on part-table) */
113 87 : if (!cmin && !cmax && first_attempt) {
114 87 : void *min = NULL, *max = NULL;
115 87 : if (sql_trans_ranges(v->sql->session->tr, col, &min, &max) && min && max) {
116 84 : cmin = atom_general_ptr(v->sql->sa, &col->type, min);
117 84 : cmax = atom_general_ptr(v->sql->sa, &col->type, max);
118 : }
119 87 : first_attempt = false; /* no more attempts to read from storage */
120 : }
121 :
122 87 : if (cmin && cmax) {
123 84 : if (lval) {
124 80 : if (!next->semantics && ((lval && lval->isnull) || (hval && hval->isnull))) {
125 : skip = 1; /* NULL values don't match, skip them */
126 80 : } else if (!next->semantics) {
127 80 : if (next->flag == cmp_equal) {
128 20 : skip |= next->anti ? exp_range_overlap(cmin, cmax, lval, hval, false, false) != 0 :
129 10 : exp_range_overlap(cmin, cmax, lval, hval, false, false) == 0;
130 70 : } else if (hval != lval) { /* range case */
131 70 : comp_type lower = range2lcompare(next->flag), higher = range2rcompare(next->flag);
132 140 : skip |= next->anti ? exp_range_overlap(cmin, cmax, lval, hval, higher == cmp_lt, lower == cmp_gt) != 0 :
133 70 : exp_range_overlap(cmin, cmax, lval, hval, higher == cmp_lt, lower == cmp_gt) == 0;
134 : } else {
135 0 : switch (next->flag) {
136 0 : case cmp_gt:
137 0 : skip |= next->anti ? VALcmp(&(lval->data), &(cmax->data)) < 0 : VALcmp(&(lval->data), &(cmax->data)) >= 0;
138 0 : break;
139 0 : case cmp_gte:
140 0 : skip |= next->anti ? VALcmp(&(lval->data), &(cmax->data)) <= 0 : VALcmp(&(lval->data), &(cmax->data)) > 0;
141 0 : break;
142 0 : case cmp_lt:
143 0 : skip |= next->anti ? VALcmp(&(lval->data), &(cmax->data)) < 0 : VALcmp(&(cmin->data), &(lval->data)) >= 0;
144 0 : break;
145 0 : case cmp_lte:
146 0 : skip |= next->anti ? VALcmp(&(lval->data), &(cmax->data)) <= 0 : VALcmp(&(cmin->data), &(lval->data)) > 0;
147 0 : break;
148 : default:
149 : break;
150 : }
151 : }
152 : }
153 4 : } else if (next->flag == cmp_in) {
154 4 : int nskip = 1;
155 10 : for (node *m = values->h; m && nskip; m = m->next) {
156 6 : atom *a = m->data;
157 :
158 6 : if (a->isnull)
159 0 : continue;
160 6 : nskip &= exp_range_overlap(cmin, cmax, a, a, false, false) == 0;
161 : }
162 4 : skip |= nskip;
163 : }
164 : }
165 : }
166 254 : if (!skip && isPartitionedByColumnTable(mt) && strcmp(mt->part.pcol->base.name, col->base.name) == 0) {
167 158 : if (!next->semantics && ((lval && lval->isnull) || (hval && hval->isnull))) {
168 : skip = 1; /* NULL values don't match, skip them */
169 158 : } else if (next->semantics) {
170 : /* TODO NOT NULL prunning for partitions that just hold NULL values is still missing */
171 20 : skip |= next->flag == cmp_equal && !next->anti && lval && lval->isnull ? pd->with_nills == 0 : 0; /* *= NULL case */
172 : } else {
173 143 : if (isRangePartitionTable(mt)) {
174 109 : if (!rmin || !rmax) { /* initialize lazily */
175 109 : rmin = atom_general_ptr(v->sql->sa, &col->type, pd->part.range.minvalue);
176 109 : rmax = atom_general_ptr(v->sql->sa, &col->type, pd->part.range.maxvalue);
177 : }
178 :
179 : /* Prune range partitioned tables */
180 109 : if (rmin->isnull && rmax->isnull) {
181 0 : if (pd->with_nills == 1) /* the partition just holds null values, skip it */
182 143 : skip = 1;
183 : /* otherwise it holds all values in the range, cannot be pruned */
184 109 : } else if (rmin->isnull) { /* MINVALUE to limit */
185 4 : if (lval) {
186 4 : if (hval != lval) { /* range case */
187 : /* There's need to call range2lcompare, because the partition's upper limit is always exclusive */
188 2 : skip |= next->anti ? VALcmp(&(lval->data), &(rmax->data)) < 0 : VALcmp(&(lval->data), &(rmax->data)) >= 0;
189 : } else {
190 2 : switch (next->flag) { /* upper limit always exclusive */
191 2 : case cmp_equal:
192 : case cmp_gt:
193 : case cmp_gte:
194 2 : skip |= next->anti ? VALcmp(&(lval->data), &(rmax->data)) < 0 : VALcmp(&(lval->data), &(rmax->data)) >= 0;
195 2 : break;
196 : default:
197 : break;
198 : }
199 : }
200 0 : } else if (next->flag == cmp_in) {
201 0 : int nskip = 1;
202 0 : for (node *m = values->h; m && nskip; m = m->next) {
203 0 : atom *a = m->data;
204 :
205 0 : if (a->isnull)
206 0 : continue;
207 0 : nskip &= VALcmp(&(a->data), &(rmax->data)) >= 0;
208 : }
209 0 : skip |= nskip;
210 : }
211 105 : } else if (rmax->isnull) { /* limit to MAXVALUE */
212 29 : if (lval) {
213 26 : if (hval != lval) { /* range case */
214 17 : comp_type higher = range2rcompare(next->flag);
215 17 : if (higher == cmp_lt) {
216 6 : skip |= next->anti ? VALcmp(&(rmin->data), &(hval->data)) < 0 : VALcmp(&(rmin->data), &(hval->data)) >= 0;
217 11 : } else if (higher == cmp_lte) {
218 11 : skip |= next->anti ? VALcmp(&(rmin->data), &(hval->data)) <= 0 : VALcmp(&(rmin->data), &(hval->data)) > 0;
219 : } else {
220 0 : assert(0);
221 : }
222 : } else {
223 9 : switch (next->flag) {
224 2 : case cmp_lt:
225 2 : skip |= next->anti ? VALcmp(&(rmin->data), &(hval->data)) < 0 : VALcmp(&(rmin->data), &(hval->data)) >= 0;
226 2 : break;
227 6 : case cmp_equal:
228 : case cmp_lte:
229 6 : skip |= next->anti ? VALcmp(&(rmin->data), &(hval->data)) <= 0 : VALcmp(&(rmin->data), &(hval->data)) > 0;
230 6 : break;
231 : default:
232 : break;
233 : }
234 : }
235 3 : } else if (next->flag == cmp_in) {
236 3 : int nskip = 1;
237 10 : for (node *m = values->h; m && nskip; m = m->next) {
238 7 : atom *a = m->data;
239 :
240 7 : if (a->isnull)
241 0 : continue;
242 7 : nskip &= VALcmp(&(rmin->data), &(a->data)) > 0;
243 : }
244 3 : skip |= nskip;
245 : }
246 : } else { /* limit1 to limit2 (general case), limit2 is exclusive */
247 76 : bool max_differ_min = ATOMcmp(col->type.type->localtype, &rmin->data.val, &rmax->data.val) != 0;
248 :
249 76 : if (lval) {
250 68 : if (next->flag == cmp_equal) {
251 24 : skip |= next->anti ? exp_range_overlap(rmin, rmax, lval, hval, false, max_differ_min) != 0 :
252 12 : exp_range_overlap(rmin, rmax, lval, hval, false, max_differ_min) == 0;
253 56 : } else if (hval != lval) { /* For the between case */
254 40 : comp_type higher = range2rcompare(next->flag);
255 70 : skip |= next->anti ? exp_range_overlap(rmin, rmax, lval, hval, higher == cmp_lt, max_differ_min) != 0 :
256 30 : exp_range_overlap(rmin, rmax, lval, hval, higher == cmp_lt, max_differ_min) == 0;
257 : } else {
258 16 : switch (next->flag) {
259 4 : case cmp_gt:
260 4 : skip |= next->anti ? VALcmp(&(lval->data), &(rmax->data)) < 0 : VALcmp(&(lval->data), &(rmax->data)) >= 0;
261 4 : break;
262 2 : case cmp_gte:
263 2 : if (max_differ_min)
264 2 : skip |= next->anti ? VALcmp(&(lval->data), &(rmax->data)) < 0 : VALcmp(&(lval->data), &(rmax->data)) >= 0;
265 : else
266 0 : skip |= next->anti ? VALcmp(&(lval->data), &(rmax->data)) <= 0 : VALcmp(&(lval->data), &(rmax->data)) > 0;
267 : break;
268 2 : case cmp_lt:
269 2 : skip |= next->anti ? VALcmp(&(rmin->data), &(lval->data)) < 0 : VALcmp(&(rmin->data), &(lval->data)) >= 0;
270 2 : break;
271 8 : case cmp_lte:
272 8 : skip |= next->anti ? VALcmp(&(rmin->data), &(lval->data)) <= 0 : VALcmp(&(rmin->data), &(lval->data)) > 0;
273 8 : break;
274 : default:
275 : break;
276 : }
277 : }
278 8 : } else if (next->flag == cmp_in) {
279 8 : int nskip = 1;
280 20 : for (node *m = values->h; m && nskip; m = m->next) {
281 12 : atom *a = m->data;
282 :
283 12 : if (a->isnull)
284 0 : continue;
285 12 : nskip &= exp_range_overlap(rmin, rmax, a, a, false, max_differ_min) == 0;
286 : }
287 8 : skip |= nskip;
288 : }
289 : }
290 : }
291 :
292 143 : if (isListPartitionTable(mt) && (next->flag == cmp_equal || next->flag == cmp_in) && !next->anti) {
293 : /* if we find a value equal to one of the predicates, we don't prune */
294 : /* if the partition just holds null values, it will be skipped */
295 33 : if (!inlist) { /* initialize lazily */
296 32 : inlist = sa_list(v->sql->sa);
297 106 : for (node *m = pd->part.values->h; m; m = m->next) {
298 74 : sql_part_value *spv = (sql_part_value*) m->data;
299 74 : atom *pa = atom_general_ptr(v->sql->sa, &col->type, spv->value);
300 :
301 74 : list_append(inlist, pa);
302 : }
303 : }
304 :
305 33 : if (next->flag == cmp_equal) {
306 12 : int nskip = 1;
307 38 : for (node *m = inlist->h; m && nskip; m = m->next) {
308 26 : atom *pa = m->data;
309 26 : assert(!pa->isnull);
310 26 : nskip &= VALcmp(&(pa->data), &(lval->data)) != 0;
311 : }
312 12 : skip |= nskip;
313 21 : } else if (next->flag == cmp_in) {
314 50 : for (node *o = values->h; o && !skip; o = o->next) {
315 29 : atom *a = o->data;
316 29 : int nskip = 1;
317 :
318 29 : if (a->isnull)
319 0 : continue;
320 87 : for (node *m = inlist->h; m && nskip; m = m->next) {
321 58 : atom *pa = m->data;
322 58 : assert(!pa->isnull);
323 58 : nskip &= VALcmp(&(pa->data), &(a->data)) != 0;
324 : }
325 29 : skip |= nskip;
326 : }
327 : }
328 : }
329 : }
330 : }
331 : }
332 372 : i++;
333 : }
334 : }
335 : }
336 610 : if (!skip)
337 482 : append(tables, rel_rename_part(v->sql, rel_basetable(v->sql, pt, pt->base.name), mt_rel, mtalias));
338 : }
339 320 : if (list_empty(tables)) { /* No table passed the predicates, generate dummy relation */
340 18 : list *converted = sa_list(v->sql->sa);
341 18 : nrel = rel_project_exp(v->sql, exp_atom_bool(v->sql->sa, 1));
342 18 : nrel = rel_select(v->sql->sa, nrel, exp_atom_bool(v->sql->sa, 0));
343 18 : set_processed(nrel);
344 :
345 41 : for (node *n = mt_rel->exps->h ; n ; n = n->next) {
346 23 : sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL));
347 23 : exp_prop_alias(v->sql->sa, a, e);
348 23 : list_append(converted, a);
349 : }
350 18 : nrel = rel_project(v->sql->sa, nrel, converted);
351 : } else { /* Unionize children tables */
352 784 : for (node *n = tables->h; n ; n = n->next) {
353 482 : sql_rel *next = n->data;
354 482 : sql_table *subt = (sql_table *) next->l;
355 :
356 482 : if (isMergeTable(subt)) { /* apply select predicate recursively for nested merge tables */
357 14 : if (!(next = merge_table_prune_and_unionize(v, next, info)))
358 : return NULL;
359 468 : } else if (info) { /* propagate select under union */
360 128 : next = rel_select(v->sql->sa, next, NULL);
361 128 : next->exps = exps_copy(v->sql, info->sel->exps);
362 128 : set_processed(next);
363 : }
364 :
365 482 : if (nrel) {
366 180 : nrel = rel_setop(v->sql->sa, nrel, next, op_union);
367 180 : rel_setop_set_exps(v->sql, nrel, rel_projections(v->sql, mt_rel, NULL, 1, 1), true);
368 180 : set_processed(nrel);
369 : } else {
370 : nrel = next;
371 : }
372 : }
373 : }
374 : return nrel;
375 : }
376 :
377 : /* rewrite merge tables into union of base tables */
378 : static sql_rel *
379 12359 : rel_merge_table_rewrite_(visitor *v, sql_rel *rel)
380 : {
381 12359 : if (is_modify(rel->op)) {
382 1079 : sql_query *query = query_create(v->sql);
383 1079 : return rel_propagate(query, rel, &v->changes);
384 : } else {
385 11280 : sql_rel *bt = rel, *sel = NULL, *nrel = NULL;
386 :
387 11280 : if (is_select(rel->op)) {
388 1721 : sel = rel;
389 1721 : bt = rel->l;
390 : }
391 11280 : if (is_basetable(bt->op) && rel_base_table(bt) && isMergeTable((sql_table*)bt->l)) {
392 575 : sql_table *mt = rel_base_table(bt);
393 575 : merge_table_prune_info *info = NULL;
394 :
395 575 : if (list_empty(mt->members)) /* in DDL statement cases skip if mergetable is empty */
396 : return rel;
397 308 : if (sel && !list_empty(sel->exps)) { /* prepare prunning information once */
398 101 : info = SA_NEW(v->sql->sa, merge_table_prune_info);
399 202 : *info = (merge_table_prune_info) {
400 101 : .cols = sa_list(v->sql->sa),
401 101 : .ranges = sa_list(v->sql->sa),
402 : .sel = sel
403 : };
404 212 : for (node *n = sel->exps->h; n; n = n->next) {
405 111 : sql_exp *e = n->data, *c = e->l;
406 111 : int flag = e->flag;
407 :
408 111 : if (e->type != e_cmp || (!is_theta_exp(flag) && flag != cmp_in) || is_symmetric(e) || !(c = rel_find_exp(rel, c)))
409 7 : continue;
410 :
411 104 : if (flag == cmp_gt || flag == cmp_gte || flag == cmp_lte || flag == cmp_lt || flag == cmp_equal) {
412 89 : sql_exp *l = e->r, *h = e->f;
413 89 : atom *lval = exp_flatten(v->sql, v->value_based_opt, l);
414 89 : atom *hval = h ? exp_flatten(v->sql, v->value_based_opt, h) : lval;
415 :
416 89 : if (lval && hval) {
417 87 : range_limit *next = SA_NEW(v->sql->sa, range_limit);
418 :
419 87 : *next = (range_limit) {
420 : .lval = lval,
421 : .hval = hval,
422 : .flag = flag,
423 87 : .anti = is_anti(e),
424 87 : .semantics = is_semantics(e),
425 : };
426 87 : list_append(info->cols, c);
427 87 : list_append(info->ranges, next);
428 : }
429 : }
430 104 : if (flag == cmp_in) { /* handle in lists */
431 15 : list *vals = e->r, *vlist = sa_list(v->sql->sa);
432 :
433 15 : node *m = NULL;
434 48 : for (m = vals->h; m; m = m->next) {
435 33 : sql_exp *l = m->data;
436 33 : atom *lval = exp_flatten(v->sql, v->value_based_opt, l);
437 :
438 33 : if (!lval)
439 : break;
440 33 : list_append(vlist, lval);
441 : }
442 15 : if (!m) {
443 15 : range_limit *next = SA_NEW(v->sql->sa, range_limit);
444 :
445 15 : *next = (range_limit) {
446 : .values = vlist, /* mark high as value list */
447 : .flag = flag,
448 15 : .anti = is_anti(e),
449 15 : .semantics = is_semantics(e),
450 : };
451 15 : list_append(info->cols, c);
452 15 : list_append(info->ranges, next);
453 : }
454 : }
455 : }
456 : }
457 308 : if (!(nrel = merge_table_prune_and_unionize(v, bt, info)))
458 : return NULL;
459 : /* Always do relation inplace. If the mt relation has more than 1 reference, this is required */
460 306 : if (is_union(nrel->op)) {
461 137 : rel = rel_inplace_setop(v->sql, rel, nrel->l, nrel->r, op_union, nrel->exps);
462 169 : } else if (is_select(nrel->op)) {
463 49 : rel = rel_inplace_select(rel, nrel->l, nrel->exps);
464 120 : } else if (is_basetable(nrel->op)) {
465 102 : rel = rel_inplace_basetable(rel, nrel);
466 : } else {
467 18 : assert(is_simple_project(nrel->op));
468 18 : rel = rel_inplace_project(v->sql->sa, rel, nrel->l, nrel->exps);
469 18 : rel->card = exps_card(nrel->exps);
470 : }
471 306 : nrel->l = nrel->r = NULL;
472 306 : rel_destroy(nrel);
473 306 : v->changes++;
474 : }
475 : }
476 : return rel;
477 : }
478 :
479 : static sql_rel *
480 1805 : rel_merge_table_rewrite(visitor *v, global_props *gp, sql_rel *rel)
481 : {
482 1805 : (void) gp;
483 1805 : return rel_visitor_topdown(v, rel, &rel_merge_table_rewrite_);
484 : }
485 :
486 : run_optimizer
487 596795 : bind_merge_table_rewrite(visitor *v, global_props *gp)
488 : {
489 596795 : (void) v;
490 596795 : return gp->needs_mergetable_rewrite ? rel_merge_table_rewrite : NULL;
491 : }
492 :
493 : /* these optimizers/rewriters run in a cycle loop */
494 : const sql_optimizer pre_sql_optimizers[] = {
495 : { 0, "split_select", bind_split_select},
496 : { 1, "push_project_down", bind_push_project_down},
497 : { 2, "merge_projects", bind_merge_projects},
498 : { 3, "push_project_up", bind_push_project_up},
499 : { 4, "split_project", bind_split_project},
500 : { 5, "remove_redundant_join", bind_remove_redundant_join},
501 : { 6, "simplify_math", bind_simplify_math},
502 : { 7, "optimize_exps", bind_optimize_exps},
503 : { 8, "optimize_select_and_joins_bottomup", bind_optimize_select_and_joins_bottomup},
504 : { 9, "project_reduce_casts", bind_project_reduce_casts},
505 : {10, "optimize_unions_bottomup", bind_optimize_unions_bottomup},
506 : {11, "optimize_projections", bind_optimize_projections},
507 : {12, "optimize_joins", bind_optimize_joins},
508 : {13, "join_order", bind_join_order},
509 : {14, "optimize_semi_and_anti", bind_optimize_semi_and_anti},
510 : {15, "optimize_select_and_joins_topdown", bind_optimize_select_and_joins_topdown},
511 : {16, "optimize_unions_topdown", bind_optimize_unions_topdown},
512 : {17, "dce", bind_dce},
513 : {18, "push_func_and_select_down", bind_push_func_and_select_down},
514 : {19, "push_topn_and_sample_down", bind_push_topn_and_sample_down},
515 : {20, "distinct_project2groupby", bind_distinct_project2groupby},
516 : {21, "merge_table_rewrite", bind_merge_table_rewrite},
517 : { 0, NULL, NULL}
518 : };
519 :
520 : /* these optimizers/rewriters only run once after the cycle loop */
521 : const sql_optimizer post_sql_optimizers[] = {
522 : /* Merge table rewrites may introduce remote or replica tables */
523 : /* At the moment, make sure the remote table rewriters always run after the merge table one */
524 : {23, "rewrite_remote", bind_rewrite_remote},
525 : {24, "rewrite_replica", bind_rewrite_replica},
526 : {25, "remote_func", bind_remote_func},
527 : {26, "get_statistics", bind_get_statistics}, /* gather statistics */
528 : {27, "join_order2", bind_join_order2}, /* run join order one more time with statistics */
529 : {28, "final_optimization_loop", bind_final_optimization_loop}, /* run select and group by order with statistics gathered */
530 : { 0, NULL, NULL}
531 : /* If an optimizer is going to be added, don't forget to update NSQLREWRITERS macro */
532 : };
533 :
534 :
535 : /* make sure the outer project (without order by or distinct) has all the aliases */
536 : static sql_rel *
537 686780 : rel_keep_renames(mvc *sql, sql_rel *rel)
538 : {
539 686780 : if (!rel || !is_simple_project(rel->op) || (!rel->r && !need_distinct(rel)) || list_length(rel->exps) <= 1)
540 670258 : return rel;
541 :
542 16522 : int needed = 0;
543 179338 : for(node *n = rel->exps->h; n && !needed; n = n->next) {
544 162816 : sql_exp *e = n->data;
545 :
546 162816 : if (exp_name(e) && (e->type != e_column || strcmp(exp_name(e), e->r) != 0))
547 : needed = 1;
548 : }
549 16522 : if (!needed)
550 : return rel;
551 :
552 19 : list *new_outer_exps = sa_list(sql->sa);
553 19 : list *new_inner_exps = sa_list(sql->sa);
554 65 : for(node *n = rel->exps->h; n; n = n->next) {
555 46 : sql_exp *e = n->data, *ie, *oe;
556 46 : const char *rname = exp_relname(e);
557 46 : const char *name = exp_name(e);
558 :
559 46 : exp_label(sql->sa, e, ++sql->label);
560 46 : ie = e;
561 46 : oe = exp_ref(sql, ie);
562 46 : exp_setname(sql->sa, oe, rname, name);
563 46 : append(new_inner_exps, ie);
564 46 : append(new_outer_exps, oe);
565 : }
566 19 : rel->exps = new_inner_exps;
567 19 : rel = rel_project(sql->sa, rel, new_outer_exps);
568 19 : return rel;
569 : }
570 :
571 : /* for trivial queries don't run optimizers */
572 : static int
573 752931 : calculate_opt_level(mvc *sql, sql_rel *rel)
574 : {
575 932307 : if (rel->card <= CARD_ATOM) {
576 745301 : if (is_insert(rel->op))
577 105453 : return rel->r ? calculate_opt_level(sql, rel->r) : 0;
578 639848 : if (is_simple_project(rel->op))
579 230244 : return rel->l ? calculate_opt_level(sql, rel->l) : 0;
580 : }
581 : return 1;
582 : }
583 :
584 : static inline sql_rel *
585 1283546 : run_optimizer_set(visitor *v, sql_optimizer_run *runs, sql_rel *rel, global_props *gp, const sql_optimizer *set)
586 : {
587 : /* if 'runs' is set, it means profiling is intended */
588 18532917 : for (int i = 0 ; set[i].name ; i++) {
589 17249358 : run_optimizer opt = NULL;
590 :
591 17249358 : if ((opt = set[i].bind_optimizer(v, gp))) {
592 3264770 : if (runs) {
593 0 : sql_optimizer_run *run = &(runs[set[i].index]);
594 0 : run->name = set[i].name;
595 0 : int changes = v->changes;
596 0 : lng clk = GDKusec();
597 0 : rel = opt(v, gp, rel);
598 0 : run->time += (GDKusec() - clk);
599 0 : run->nchanges += (v->changes - changes);
600 : } else {
601 3264770 : rel = opt(v, gp, rel);
602 : }
603 : }
604 : }
605 1283559 : return rel;
606 : }
607 :
608 : /* 'profile' means to benchmark each individual optimizer run */
609 : /* 'instantiate' means to rewrite logical tables: (merge, remote, replica tables) */
610 : sql_rel *
611 686777 : rel_optimizer(mvc *sql, sql_rel *rel, int profile, int instantiate, int value_based_opt, int storage_based_opt)
612 : {
613 1373554 : global_props gp = (global_props) {.cnt = {0}, .instantiate = (uint8_t)instantiate, .opt_cycle = 0,
614 686777 : .has_special_modify = rel && is_modify(rel->op) && rel->flag&UPD_COMP};
615 686777 : visitor v = { .sql = sql, .value_based_opt = value_based_opt, .storage_based_opt = storage_based_opt, .changes = 1, .data = &gp };
616 :
617 686777 : if (!(rel = rel_keep_renames(sql, rel)))
618 : return rel;
619 :
620 686776 : sql->runs = !(ATOMIC_GET(&GDKdebug) & FORCEMITOMASK) && profile ? sa_zalloc(sql->sa, NSQLREWRITERS * sizeof(sql_optimizer_run)) : NULL;
621 1283571 : for ( ;rel && gp.opt_cycle < 20 && v.changes; gp.opt_cycle++) {
622 752938 : v.changes = 0;
623 752938 : gp = (global_props) {.cnt = {0}, .instantiate = (uint8_t)instantiate, .opt_cycle = gp.opt_cycle, .has_special_modify = gp.has_special_modify};
624 752938 : rel = rel_visitor_topdown(&v, rel, &rel_properties); /* collect relational tree properties */
625 752931 : gp.opt_level = calculate_opt_level(sql, rel);
626 752931 : if (gp.opt_level == 0 && !gp.needs_mergetable_rewrite)
627 : break;
628 596798 : rel = run_optimizer_set(&v, sql->runs, rel, &gp, pre_sql_optimizers);
629 : }
630 : #ifndef NDEBUG
631 686766 : assert(gp.opt_cycle < 20);
632 : #endif
633 :
634 : /* these optimizers run statistics gathered by the last optimization cycle */
635 686766 : rel = run_optimizer_set(&v, sql->runs, rel, &gp, post_sql_optimizers);
636 686766 : return rel;
637 : }
|