Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer.h"
15 : #include "rel_optimizer_private.h"
16 : #include "rel_rel.h"
17 : #include "rel_basetable.h"
18 : #include "rel_exp.h"
19 : #include "rel_propagate.h"
20 : #include "rel_statistics.h"
21 : #include "sql_privileges.h"
22 : #include "sql_storage.h"
23 :
24 : static sql_rel *
25 5029120 : rel_properties(visitor *v, sql_rel *rel)
26 : {
27 5029120 : global_props *gp = (global_props*)v->data;
28 :
29 : /* Don't flag any changes here! */
30 5029120 : gp->cnt[(int)rel->op]++;
31 5029120 : gp->needs_distinct |= need_distinct(rel);
32 5029120 : gp->recursive |= is_recursive(rel);
33 5029120 : if (gp->instantiate && is_basetable(rel->op)) {
34 738146 : mvc *sql = v->sql;
35 738146 : sql_table *t = (sql_table *) rel->l;
36 738146 : sql_part *pt;
37 :
38 : /* If the plan has a merge table or a child of one, then rel_merge_table_rewrite has to run */
39 738146 : gp->needs_mergetable_rewrite |= (isMergeTable(t) || (t->s && t->s->parts && (pt = partition_find_part(sql->session->tr, t, NULL))));
40 737998 : gp->needs_remote_replica_rewrite |= (isRemote(t) || isReplicaTable(t));
41 : }
42 5028972 : return rel;
43 : }
44 :
45 : typedef struct {
46 : atom *lval;
47 : atom *hval;
48 : bte anti:1,
49 : semantics:1;
50 : int flag;
51 : list *values;
52 : } range_limit;
53 :
54 : typedef struct {
55 : list *cols;
56 : list *ranges;
57 : sql_rel *sel;
58 : } merge_table_prune_info;
59 :
60 : static sql_rel *merge_table_prune_and_unionize(visitor *v, sql_rel *mt_rel, merge_table_prune_info *info);
61 :
62 : static sql_rel *
63 549 : rel_wrap_select_around_mt_child(visitor *v, sql_rel *t, merge_table_prune_info *info)
64 : {
65 : // TODO: it has to be a table (merge table component) add checks
66 549 : sql_table *subt = (sql_table *)t->l;
67 :
68 549 : if (isMergeTable(subt)) {
69 26 : if ((t = merge_table_prune_and_unionize(v, t, info)) == NULL)
70 : return NULL;
71 : }
72 :
73 549 : if (info) {
74 148 : t = rel_select(v->sql->sa, t, NULL);
75 148 : t->exps = exps_copy(v->sql, info->sel->exps);
76 148 : set_processed(t);
77 148 : set_processed(t);
78 : }
79 : return t;
80 : }
81 :
82 : #if 0
83 : static sql_rel *
84 : rel_unionize_mt_tables_balanced(visitor *v, sql_rel* mt, list* tables, merge_table_prune_info *info)
85 : {
86 : /* This function is creating the union tree in the tables list calling
87 : * itself recursively until the tables list has a single entry (the union tree)
88 : */
89 :
90 : /* base case */
91 : if (tables->cnt == 1) // XXX: or/and h->next == NULL
92 : return tables->h->data;
93 : /* merge (via union) every *two* consecutive nodes of the list */
94 : for (node *n = tables->h; n && n->next; n = n->next->next) {
95 : /* first (left) node */
96 : sql_rel *tl = rel_wrap_select_around_mt_child(v, n->data, info);
97 : /* second (right) node */
98 : sql_rel *tr = rel_wrap_select_around_mt_child(v, n->next->data, info);
99 : /* create the union */
100 : sql_rel *tu = rel_setop(v->sql->sa, tl, tr, op_union);
101 : rel_setop_set_exps(v->sql, tu, rel_projections(v->sql, mt, NULL, 1, 1), true);
102 : set_processed(tu);
103 : /* replace the two nodes with the new relation */
104 : list_append_before(tables, n, tu);
105 : list_remove_node(tables, NULL, n);
106 : list_remove_node(tables, NULL, n->next);
107 : // TODO: do i need to rebuild the hash of the list?
108 : }
109 : return rel_unionize_mt_tables_balanced(v, mt, tables, info);
110 : }
111 : #endif
112 :
113 : static sql_rel *
114 167 : rel_unionize_mt_tables_munion(visitor *v, sql_rel* mt, list* tables, merge_table_prune_info *info)
115 : {
116 : /* create the list of all the operand rels */
117 167 : list *rels = sa_list(v->sql->sa);
118 547 : for (node *n = tables->h; n; n = n->next) {
119 380 : sql_rel *r = rel_wrap_select_around_mt_child(v, n->data, info);
120 380 : append(rels, r);
121 : }
122 :
123 : /* create the munion */
124 167 : sql_rel *mu = rel_setop_n_ary(v->sql->sa, rels, op_munion);
125 167 : rel_setop_n_ary_set_exps(v->sql, mu, rel_projections(v->sql, mt, NULL, 1, 1), true);
126 167 : set_processed(mu);
127 :
128 167 : return mu;
129 : }
130 :
131 : static sql_rel *
132 356 : merge_table_prune_and_unionize(visitor *v, sql_rel *mt_rel, merge_table_prune_info *info)
133 : {
134 356 : if (mvc_highwater(v->sql))
135 0 : return sql_error(v->sql, 10, SQLSTATE(42000) "Query too complex: running out of stack space");
136 :
137 356 : sql_rel *nrel = NULL;
138 356 : sql_table *mt = (sql_table*) mt_rel->l;
139 356 : const char *mtalias = exp_relname(mt_rel->exps->h->data);
140 356 : list *tables = sa_list(v->sql->sa);
141 :
142 1034 : for (node *nt = mt->members->h; nt; nt = nt->next) {
143 680 : sql_part *pd = nt->data;
144 680 : sql_table *pt = find_sql_table_id(v->sql->session->tr, mt->s, pd->member);
145 680 : sqlstore *store = v->sql->session->tr->store;
146 680 : int skip = 0;
147 :
148 : /* At the moment we throw an error in the optimizer, but later this rewriter should move out from the optimizers */
149 680 : if ((isMergeTable(pt) || isReplicaTable(pt)) && list_empty(pt->members))
150 2 : return sql_error(v->sql, 02, SQLSTATE(42000) "%s '%s'.'%s' should have at least one table associated",
151 2 : TABLE_TYPE_DESCRIPTION(pt->type, pt->properties), pt->s->base.name, pt->base.name);
152 : /* Do not include empty partitions */
153 678 : if (isTable(pt) && pt->access == TABLE_READONLY && !store->storage_api.count_col(v->sql->session->tr, ol_first_node(pt->columns)->data, 10)) /* count active rows only */
154 1 : continue;
155 :
156 1739 : for (node *n = mt_rel->exps->h; n && !skip; n = n->next) { /* for each column of the child table */
157 1062 : sql_exp *e = n->data;
158 1062 : int i = 0;
159 1062 : bool first_attempt = true;
160 1062 : atom *cmin = NULL, *cmax = NULL, *rmin = NULL, *rmax = NULL;
161 1062 : list *inlist = NULL;
162 1062 : const char *cname = e->r;
163 1062 : sql_column *mt_col = NULL, *col = NULL;
164 :
165 1062 : if (cname[0] == '%') /* Ignore TID and indexes here */
166 55 : continue;
167 :
168 1007 : mt_col = ol_find_name(mt->columns, cname)->data;
169 1007 : col = ol_fetch(pt->columns, mt_col->colnr);
170 1007 : assert(e && e->type == e_column && col);
171 :
172 1007 : if (isTable(pt) && info && !list_empty(info->cols) && ATOMlinear(exp_subtype(e)->type->localtype)) {
173 747 : for (node *nn = info->cols->h ; nn && !skip; nn = nn->next) { /* test if it passes all predicates around it */
174 388 : if (nn->data == e) {
175 262 : range_limit *next = list_fetch(info->ranges, i);
176 262 : atom *lval = next->lval, *hval = next->hval;
177 262 : list *values = next->values;
178 :
179 : /* I don't handle cmp_in or cmp_notin cases with anti or null semantics yet */
180 262 : if (next->flag == cmp_in && (next->anti || next->semantics))
181 0 : continue;
182 :
183 262 : assert(col && (lval || values));
184 262 : if (!skip && pt->access == TABLE_READONLY) {
185 : /* check if the part falls within the bounds of the select expression else skip this (keep at least on part-table) */
186 87 : if (!cmin && !cmax && first_attempt) {
187 87 : void *min = NULL, *max = NULL;
188 87 : if (sql_trans_ranges(v->sql->session->tr, col, &min, &max) && min && max) {
189 84 : cmin = atom_general_ptr(v->sql->sa, &col->type, min);
190 84 : cmax = atom_general_ptr(v->sql->sa, &col->type, max);
191 : }
192 87 : first_attempt = false; /* no more attempts to read from storage */
193 : }
194 :
195 87 : if (cmin && cmax) {
196 84 : if (lval) {
197 80 : if (!next->semantics && ((lval && lval->isnull) || (hval && hval->isnull))) {
198 : skip = 1; /* NULL values don't match, skip them */
199 80 : } else if (!next->semantics) {
200 80 : if (next->flag == cmp_equal) {
201 20 : skip |= next->anti ? exp_range_overlap(cmin, cmax, lval, hval, false, false) != 0 :
202 10 : exp_range_overlap(cmin, cmax, lval, hval, false, false) == 0;
203 70 : } else if (hval != lval) { /* range case */
204 70 : comp_type lower = range2lcompare(next->flag), higher = range2rcompare(next->flag);
205 140 : skip |= next->anti ? exp_range_overlap(cmin, cmax, lval, hval, higher == cmp_lt, lower == cmp_gt) != 0 :
206 70 : exp_range_overlap(cmin, cmax, lval, hval, higher == cmp_lt, lower == cmp_gt) == 0;
207 : } else {
208 0 : switch (next->flag) {
209 0 : case cmp_gt:
210 0 : skip |= next->anti ? VALcmp(&(lval->data), &(cmax->data)) < 0 : VALcmp(&(lval->data), &(cmax->data)) >= 0;
211 0 : break;
212 0 : case cmp_gte:
213 0 : skip |= next->anti ? VALcmp(&(lval->data), &(cmax->data)) <= 0 : VALcmp(&(lval->data), &(cmax->data)) > 0;
214 0 : break;
215 0 : case cmp_lt:
216 0 : skip |= next->anti ? VALcmp(&(lval->data), &(cmax->data)) < 0 : VALcmp(&(cmin->data), &(lval->data)) >= 0;
217 0 : break;
218 0 : case cmp_lte:
219 0 : skip |= next->anti ? VALcmp(&(lval->data), &(cmax->data)) <= 0 : VALcmp(&(cmin->data), &(lval->data)) > 0;
220 0 : break;
221 : default:
222 : break;
223 : }
224 : }
225 : }
226 4 : } else if (next->flag == cmp_in) {
227 4 : int nskip = 1;
228 10 : for (node *m = values->h; m && nskip; m = m->next) {
229 6 : atom *a = m->data;
230 :
231 6 : if (a->isnull)
232 0 : continue;
233 6 : nskip &= exp_range_overlap(cmin, cmax, a, a, false, false) == 0;
234 : }
235 4 : skip |= nskip;
236 : }
237 : }
238 : }
239 262 : if (!skip && isPartitionedByColumnTable(mt) && strcmp(mt->part.pcol->base.name, col->base.name) == 0) {
240 158 : if (!next->semantics && ((lval && lval->isnull) || (hval && hval->isnull))) {
241 : skip = 1; /* NULL values don't match, skip them */
242 158 : } else if (next->semantics) {
243 : /* TODO NOT NULL prunning for partitions that just hold NULL values is still missing */
244 20 : skip |= next->flag == cmp_equal && !next->anti && lval && lval->isnull ? pd->with_nills == 0 : 0; /* *= NULL case */
245 : } else {
246 143 : if (isRangePartitionTable(mt)) {
247 109 : if (!rmin || !rmax) { /* initialize lazily */
248 109 : rmin = atom_general_ptr(v->sql->sa, &col->type, pd->part.range.minvalue);
249 109 : rmax = atom_general_ptr(v->sql->sa, &col->type, pd->part.range.maxvalue);
250 : }
251 :
252 : /* Prune range partitioned tables */
253 109 : if (rmin->isnull && rmax->isnull) {
254 0 : if (pd->with_nills == 1) /* the partition just holds null values, skip it */
255 143 : skip = 1;
256 : /* otherwise it holds all values in the range, cannot be pruned */
257 109 : } else if (rmin->isnull) { /* MINVALUE to limit */
258 4 : if (lval) {
259 4 : if (hval != lval) { /* range case */
260 : /* There's need to call range2lcompare, because the partition's upper limit is always exclusive */
261 2 : skip |= next->anti ? VALcmp(&(lval->data), &(rmax->data)) < 0 : VALcmp(&(lval->data), &(rmax->data)) >= 0;
262 : } else {
263 2 : switch (next->flag) { /* upper limit always exclusive */
264 2 : case cmp_equal:
265 : case cmp_gt:
266 : case cmp_gte:
267 2 : skip |= next->anti ? VALcmp(&(lval->data), &(rmax->data)) < 0 : VALcmp(&(lval->data), &(rmax->data)) >= 0;
268 2 : break;
269 : default:
270 : break;
271 : }
272 : }
273 0 : } else if (next->flag == cmp_in) {
274 0 : int nskip = 1;
275 0 : for (node *m = values->h; m && nskip; m = m->next) {
276 0 : atom *a = m->data;
277 :
278 0 : if (a->isnull)
279 0 : continue;
280 0 : nskip &= VALcmp(&(a->data), &(rmax->data)) >= 0;
281 : }
282 0 : skip |= nskip;
283 : }
284 105 : } else if (rmax->isnull) { /* limit to MAXVALUE */
285 29 : if (lval) {
286 26 : if (hval != lval) { /* range case */
287 17 : comp_type higher = range2rcompare(next->flag);
288 17 : if (higher == cmp_lt) {
289 6 : skip |= next->anti ? VALcmp(&(rmin->data), &(hval->data)) < 0 : VALcmp(&(rmin->data), &(hval->data)) >= 0;
290 11 : } else if (higher == cmp_lte) {
291 11 : skip |= next->anti ? VALcmp(&(rmin->data), &(hval->data)) <= 0 : VALcmp(&(rmin->data), &(hval->data)) > 0;
292 : } else {
293 0 : assert(0);
294 : }
295 : } else {
296 9 : switch (next->flag) {
297 2 : case cmp_lt:
298 2 : skip |= next->anti ? VALcmp(&(rmin->data), &(hval->data)) < 0 : VALcmp(&(rmin->data), &(hval->data)) >= 0;
299 2 : break;
300 6 : case cmp_equal:
301 : case cmp_lte:
302 6 : skip |= next->anti ? VALcmp(&(rmin->data), &(hval->data)) <= 0 : VALcmp(&(rmin->data), &(hval->data)) > 0;
303 6 : break;
304 : default:
305 : break;
306 : }
307 : }
308 3 : } else if (next->flag == cmp_in) {
309 3 : int nskip = 1;
310 10 : for (node *m = values->h; m && nskip; m = m->next) {
311 7 : atom *a = m->data;
312 :
313 7 : if (a->isnull)
314 0 : continue;
315 7 : nskip &= VALcmp(&(rmin->data), &(a->data)) > 0;
316 : }
317 3 : skip |= nskip;
318 : }
319 : } else { /* limit1 to limit2 (general case), limit2 is exclusive */
320 76 : bool max_differ_min = ATOMcmp(col->type.type->localtype, &rmin->data.val, &rmax->data.val) != 0;
321 :
322 76 : if (lval) {
323 68 : if (next->flag == cmp_equal) {
324 24 : skip |= next->anti ? exp_range_overlap(rmin, rmax, lval, hval, false, max_differ_min) != 0 :
325 12 : exp_range_overlap(rmin, rmax, lval, hval, false, max_differ_min) == 0;
326 56 : } else if (hval != lval) { /* For the between case */
327 40 : comp_type higher = range2rcompare(next->flag);
328 70 : skip |= next->anti ? exp_range_overlap(rmin, rmax, lval, hval, higher == cmp_lt, max_differ_min) != 0 :
329 30 : exp_range_overlap(rmin, rmax, lval, hval, higher == cmp_lt, max_differ_min) == 0;
330 : } else {
331 16 : switch (next->flag) {
332 4 : case cmp_gt:
333 4 : skip |= next->anti ? VALcmp(&(lval->data), &(rmax->data)) < 0 : VALcmp(&(lval->data), &(rmax->data)) >= 0;
334 4 : break;
335 2 : case cmp_gte:
336 2 : if (max_differ_min)
337 2 : skip |= next->anti ? VALcmp(&(lval->data), &(rmax->data)) < 0 : VALcmp(&(lval->data), &(rmax->data)) >= 0;
338 : else
339 0 : skip |= next->anti ? VALcmp(&(lval->data), &(rmax->data)) <= 0 : VALcmp(&(lval->data), &(rmax->data)) > 0;
340 : break;
341 2 : case cmp_lt:
342 2 : skip |= next->anti ? VALcmp(&(rmin->data), &(lval->data)) < 0 : VALcmp(&(rmin->data), &(lval->data)) >= 0;
343 2 : break;
344 8 : case cmp_lte:
345 8 : skip |= next->anti ? VALcmp(&(rmin->data), &(lval->data)) <= 0 : VALcmp(&(rmin->data), &(lval->data)) > 0;
346 8 : break;
347 : default:
348 : break;
349 : }
350 : }
351 8 : } else if (next->flag == cmp_in) {
352 8 : int nskip = 1;
353 20 : for (node *m = values->h; m && nskip; m = m->next) {
354 12 : atom *a = m->data;
355 :
356 12 : if (a->isnull)
357 0 : continue;
358 12 : nskip &= exp_range_overlap(rmin, rmax, a, a, false, max_differ_min) == 0;
359 : }
360 8 : skip |= nskip;
361 : }
362 : }
363 : }
364 :
365 143 : if (isListPartitionTable(mt) && (next->flag == cmp_equal || next->flag == cmp_in) && !next->anti) {
366 : /* if we find a value equal to one of the predicates, we don't prune */
367 : /* if the partition just holds null values, it will be skipped */
368 33 : if (!inlist) { /* initialize lazily */
369 32 : inlist = sa_list(v->sql->sa);
370 106 : for (node *m = pd->part.values->h; m; m = m->next) {
371 74 : sql_part_value *spv = (sql_part_value*) m->data;
372 74 : atom *pa = atom_general_ptr(v->sql->sa, &col->type, spv->value);
373 :
374 74 : list_append(inlist, pa);
375 : }
376 : }
377 :
378 33 : if (next->flag == cmp_equal) {
379 12 : int nskip = 1;
380 38 : for (node *m = inlist->h; m && nskip; m = m->next) {
381 26 : atom *pa = m->data;
382 26 : assert(!pa->isnull);
383 26 : nskip &= VALcmp(&(pa->data), &(lval->data)) != 0;
384 : }
385 12 : skip |= nskip;
386 21 : } else if (next->flag == cmp_in) {
387 50 : for (node *o = values->h; o && !skip; o = o->next) {
388 29 : atom *a = o->data;
389 29 : int nskip = 1;
390 :
391 29 : if (a->isnull)
392 0 : continue;
393 87 : for (node *m = inlist->h; m && nskip; m = m->next) {
394 58 : atom *pa = m->data;
395 58 : assert(!pa->isnull);
396 58 : nskip &= VALcmp(&(pa->data), &(a->data)) != 0;
397 : }
398 29 : skip |= nskip;
399 : }
400 : }
401 : }
402 : }
403 : }
404 : }
405 388 : i++;
406 : }
407 : }
408 : }
409 677 : if (!skip)
410 549 : append(tables, rel_rename_part(v->sql, rel_basetable(v->sql, pt, pt->base.name), mt_rel, mtalias));
411 : }
412 354 : if (list_empty(tables)) { /* No table passed the predicates, generate dummy relation */
413 18 : list *converted = sa_list(v->sql->sa);
414 18 : nrel = rel_project_exp(v->sql, exp_atom_bool(v->sql->sa, 1));
415 18 : nrel = rel_select(v->sql->sa, nrel, exp_atom_bool(v->sql->sa, 0));
416 18 : set_processed(nrel);
417 :
418 41 : for (node *n = mt_rel->exps->h ; n ; n = n->next) {
419 23 : sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
420 23 : exp_prop_alias(v->sql->sa, a, e);
421 23 : list_append(converted, a);
422 : }
423 18 : nrel = rel_project(v->sql->sa, nrel, converted);
424 : } else { /* Unionize children tables */
425 :
426 336 : if (mvc_debug_on(v->sql, 16)) {
427 : /* In case of a single table there in nothing to unionize */
428 0 : if (tables->cnt == 1) {
429 0 : nrel = rel_wrap_select_around_mt_child(v, tables->h->data, info);
430 : } else {
431 : //nrel = rel_unionize_mt_tables_balanced(v, mt_rel, tables, info);
432 0 : nrel = rel_setop_n_ary(v->sql->sa, tables, op_munion);
433 : }
434 336 : } else if (mvc_debug_on(v->sql, 32)) {
435 0 : for (node *n = tables->h; n ; n = n->next) {
436 0 : sql_rel *next = n->data;
437 0 : sql_table *subt = (sql_table *) next->l;
438 :
439 0 : if (isMergeTable(subt)) { /* apply select predicate recursively for nested merge tables */
440 0 : if (!(next = merge_table_prune_and_unionize(v, next, info)))
441 : return NULL;
442 0 : } else if (info) { /* propagate select under union */
443 0 : next = rel_select(v->sql->sa, next, NULL);
444 0 : next->exps = exps_copy(v->sql, info->sel->exps);
445 0 : set_processed(next);
446 : }
447 :
448 0 : if (nrel) {
449 0 : nrel = rel_setop(v->sql->sa, nrel, next, op_union);
450 0 : rel_setop_set_exps(v->sql, nrel, rel_projections(v->sql, mt_rel, NULL, 1, 1), true);
451 0 : set_processed(nrel);
452 : } else {
453 : nrel = next;
454 : }
455 : }
456 : } else {
457 336 : if (tables->cnt == 1) {
458 169 : nrel = rel_wrap_select_around_mt_child(v, tables->h->data, info);
459 : } else {
460 167 : nrel = rel_unionize_mt_tables_munion(v, mt_rel, tables, info);
461 : }
462 : }
463 : }
464 : return nrel;
465 : }
466 :
467 : /* rewrite merge tables into union of base tables */
468 : static sql_rel *
469 13382 : rel_merge_table_rewrite_(visitor *v, sql_rel *rel)
470 : {
471 13382 : if (is_modify(rel->op)) {
472 1113 : sql_query *query = query_create(v->sql);
473 1113 : return rel_propagate(query, rel, &v->changes);
474 : } else {
475 12269 : sql_rel *bt = rel, *sel = NULL, *nrel = NULL;
476 :
477 12269 : if (is_select(rel->op)) {
478 1937 : sel = rel;
479 1937 : bt = rel->l;
480 : }
481 12269 : if (is_basetable(bt->op) && rel_base_table(bt) && isMergeTable((sql_table*)bt->l)) {
482 619 : sql_table *mt = rel_base_table(bt);
483 619 : merge_table_prune_info *info = NULL;
484 :
485 619 : if (list_empty(mt->members)) /* in DDL statement cases skip if mergetable is empty */
486 : return rel;
487 330 : if (sel && !list_empty(sel->exps)) { /* prepare prunning information once */
488 105 : info = SA_NEW(v->sql->sa, merge_table_prune_info);
489 210 : *info = (merge_table_prune_info) {
490 105 : .cols = sa_list(v->sql->sa),
491 105 : .ranges = sa_list(v->sql->sa),
492 : .sel = sel
493 : };
494 220 : for (node *n = sel->exps->h; n; n = n->next) {
495 115 : sql_exp *e = n->data, *c = e->l;
496 115 : int flag = e->flag;
497 :
498 115 : if (e->type != e_cmp || (!is_theta_exp(flag) && flag != cmp_in) || is_symmetric(e) || !(c = rel_find_exp(rel, c)))
499 7 : continue;
500 :
501 108 : if (flag == cmp_gt || flag == cmp_gte || flag == cmp_lte || flag == cmp_lt || flag == cmp_equal) {
502 93 : sql_exp *l = e->r, *h = e->f;
503 93 : atom *lval = exp_flatten(v->sql, v->value_based_opt, l);
504 93 : atom *hval = h ? exp_flatten(v->sql, v->value_based_opt, h) : lval;
505 :
506 93 : if (lval && hval) {
507 91 : range_limit *next = SA_NEW(v->sql->sa, range_limit);
508 :
509 91 : *next = (range_limit) {
510 : .lval = lval,
511 : .hval = hval,
512 : .flag = flag,
513 91 : .anti = is_anti(e),
514 91 : .semantics = is_semantics(e),
515 : };
516 91 : list_append(info->cols, c);
517 91 : list_append(info->ranges, next);
518 : }
519 : }
520 108 : if (flag == cmp_in) { /* handle in lists */
521 15 : list *vals = e->r, *vlist = sa_list(v->sql->sa);
522 :
523 15 : node *m = NULL;
524 48 : for (m = vals->h; m; m = m->next) {
525 33 : sql_exp *l = m->data;
526 33 : atom *lval = exp_flatten(v->sql, v->value_based_opt, l);
527 :
528 33 : if (!lval)
529 : break;
530 33 : list_append(vlist, lval);
531 : }
532 15 : if (!m) {
533 15 : range_limit *next = SA_NEW(v->sql->sa, range_limit);
534 :
535 15 : *next = (range_limit) {
536 : .values = vlist, /* mark high as value list */
537 : .flag = flag,
538 15 : .anti = is_anti(e),
539 15 : .semantics = is_semantics(e),
540 : };
541 15 : list_append(info->cols, c);
542 15 : list_append(info->ranges, next);
543 : }
544 : }
545 : }
546 : }
547 330 : if (!(nrel = merge_table_prune_and_unionize(v, bt, info)))
548 : return NULL;
549 : /* Always do relation inplace. If the mt relation has more than 1 reference, this is required */
550 328 : if (is_union(nrel->op)) {
551 0 : rel = rel_inplace_setop(v->sql, rel, nrel->l, nrel->r, op_union, nrel->exps);
552 : } else if (is_munion(nrel->op)) {
553 150 : rel = rel_inplace_setop_n_ary(v->sql, rel, nrel->l, op_munion, nrel->exps);
554 : } else if (is_select(nrel->op)) {
555 49 : rel = rel_inplace_select(rel, nrel->l, nrel->exps);
556 : } else if (is_basetable(nrel->op)) {
557 111 : rel = rel_inplace_basetable(rel, nrel);
558 : } else {
559 0 : assert(is_simple_project(nrel->op));
560 18 : rel = rel_inplace_project(v->sql->sa, rel, nrel->l, nrel->exps);
561 18 : rel->card = exps_card(nrel->exps);
562 : }
563 : /* make sure that we do NOT destroy the subrels */
564 328 : nrel->l = nrel->r = NULL;
565 328 : rel_destroy(nrel);
566 328 : v->changes++;
567 : }
568 : }
569 : return rel;
570 : }
571 :
572 : static sql_rel *
573 1994 : rel_merge_table_rewrite(visitor *v, global_props *gp, sql_rel *rel)
574 : {
575 1994 : (void) gp;
576 1994 : return rel_visitor_topdown(v, rel, &rel_merge_table_rewrite_);
577 : }
578 :
579 : run_optimizer
580 631171 : bind_merge_table_rewrite(visitor *v, global_props *gp)
581 : {
582 631171 : (void) v;
583 631171 : return gp->needs_mergetable_rewrite ? rel_merge_table_rewrite : NULL;
584 : }
585 :
586 : /* these optimizers/rewriters run in a cycle loop */
587 : const sql_optimizer pre_sql_optimizers[] = {
588 : { 0, "split_select", bind_split_select},
589 : { 1, "push_project_down", bind_push_project_down},
590 : { 2, "merge_projects", bind_merge_projects},
591 : { 3, "push_project_up", bind_push_project_up},
592 : { 4, "split_project", bind_split_project},
593 : { 5, "remove_redundant_join", bind_remove_redundant_join},
594 : { 6, "simplify_math", bind_simplify_math},
595 : { 7, "optimize_exps", bind_optimize_exps},
596 : { 8, "optimize_select_and_joins_bottomup", bind_optimize_select_and_joins_bottomup},
597 : { 9, "project_reduce_casts", bind_project_reduce_casts},
598 : {10, "optimize_unions_bottomup", bind_optimize_unions_bottomup},
599 : {11, "optimize_projections", bind_optimize_projections},
600 : {12, "optimize_joins", bind_optimize_joins},
601 : {13, "join_order", bind_join_order},
602 : {14, "optimize_semi_and_anti", bind_optimize_semi_and_anti},
603 : {15, "optimize_select_and_joins_topdown", bind_optimize_select_and_joins_topdown},
604 : {16, "optimize_unions_topdown", bind_optimize_unions_topdown},
605 : {17, "dce", bind_dce},
606 : {18, "push_func_and_select_down", bind_push_func_and_select_down},
607 : {19, "push_topn_and_sample_down", bind_push_topn_and_sample_down},
608 : {20, "distinct_project2groupby", bind_distinct_project2groupby},
609 : {21, "merge_table_rewrite", bind_merge_table_rewrite},
610 : { 0, NULL, NULL}
611 : };
612 :
613 : /* these optimizers/rewriters only run once after the cycle loop */
614 : const sql_optimizer post_sql_optimizers[] = {
615 : /* Merge table rewrites may introduce remote or replica tables */
616 : /* At the moment, make sure the remote table rewriters always run after the merge table one */
617 : {23, "rewrite_remote", bind_rewrite_remote},
618 : {24, "rewrite_replica", bind_rewrite_replica},
619 : {25, "remote_func", bind_remote_func},
620 : {26, "get_statistics", bind_get_statistics}, /* gather statistics */
621 : {27, "join_order2", bind_join_order2}, /* run join order one more time with statistics */
622 : {28, "final_optimization_loop", bind_final_optimization_loop}, /* run select and group by order with statistics gathered */
623 : { 0, NULL, NULL}
624 : /* If an optimizer is going to be added, don't forget to update NSQLREWRITERS macro */
625 : };
626 :
627 :
628 : /* for trivial queries don't run optimizers */
629 : static int
630 799448 : calculate_opt_level(mvc *sql, sql_rel *rel)
631 : {
632 990044 : if (rel->card <= CARD_ATOM) {
633 774661 : if (is_insert(rel->op))
634 109235 : return rel->r ? calculate_opt_level(sql, rel->r) : 0;
635 665426 : if (is_simple_project(rel->op))
636 249737 : return rel->l ? calculate_opt_level(sql, rel->l) : 0;
637 : }
638 : return 1;
639 : }
640 :
641 : static inline sql_rel *
642 1353564 : run_optimizer_set(visitor *v, sql_optimizer_run *runs, sql_rel *rel, global_props *gp, const sql_optimizer *set)
643 : {
644 : /* if 'runs' is set, it means profiling is intended */
645 19571243 : for (int i = 0 ; set[i].name ; i++) {
646 18217447 : run_optimizer opt = NULL;
647 :
648 18217447 : if ((opt = set[i].bind_optimizer(v, gp))) {
649 3605484 : if (runs) {
650 0 : sql_optimizer_run *run = &(runs[set[i].index]);
651 0 : run->name = set[i].name;
652 0 : int changes = v->changes;
653 0 : lng clk = GDKusec();
654 0 : rel = opt(v, gp, rel);
655 0 : run->time += (GDKusec() - clk);
656 0 : run->nchanges += (v->changes - changes);
657 : } else {
658 3605484 : rel = opt(v, gp, rel);
659 : }
660 : }
661 : }
662 1353796 : return rel;
663 : }
664 :
665 : /* 'profile' means to benchmark each individual optimizer run */
666 : /* 'instantiate' means to rewrite logical tables: (merge, remote, replica tables) */
667 : static sql_rel *
668 721870 : rel_optimizer_one(mvc *sql, sql_rel *rel, int profile, int instantiate, int value_based_opt, int storage_based_opt)
669 : {
670 1443740 : global_props gp = (global_props) {.cnt = {0}, .instantiate = (uint8_t)instantiate, .opt_cycle = 0,
671 721870 : .has_special_modify = rel && is_modify(rel->op) && rel->flag&UPD_COMP};
672 721870 : visitor v = { .sql = sql, .value_based_opt = value_based_opt, .storage_based_opt = storage_based_opt, .changes = 1, .data = &gp };
673 :
674 721870 : sql->runs = !(ATOMIC_GET(&GDKdebug) & TESTINGMASK) && profile ? sa_zalloc(sql->sa, NSQLREWRITERS * sizeof(sql_optimizer_run)) : NULL;
675 1353134 : for ( ;rel && gp.opt_cycle < 20 && v.changes; gp.opt_cycle++) {
676 799009 : v.changes = 0;
677 799009 : gp = (global_props) {.cnt = {0}, .instantiate = (uint8_t)instantiate, .opt_cycle = gp.opt_cycle, .has_special_modify = gp.has_special_modify};
678 799009 : rel = rel_visitor_topdown(&v, rel, &rel_properties); /* collect relational tree properties */
679 799432 : gp.opt_level = calculate_opt_level(sql, rel);
680 799432 : if (gp.opt_level == 0 && !gp.needs_mergetable_rewrite)
681 : break;
682 631221 : sql->recursive = gp.recursive;
683 631221 : rel = run_optimizer_set(&v, sql->runs, rel, &gp, pre_sql_optimizers);
684 : }
685 : #ifndef NDEBUG
686 722336 : assert(gp.opt_cycle < 20);
687 : #endif
688 :
689 : /* these optimizers run statistics gathered by the last optimization cycle */
690 722336 : rel = run_optimizer_set(&v, sql->runs, rel, &gp, post_sql_optimizers);
691 722516 : return rel;
692 : }
693 :
694 : static sql_exp *
695 302989 : exp_optimize_one(visitor *v, sql_rel *rel, sql_exp *e, int depth )
696 : {
697 302989 : (void)rel;
698 302989 : (void)depth;
699 302989 : if (e->type == e_psm && e->flag == PSM_REL && e->l) {
700 19584 : e->l = rel_optimizer_one(v->sql, e->l, 0, v->changes, v->value_based_opt, v->storage_based_opt);
701 : }
702 302989 : return e;
703 : }
704 :
705 : sql_rel *
706 725463 : rel_optimizer(mvc *sql, sql_rel *rel, int profile, int instantiate, int value_based_opt, int storage_based_opt)
707 : {
708 725463 : if (rel && rel->op == op_ddl && rel->flag == ddl_psm) {
709 23088 : if (!list_empty(rel->exps)) {
710 22997 : bool changed = 0;
711 22997 : visitor v = { .sql = sql, .value_based_opt = value_based_opt, .storage_based_opt = storage_based_opt, .changes = instantiate };
712 63740 : for(node *n = rel->exps->h; n; n = n->next) {
713 40743 : sql_exp *e = n->data;
714 40743 : exp_visitor(&v, rel, e, 1, exp_optimize_one, true, true, true, &changed);
715 : }
716 : }
717 23088 : return rel;
718 : } else {
719 702375 : return rel_optimizer_one(sql, rel, profile, instantiate, value_based_opt, storage_based_opt);
720 : }
721 : }
|