Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer.h"
15 : #include "rel_optimizer_private.h"
16 : #include "rel_rel.h"
17 : #include "rel_basetable.h"
18 : #include "rel_exp.h"
19 : #include "rel_propagate.h"
20 : #include "rel_statistics.h"
21 : #include "sql_privileges.h"
22 :
23 : static sql_rel *
24 5225997 : rel_properties(visitor *v, sql_rel *rel)
25 : {
26 5225997 : global_props *gp = (global_props*)v->data;
27 :
28 : /* Don't flag any changes here! */
29 5225997 : gp->cnt[(int)rel->op]++;
30 5225997 : gp->needs_distinct |= need_distinct(rel);
31 5225997 : if (gp->instantiate && is_basetable(rel->op)) {
32 618019 : mvc *sql = v->sql;
33 618019 : sql_table *t = (sql_table *) rel->l;
34 618019 : sql_part *pt;
35 :
36 : /* If the plan has a merge table or a child of one, then rel_merge_table_rewrite has to run */
37 618019 : gp->needs_mergetable_rewrite |= (isMergeTable(t) || (t->s && t->s->parts && (pt = partition_find_part(sql->session->tr, t, NULL))));
38 617882 : gp->needs_remote_replica_rewrite |= (isRemote(t) || isReplicaTable(t));
39 : }
40 5225860 : return rel;
41 : }
42 :
43 : typedef struct {
44 : atom *lval;
45 : atom *hval;
46 : bte anti:1,
47 : semantics:1;
48 : int flag;
49 : list *values;
50 : } range_limit;
51 :
52 : typedef struct {
53 : list *cols;
54 : list *ranges;
55 : sql_rel *sel;
56 : } merge_table_prune_info;
57 :
58 : static sql_rel *
59 0 : rel_wrap_select_around_table(visitor *v, sql_rel *t, merge_table_prune_info *info)
60 : {
61 : // TODO: it has to be a table (merge table component) add checks
62 0 : sql_table *subt = (sql_table *)t->l;
63 :
64 0 : if (isMergeTable(subt)) {
65 : // TODO: handle it
66 : return NULL;
67 : }
68 :
69 0 : if (info) {
70 0 : t = rel_select(v->sql->sa, t, NULL);
71 0 : t->exps = exps_copy(v->sql, info->sel->exps);
72 0 : set_processed(t);
73 0 : set_processed(t);
74 : }
75 : return t;
76 : }
77 :
78 : #if 0
79 : static sql_rel *
80 : rel_unionize_mt_tables_balanced(visitor *v, sql_rel* mt, list* tables, merge_table_prune_info *info)
81 : {
82 : /* This function is creating the union tree in the tables list calling
83 : * itself recursively until the tables list has a single entry (the union tree)
84 : */
85 :
86 : /* base case */
87 : if (tables->cnt == 1) // XXX: or/and h->next == NULL
88 : return tables->h->data;
89 : /* merge (via union) every *two* consequtive nodes of the list */
90 : for (node *n = tables->h; n && n->next; n = n->next->next) {
91 : /* first (left) node */
92 : sql_rel *tl = rel_wrap_select_around_table(v, n->data, info);
93 : /* second (right) node */
94 : sql_rel *tr = rel_wrap_select_around_table(v, n->next->data, info);
95 : /* create the union */
96 : sql_rel *tu = rel_setop(v->sql->sa, tl, tr, op_union);
97 : rel_setop_set_exps(v->sql, tu, rel_projections(v->sql, mt, NULL, 1, 1), true);
98 : set_processed(tu);
99 : /* replace the two nodes with the new relation */
100 : list_append_before(tables, n, tu);
101 : list_remove_node(tables, NULL, n);
102 : list_remove_node(tables, NULL, n->next);
103 : // TODO: do i need to rebuild the hash of the list?
104 : }
105 : return rel_unionize_mt_tables_balanced(v, mt, tables, info);
106 : }
107 : #endif
108 :
109 : static sql_rel *
110 0 : rel_unionize_mt_tables_munion(visitor *v, sql_rel* mt, list* tables, merge_table_prune_info *info)
111 : {
112 : /* create the list of all the operand rels */
113 0 : list *rels = sa_list(v->sql->sa);
114 0 : for (node *n = tables->h; n; n = n->next) {
115 0 : sql_rel *r = rel_wrap_select_around_table(v, n->data, info);
116 0 : append(rels, r);
117 : }
118 :
119 : /* create the munion */
120 0 : sql_rel *mu = rel_setop_n_ary(v->sql->sa, rels, op_munion);
121 0 : rel_setop_n_ary_set_exps(v->sql, mu, rel_projections(v->sql, mt, NULL, 1, 1), true);
122 0 : set_processed(mu);
123 :
124 0 : return mu;
125 : }
126 :
127 : static sql_rel *
128 281 : merge_table_prune_and_unionize(visitor *v, sql_rel *mt_rel, merge_table_prune_info *info)
129 : {
130 281 : if (mvc_highwater(v->sql))
131 0 : return sql_error(v->sql, 10, SQLSTATE(42000) "Query too complex: running out of stack space");
132 :
133 281 : sql_rel *nrel = NULL;
134 281 : sql_table *mt = (sql_table*) mt_rel->l;
135 281 : const char *mtalias = exp_relname(mt_rel->exps->h->data);
136 281 : list *tables = sa_list(v->sql->sa);
137 :
138 808 : for (node *nt = mt->members->h; nt; nt = nt->next) {
139 528 : sql_part *pd = nt->data;
140 528 : sql_table *pt = find_sql_table_id(v->sql->session->tr, mt->s, pd->member);
141 528 : sqlstore *store = v->sql->session->tr->store;
142 528 : int skip = 0;
143 :
144 : /* At the moment we throw an error in the optimizer, but later this rewriter should move out from the optimizers */
145 528 : if ((isMergeTable(pt) || isReplicaTable(pt)) && list_empty(pt->members))
146 1 : return sql_error(v->sql, 02, SQLSTATE(42000) "%s '%s'.'%s' should have at least one table associated",
147 1 : TABLE_TYPE_DESCRIPTION(pt->type, pt->properties), pt->s->base.name, pt->base.name);
148 : /* Do not include empty partitions */
149 527 : if (isTable(pt) && pt->access == TABLE_READONLY && !store->storage_api.count_col(v->sql->session->tr, ol_first_node(pt->columns)->data, 10)) /* count active rows only */
150 0 : continue;
151 :
152 1313 : for (node *n = mt_rel->exps->h; n && !skip; n = n->next) { /* for each column of the child table */
153 786 : sql_exp *e = n->data;
154 786 : int i = 0;
155 786 : bool first_attempt = true;
156 786 : atom *cmin = NULL, *cmax = NULL, *rmin = NULL, *rmax = NULL;
157 786 : list *inlist = NULL;
158 786 : const char *cname = e->r;
159 786 : sql_column *mt_col = NULL, *col = NULL;
160 :
161 786 : if (cname[0] == '%') /* Ignore TID and indexes here */
162 51 : continue;
163 :
164 735 : mt_col = ol_find_name(mt->columns, cname)->data;
165 735 : col = ol_fetch(pt->columns, mt_col->colnr);
166 735 : assert(e && e->type == e_column && col);
167 :
168 735 : if (isTable(pt) && info && !list_empty(info->cols) && ATOMlinear(exp_subtype(e)->type->localtype)) {
169 465 : for (node *nn = info->cols->h ; nn && !skip; nn = nn->next) { /* test if it passes all predicates around it */
170 241 : if (nn->data == e) {
171 201 : range_limit *next = list_fetch(info->ranges, i);
172 201 : atom *lval = next->lval, *hval = next->hval;
173 201 : list *values = next->values;
174 :
175 : /* I don't handle cmp_in or cmp_notin cases with anti or null semantics yet */
176 201 : if (next->flag == cmp_in && (next->anti || next->semantics))
177 0 : continue;
178 :
179 201 : assert(col && (lval || values));
180 201 : if (!skip && pt->access == TABLE_READONLY) {
181 : /* check if the part falls within the bounds of the select expression else skip this (keep at least on part-table) */
182 34 : if (!cmin && !cmax && first_attempt) {
183 34 : void *min = NULL, *max = NULL;
184 34 : if (sql_trans_ranges(v->sql->session->tr, col, &min, &max) && min && max) {
185 34 : cmin = atom_general_ptr(v->sql->sa, &col->type, min);
186 34 : cmax = atom_general_ptr(v->sql->sa, &col->type, max);
187 : }
188 34 : first_attempt = false; /* no more attempts to read from storage */
189 : }
190 :
191 34 : if (cmin && cmax) {
192 34 : if (lval) {
193 34 : if (!next->semantics && ((lval && lval->isnull) || (hval && hval->isnull))) {
194 : skip = 1; /* NULL values don't match, skip them */
195 34 : } else if (!next->semantics) {
196 34 : if (next->flag == cmp_equal) {
197 0 : skip |= next->anti ? exp_range_overlap(cmin, cmax, lval, hval, false, false) != 0 :
198 0 : exp_range_overlap(cmin, cmax, lval, hval, false, false) == 0;
199 34 : } else if (hval != lval) { /* range case */
200 34 : comp_type lower = range2lcompare(next->flag), higher = range2rcompare(next->flag);
201 68 : skip |= next->anti ? exp_range_overlap(cmin, cmax, lval, hval, higher == cmp_lt, lower == cmp_gt) != 0 :
202 34 : exp_range_overlap(cmin, cmax, lval, hval, higher == cmp_lt, lower == cmp_gt) == 0;
203 : } else {
204 0 : switch (next->flag) {
205 0 : case cmp_gt:
206 0 : skip |= next->anti ? VALcmp(&(lval->data), &(cmax->data)) < 0 : VALcmp(&(lval->data), &(cmax->data)) >= 0;
207 0 : break;
208 0 : case cmp_gte:
209 0 : skip |= next->anti ? VALcmp(&(lval->data), &(cmax->data)) <= 0 : VALcmp(&(lval->data), &(cmax->data)) > 0;
210 0 : break;
211 0 : case cmp_lt:
212 0 : skip |= next->anti ? VALcmp(&(lval->data), &(cmax->data)) < 0 : VALcmp(&(cmin->data), &(lval->data)) >= 0;
213 0 : break;
214 0 : case cmp_lte:
215 0 : skip |= next->anti ? VALcmp(&(lval->data), &(cmax->data)) <= 0 : VALcmp(&(cmin->data), &(lval->data)) > 0;
216 0 : break;
217 : default:
218 : break;
219 : }
220 : }
221 : }
222 0 : } else if (next->flag == cmp_in) {
223 0 : int nskip = 1;
224 0 : for (node *m = values->h; m && nskip; m = m->next) {
225 0 : atom *a = m->data;
226 :
227 0 : if (a->isnull)
228 0 : continue;
229 0 : nskip &= exp_range_overlap(cmin, cmax, a, a, false, false) == 0;
230 : }
231 0 : skip |= nskip;
232 : }
233 : }
234 : }
235 201 : if (!skip && isPartitionedByColumnTable(mt) && strcmp(mt->part.pcol->base.name, col->base.name) == 0) {
236 158 : if (!next->semantics && ((lval && lval->isnull) || (hval && hval->isnull))) {
237 : skip = 1; /* NULL values don't match, skip them */
238 158 : } else if (next->semantics) {
239 : /* TODO NOT NULL prunning for partitions that just hold NULL values is still missing */
240 20 : skip |= next->flag == cmp_equal && !next->anti && lval && lval->isnull ? pd->with_nills == 0 : 0; /* *= NULL case */
241 : } else {
242 143 : if (isRangePartitionTable(mt)) {
243 109 : if (!rmin || !rmax) { /* initialize lazily */
244 109 : rmin = atom_general_ptr(v->sql->sa, &col->type, pd->part.range.minvalue);
245 109 : rmax = atom_general_ptr(v->sql->sa, &col->type, pd->part.range.maxvalue);
246 : }
247 :
248 : /* Prune range partitioned tables */
249 109 : if (rmin->isnull && rmax->isnull) {
250 0 : if (pd->with_nills == 1) /* the partition just holds null values, skip it */
251 143 : skip = 1;
252 : /* otherwise it holds all values in the range, cannot be pruned */
253 109 : } else if (rmin->isnull) { /* MINVALUE to limit */
254 4 : if (lval) {
255 4 : if (hval != lval) { /* range case */
256 : /* There's need to call range2lcompare, because the partition's upper limit is always exclusive */
257 2 : skip |= next->anti ? VALcmp(&(lval->data), &(rmax->data)) < 0 : VALcmp(&(lval->data), &(rmax->data)) >= 0;
258 : } else {
259 2 : switch (next->flag) { /* upper limit always exclusive */
260 2 : case cmp_equal:
261 : case cmp_gt:
262 : case cmp_gte:
263 2 : skip |= next->anti ? VALcmp(&(lval->data), &(rmax->data)) < 0 : VALcmp(&(lval->data), &(rmax->data)) >= 0;
264 2 : break;
265 : default:
266 : break;
267 : }
268 : }
269 0 : } else if (next->flag == cmp_in) {
270 0 : int nskip = 1;
271 0 : for (node *m = values->h; m && nskip; m = m->next) {
272 0 : atom *a = m->data;
273 :
274 0 : if (a->isnull)
275 0 : continue;
276 0 : nskip &= VALcmp(&(a->data), &(rmax->data)) >= 0;
277 : }
278 0 : skip |= nskip;
279 : }
280 105 : } else if (rmax->isnull) { /* limit to MAXVALUE */
281 29 : if (lval) {
282 26 : if (hval != lval) { /* range case */
283 17 : comp_type higher = range2rcompare(next->flag);
284 17 : if (higher == cmp_lt) {
285 6 : skip |= next->anti ? VALcmp(&(rmin->data), &(hval->data)) < 0 : VALcmp(&(rmin->data), &(hval->data)) >= 0;
286 11 : } else if (higher == cmp_lte) {
287 11 : skip |= next->anti ? VALcmp(&(rmin->data), &(hval->data)) <= 0 : VALcmp(&(rmin->data), &(hval->data)) > 0;
288 : } else {
289 0 : assert(0);
290 : }
291 : } else {
292 9 : switch (next->flag) {
293 2 : case cmp_lt:
294 2 : skip |= next->anti ? VALcmp(&(rmin->data), &(hval->data)) < 0 : VALcmp(&(rmin->data), &(hval->data)) >= 0;
295 2 : break;
296 6 : case cmp_equal:
297 : case cmp_lte:
298 6 : skip |= next->anti ? VALcmp(&(rmin->data), &(hval->data)) <= 0 : VALcmp(&(rmin->data), &(hval->data)) > 0;
299 6 : break;
300 : default:
301 : break;
302 : }
303 : }
304 3 : } else if (next->flag == cmp_in) {
305 3 : int nskip = 1;
306 10 : for (node *m = values->h; m && nskip; m = m->next) {
307 7 : atom *a = m->data;
308 :
309 7 : if (a->isnull)
310 0 : continue;
311 7 : nskip &= VALcmp(&(rmin->data), &(a->data)) > 0;
312 : }
313 3 : skip |= nskip;
314 : }
315 : } else { /* limit1 to limit2 (general case), limit2 is exclusive */
316 76 : bool max_differ_min = ATOMcmp(col->type.type->localtype, &rmin->data.val, &rmax->data.val) != 0;
317 :
318 76 : if (lval) {
319 68 : if (next->flag == cmp_equal) {
320 24 : skip |= next->anti ? exp_range_overlap(rmin, rmax, lval, hval, false, max_differ_min) != 0 :
321 12 : exp_range_overlap(rmin, rmax, lval, hval, false, max_differ_min) == 0;
322 56 : } else if (hval != lval) { /* For the between case */
323 40 : comp_type higher = range2rcompare(next->flag);
324 70 : skip |= next->anti ? exp_range_overlap(rmin, rmax, lval, hval, higher == cmp_lt, max_differ_min) != 0 :
325 30 : exp_range_overlap(rmin, rmax, lval, hval, higher == cmp_lt, max_differ_min) == 0;
326 : } else {
327 16 : switch (next->flag) {
328 4 : case cmp_gt:
329 4 : skip |= next->anti ? VALcmp(&(lval->data), &(rmax->data)) < 0 : VALcmp(&(lval->data), &(rmax->data)) >= 0;
330 4 : break;
331 2 : case cmp_gte:
332 2 : if (max_differ_min)
333 2 : skip |= next->anti ? VALcmp(&(lval->data), &(rmax->data)) < 0 : VALcmp(&(lval->data), &(rmax->data)) >= 0;
334 : else
335 0 : skip |= next->anti ? VALcmp(&(lval->data), &(rmax->data)) <= 0 : VALcmp(&(lval->data), &(rmax->data)) > 0;
336 : break;
337 2 : case cmp_lt:
338 2 : skip |= next->anti ? VALcmp(&(rmin->data), &(lval->data)) < 0 : VALcmp(&(rmin->data), &(lval->data)) >= 0;
339 2 : break;
340 8 : case cmp_lte:
341 8 : skip |= next->anti ? VALcmp(&(rmin->data), &(lval->data)) <= 0 : VALcmp(&(rmin->data), &(lval->data)) > 0;
342 8 : break;
343 : default:
344 : break;
345 : }
346 : }
347 8 : } else if (next->flag == cmp_in) {
348 8 : int nskip = 1;
349 20 : for (node *m = values->h; m && nskip; m = m->next) {
350 12 : atom *a = m->data;
351 :
352 12 : if (a->isnull)
353 0 : continue;
354 12 : nskip &= exp_range_overlap(rmin, rmax, a, a, false, max_differ_min) == 0;
355 : }
356 8 : skip |= nskip;
357 : }
358 : }
359 : }
360 :
361 143 : if (isListPartitionTable(mt) && (next->flag == cmp_equal || next->flag == cmp_in) && !next->anti) {
362 : /* if we find a value equal to one of the predicates, we don't prune */
363 : /* if the partition just holds null values, it will be skipped */
364 33 : if (!inlist) { /* initialize lazily */
365 32 : inlist = sa_list(v->sql->sa);
366 106 : for (node *m = pd->part.values->h; m; m = m->next) {
367 74 : sql_part_value *spv = (sql_part_value*) m->data;
368 74 : atom *pa = atom_general_ptr(v->sql->sa, &col->type, spv->value);
369 :
370 74 : list_append(inlist, pa);
371 : }
372 : }
373 :
374 33 : if (next->flag == cmp_equal) {
375 12 : int nskip = 1;
376 38 : for (node *m = inlist->h; m && nskip; m = m->next) {
377 26 : atom *pa = m->data;
378 26 : assert(!pa->isnull);
379 26 : nskip &= VALcmp(&(pa->data), &(lval->data)) != 0;
380 : }
381 12 : skip |= nskip;
382 21 : } else if (next->flag == cmp_in) {
383 50 : for (node *o = values->h; o && !skip; o = o->next) {
384 29 : atom *a = o->data;
385 29 : int nskip = 1;
386 :
387 29 : if (a->isnull)
388 0 : continue;
389 87 : for (node *m = inlist->h; m && nskip; m = m->next) {
390 58 : atom *pa = m->data;
391 58 : assert(!pa->isnull);
392 58 : nskip &= VALcmp(&(pa->data), &(a->data)) != 0;
393 : }
394 29 : skip |= nskip;
395 : }
396 : }
397 : }
398 : }
399 : }
400 : }
401 241 : i++;
402 : }
403 : }
404 : }
405 527 : if (!skip)
406 416 : append(tables, rel_rename_part(v->sql, rel_basetable(v->sql, pt, pt->base.name), mt_rel, mtalias));
407 : }
408 280 : if (list_empty(tables)) { /* No table passed the predicates, generate dummy relation */
409 15 : list *converted = sa_list(v->sql->sa);
410 15 : nrel = rel_project_exp(v->sql, exp_atom_bool(v->sql->sa, 1));
411 15 : nrel = rel_select(v->sql->sa, nrel, exp_atom_bool(v->sql->sa, 0));
412 15 : set_processed(nrel);
413 :
414 31 : for (node *n = mt_rel->exps->h ; n ; n = n->next) {
415 16 : sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
416 16 : exp_prop_alias(v->sql->sa, a, e);
417 16 : list_append(converted, a);
418 : }
419 15 : nrel = rel_project(v->sql->sa, nrel, converted);
420 : } else { /* Unionize children tables */
421 :
422 265 : if (mvc_debug_on(v->sql, 16)) {
423 : /* In case of a single table there in nothing to unionize */
424 0 : if (tables->cnt == 1) {
425 0 : nrel = rel_wrap_select_around_table(v, tables->h->data, info);
426 : } else {
427 : //nrel = rel_unionize_mt_tables_balanced(v, mt_rel, tables, info);
428 0 : nrel = rel_setop_n_ary(v->sql->sa, tables, op_munion);
429 : }
430 265 : } else if (mvc_debug_on(v->sql, 32)) {
431 0 : if (tables->cnt == 1) {
432 0 : nrel = rel_wrap_select_around_table(v, tables->h->data, info);
433 : } else {
434 0 : nrel = rel_unionize_mt_tables_munion(v, mt_rel, tables, info);
435 : }
436 : } else {
437 681 : for (node *n = tables->h; n ; n = n->next) {
438 416 : sql_rel *next = n->data;
439 416 : sql_table *subt = (sql_table *) next->l;
440 :
441 416 : if (isMergeTable(subt)) { /* apply select predicate recursively for nested merge tables */
442 13 : if (!(next = merge_table_prune_and_unionize(v, next, info)))
443 : return NULL;
444 403 : } else if (info) { /* propagate select under union */
445 91 : next = rel_select(v->sql->sa, next, NULL);
446 91 : next->exps = exps_copy(v->sql, info->sel->exps);
447 91 : set_processed(next);
448 : }
449 :
450 416 : if (nrel) {
451 151 : nrel = rel_setop(v->sql->sa, nrel, next, op_union);
452 151 : rel_setop_set_exps(v->sql, nrel, rel_projections(v->sql, mt_rel, NULL, 1, 1), true);
453 151 : set_processed(nrel);
454 : } else {
455 : nrel = next;
456 : }
457 : }
458 : }
459 : }
460 : return nrel;
461 : }
462 :
463 : /* rewrite merge tables into union of base tables */
464 : static sql_rel *
465 11401 : rel_merge_table_rewrite_(visitor *v, sql_rel *rel)
466 : {
467 11401 : if (is_modify(rel->op)) {
468 1091 : sql_query *query = query_create(v->sql);
469 1091 : return rel_propagate(query, rel, &v->changes);
470 : } else {
471 10310 : sql_rel *bt = rel, *sel = NULL, *nrel = NULL;
472 :
473 10310 : if (is_select(rel->op)) {
474 1603 : sel = rel;
475 1603 : bt = rel->l;
476 : }
477 10310 : if (is_basetable(bt->op) && rel_base_table(bt) && isMergeTable((sql_table*)bt->l)) {
478 513 : sql_table *mt = rel_base_table(bt);
479 513 : merge_table_prune_info *info = NULL;
480 :
481 513 : if (list_empty(mt->members)) /* in DDL statement cases skip if mergetable is empty */
482 : return rel;
483 268 : if (sel && !list_empty(sel->exps)) { /* prepare prunning information once */
484 74 : info = SA_NEW(v->sql->sa, merge_table_prune_info);
485 148 : *info = (merge_table_prune_info) {
486 74 : .cols = sa_list(v->sql->sa),
487 74 : .ranges = sa_list(v->sql->sa),
488 : .sel = sel
489 : };
490 156 : for (node *n = sel->exps->h; n; n = n->next) {
491 82 : sql_exp *e = n->data, *c = e->l;
492 82 : int flag = e->flag;
493 :
494 82 : if (e->type != e_cmp || (!is_theta_exp(flag) && flag != cmp_in) || is_symmetric(e) || !(c = rel_find_exp(rel, c)))
495 6 : continue;
496 :
497 76 : if (flag == cmp_gt || flag == cmp_gte || flag == cmp_lte || flag == cmp_lt || flag == cmp_equal) {
498 63 : sql_exp *l = e->r, *h = e->f;
499 63 : atom *lval = exp_flatten(v->sql, v->value_based_opt, l);
500 63 : atom *hval = h ? exp_flatten(v->sql, v->value_based_opt, h) : lval;
501 :
502 63 : if (lval && hval) {
503 63 : range_limit *next = SA_NEW(v->sql->sa, range_limit);
504 :
505 63 : *next = (range_limit) {
506 : .lval = lval,
507 : .hval = hval,
508 : .flag = flag,
509 63 : .anti = is_anti(e),
510 63 : .semantics = is_semantics(e),
511 : };
512 63 : list_append(info->cols, c);
513 63 : list_append(info->ranges, next);
514 : }
515 : }
516 76 : if (flag == cmp_in) { /* handle in lists */
517 13 : list *vals = e->r, *vlist = sa_list(v->sql->sa);
518 :
519 13 : node *m = NULL;
520 42 : for (m = vals->h; m; m = m->next) {
521 29 : sql_exp *l = m->data;
522 29 : atom *lval = exp_flatten(v->sql, v->value_based_opt, l);
523 :
524 29 : if (!lval)
525 : break;
526 29 : list_append(vlist, lval);
527 : }
528 13 : if (!m) {
529 13 : range_limit *next = SA_NEW(v->sql->sa, range_limit);
530 :
531 13 : *next = (range_limit) {
532 : .values = vlist, /* mark high as value list */
533 : .flag = flag,
534 13 : .anti = is_anti(e),
535 13 : .semantics = is_semantics(e),
536 : };
537 13 : list_append(info->cols, c);
538 13 : list_append(info->ranges, next);
539 : }
540 : }
541 : }
542 : }
543 268 : if (!(nrel = merge_table_prune_and_unionize(v, bt, info)))
544 : return NULL;
545 : /* Always do relation inplace. If the mt relation has more than 1 reference, this is required */
546 267 : if (is_union(nrel->op)) {
547 114 : rel = rel_inplace_setop(v->sql, rel, nrel->l, nrel->r, op_union, nrel->exps);
548 : } else if (is_munion(nrel->op)) {
549 0 : rel = rel_inplace_setop_n_ary(v->sql, rel, nrel->l, op_munion, nrel->exps);
550 : } else if (is_select(nrel->op)) {
551 35 : rel = rel_inplace_select(rel, nrel->l, nrel->exps);
552 : } else if (is_basetable(nrel->op)) {
553 103 : rel = rel_inplace_basetable(rel, nrel);
554 : } else {
555 0 : assert(is_simple_project(nrel->op));
556 15 : rel = rel_inplace_project(v->sql->sa, rel, nrel->l, nrel->exps);
557 15 : rel->card = exps_card(nrel->exps);
558 : }
559 : /* make sure that we do NOT destroy the subrels */
560 267 : nrel->l = nrel->r = NULL;
561 267 : rel_destroy(nrel);
562 267 : v->changes++;
563 : }
564 : }
565 : return rel;
566 : }
567 :
568 : static sql_rel *
569 1723 : rel_merge_table_rewrite(visitor *v, global_props *gp, sql_rel *rel)
570 : {
571 1723 : (void) gp;
572 1723 : return rel_visitor_topdown(v, rel, &rel_merge_table_rewrite_);
573 : }
574 :
575 : run_optimizer
576 600970 : bind_merge_table_rewrite(visitor *v, global_props *gp)
577 : {
578 600970 : (void) v;
579 600970 : return gp->needs_mergetable_rewrite ? rel_merge_table_rewrite : NULL;
580 : }
581 :
582 : /* these optimizers/rewriters run in a cycle loop */
583 : const sql_optimizer pre_sql_optimizers[] = {
584 : { 0, "split_select", bind_split_select},
585 : { 1, "push_project_down", bind_push_project_down},
586 : { 2, "merge_projects", bind_merge_projects},
587 : { 3, "push_project_up", bind_push_project_up},
588 : { 4, "split_project", bind_split_project},
589 : { 5, "remove_redundant_join", bind_remove_redundant_join},
590 : { 6, "simplify_math", bind_simplify_math},
591 : { 7, "optimize_exps", bind_optimize_exps},
592 : { 8, "optimize_select_and_joins_bottomup", bind_optimize_select_and_joins_bottomup},
593 : { 9, "project_reduce_casts", bind_project_reduce_casts},
594 : {10, "optimize_unions_bottomup", bind_optimize_unions_bottomup},
595 : {11, "optimize_projections", bind_optimize_projections},
596 : {12, "optimize_joins", bind_optimize_joins},
597 : {13, "join_order", bind_join_order},
598 : {14, "optimize_semi_and_anti", bind_optimize_semi_and_anti},
599 : {15, "optimize_select_and_joins_topdown", bind_optimize_select_and_joins_topdown},
600 : {16, "optimize_unions_topdown", bind_optimize_unions_topdown},
601 : {17, "dce", bind_dce},
602 : {18, "push_func_and_select_down", bind_push_func_and_select_down},
603 : {19, "push_topn_and_sample_down", bind_push_topn_and_sample_down},
604 : {20, "distinct_project2groupby", bind_distinct_project2groupby},
605 : {21, "merge_table_rewrite", bind_merge_table_rewrite},
606 : { 0, NULL, NULL}
607 : };
608 :
609 : /* these optimizers/rewriters only run once after the cycle loop */
610 : const sql_optimizer post_sql_optimizers[] = {
611 : /* Merge table rewrites may introduce remote or replica tables */
612 : /* At the moment, make sure the remote table rewriters always run after the merge table one */
613 : {23, "rewrite_remote", bind_rewrite_remote},
614 : {24, "rewrite_replica", bind_rewrite_replica},
615 : {25, "remote_func", bind_remote_func},
616 : {26, "get_statistics", bind_get_statistics}, /* gather statistics */
617 : {27, "join_order2", bind_join_order2}, /* run join order one more time with statistics */
618 : {28, "final_optimization_loop", bind_final_optimization_loop}, /* run select and group by order with statistics gathered */
619 : { 0, NULL, NULL}
620 : /* If an optimizer is going to be added, don't forget to update NSQLREWRITERS macro */
621 : };
622 :
623 :
624 : /* make sure the outer project (without order by or distinct) has all the aliases */
625 : static sql_rel *
626 698854 : rel_keep_renames(mvc *sql, sql_rel *rel)
627 : {
628 698854 : if (!rel || !is_simple_project(rel->op) || (!rel->r && !need_distinct(rel)) || list_length(rel->exps) <= 1)
629 682018 : return rel;
630 :
631 16841 : int needed = 0;
632 180104 : for(node *n = rel->exps->h; n && !needed; n = n->next) {
633 163263 : sql_exp *e = n->data;
634 :
635 163263 : if (exp_name(e) && (e->type != e_column || strcmp(exp_name(e), e->r) != 0))
636 : needed = 1;
637 : }
638 16841 : if (!needed)
639 : return rel;
640 :
641 17 : list *new_outer_exps = sa_list(sql->sa);
642 17 : list *new_inner_exps = sa_list(sql->sa);
643 59 : for(node *n = rel->exps->h; n; n = n->next) {
644 42 : sql_exp *e = n->data, *ie, *oe;
645 42 : const char *rname = exp_relname(e);
646 42 : const char *name = exp_name(e);
647 :
648 42 : exp_label(sql->sa, e, ++sql->label);
649 42 : ie = e;
650 42 : oe = exp_ref(sql, ie);
651 42 : exp_setname(sql->sa, oe, rname, name);
652 42 : append(new_inner_exps, ie);
653 42 : append(new_outer_exps, oe);
654 : }
655 17 : rel->exps = new_inner_exps;
656 17 : rel = rel_project(sql->sa, rel, new_outer_exps);
657 17 : return rel;
658 : }
659 :
660 : /* for trivial queries don't run optimizers */
661 : static int
662 761973 : calculate_opt_level(mvc *sql, sql_rel *rel)
663 : {
664 937145 : if (rel->card <= CARD_ATOM) {
665 754661 : if (is_insert(rel->op))
666 101685 : return rel->r ? calculate_opt_level(sql, rel->r) : 0;
667 652976 : if (is_simple_project(rel->op))
668 234359 : return rel->l ? calculate_opt_level(sql, rel->l) : 0;
669 : }
670 : return 1;
671 : }
672 :
673 : static inline sql_rel *
674 1300327 : run_optimizer_set(visitor *v, sql_optimizer_run *runs, sql_rel *rel, global_props *gp, const sql_optimizer *set)
675 : {
676 : /* if 'runs' is set, it means profiling is intended */
677 18716539 : for (int i = 0 ; set[i].name ; i++) {
678 17416062 : run_optimizer opt = NULL;
679 :
680 17416062 : if ((opt = set[i].bind_optimizer(v, gp))) {
681 3208080 : if (runs) {
682 0 : sql_optimizer_run *run = &(runs[set[i].index]);
683 0 : run->name = set[i].name;
684 0 : int changes = v->changes;
685 0 : lng clk = GDKusec();
686 0 : rel = opt(v, gp, rel);
687 0 : run->time += (GDKusec() - clk);
688 0 : run->nchanges += (v->changes - changes);
689 : } else {
690 3208080 : rel = opt(v, gp, rel);
691 : }
692 : }
693 : }
694 1300477 : return rel;
695 : }
696 :
697 : /* 'profile' means to benchmark each individual optimizer run */
698 : /* 'instantiate' means to rewrite logical tables: (merge, remote, replica tables) */
699 : sql_rel *
700 698684 : rel_optimizer(mvc *sql, sql_rel *rel, int profile, int instantiate, int value_based_opt, int storage_based_opt)
701 : {
702 1397368 : global_props gp = (global_props) {.cnt = {0}, .instantiate = (uint8_t)instantiate, .opt_cycle = 0,
703 698684 : .has_special_modify = rel && is_modify(rel->op) && rel->flag&UPD_COMP};
704 698684 : visitor v = { .sql = sql, .value_based_opt = value_based_opt, .storage_based_opt = storage_based_opt, .changes = 1, .data = &gp };
705 :
706 698684 : if (!(rel = rel_keep_renames(sql, rel)))
707 : return rel;
708 :
709 698819 : sql->runs = !(ATOMIC_GET(&GDKdebug) & FORCEMITOMASK) && profile ? sa_zalloc(sql->sa, NSQLREWRITERS * sizeof(sql_optimizer_run)) : NULL;
710 1300096 : for ( ;rel && gp.opt_cycle < 20 && v.changes; gp.opt_cycle++) {
711 761727 : v.changes = 0;
712 761727 : gp = (global_props) {.cnt = {0}, .instantiate = (uint8_t)instantiate, .opt_cycle = gp.opt_cycle, .has_special_modify = gp.has_special_modify};
713 761727 : rel = rel_visitor_topdown(&v, rel, &rel_properties); /* collect relational tree properties */
714 762031 : gp.opt_level = calculate_opt_level(sql, rel);
715 762031 : if (gp.opt_level == 0 && !gp.needs_mergetable_rewrite)
716 : break;
717 601311 : rel = run_optimizer_set(&v, sql->runs, rel, &gp, pre_sql_optimizers);
718 : }
719 : #ifndef NDEBUG
720 699089 : assert(gp.opt_cycle < 20);
721 : #endif
722 :
723 : /* these optimizers run statistics gathered by the last optimization cycle */
724 699089 : rel = run_optimizer_set(&v, sql->runs, rel, &gp, post_sql_optimizers);
725 699089 : return rel;
726 : }
|