Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer_private.h"
15 : #include "rel_planner.h"
16 : #include "rel_exp.h"
17 : #include "rel_select.h"
18 : #include "rel_rewriter.h"
19 :
20 : /* Split_select optimizer splits case statements in select expressions. This is a step needed for cse */
21 : static void select_split_exps(mvc *sql, list *exps, sql_rel *rel);
22 :
23 : static sql_exp *
24 218102 : select_split_exp(mvc *sql, sql_exp *e, sql_rel *rel)
25 : {
26 218102 : switch(e->type) {
27 : case e_column:
28 : return e;
29 6660 : case e_convert:
30 6660 : e->l = select_split_exp(sql, e->l, rel);
31 6660 : return e;
32 25167 : case e_aggr:
33 : case e_func:
34 25167 : if (!is_analytic(e) && !exp_has_sideeffect(e)) {
35 25162 : sql_subfunc *f = e->f;
36 25162 : if (e->type == e_func && !f->func->s && is_caselike_func(f) /*is_ifthenelse_func(f)*/)
37 270 : return add_exp_too_project(sql, e, rel);
38 : }
39 : return e;
40 74882 : case e_cmp:
41 74882 : if (e->flag == cmp_or || e->flag == cmp_filter) {
42 18599 : select_split_exps(sql, e->l, rel);
43 18599 : select_split_exps(sql, e->r, rel);
44 56283 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
45 2793 : e->l = select_split_exp(sql, e->l, rel);
46 2793 : select_split_exps(sql, e->r, rel);
47 : } else {
48 53490 : e->l = select_split_exp(sql, e->l, rel);
49 53490 : e->r = select_split_exp(sql, e->r, rel);
50 53490 : if (e->f)
51 3576 : e->f = select_split_exp(sql, e->f, rel);
52 : }
53 : return e;
54 : case e_atom:
55 : case e_psm:
56 : return e;
57 : }
58 : return e;
59 : }
60 :
61 : static void
62 64941 : select_split_exps(mvc *sql, list *exps, sql_rel *rel)
63 : {
64 64941 : node *n;
65 :
66 64941 : if (!exps)
67 : return;
68 163034 : for(n=exps->h; n; n = n->next){
69 98093 : sql_exp *e = n->data;
70 :
71 98093 : e = select_split_exp(sql, e, rel);
72 98093 : n->data = e;
73 : }
74 : }
75 :
76 : static sql_rel *
77 1678974 : rel_split_select_(visitor *v, sql_rel *rel)
78 : {
79 1678974 : if (!rel || !is_select(rel->op) || list_empty(rel->exps) || !rel->l || mvc_highwater(v->sql))
80 1456004 : return rel;
81 :
82 222970 : bool funcs = false;
83 :
84 : /* are there functions */
85 478203 : for (node *n = rel->exps->h; n && !funcs; n = n->next)
86 255233 : funcs = exp_has_func(n->data);
87 :
88 : /* introduce extra project */
89 222970 : if (funcs) {
90 24950 : sql_rel *nrel = rel_project(v->sql->sa, rel->l,
91 24950 : rel_projections(v->sql, rel->l, NULL, 1, 1));
92 24950 : if (!nrel || !nrel->exps)
93 : return NULL;
94 24950 : rel->l = nrel;
95 : /* recursively split all functions and add those to the projection list */
96 24950 : select_split_exps(v->sql, rel->exps, nrel);
97 24950 : return rel;
98 : }
99 : return rel;
100 : }
101 :
102 : static sql_rel *
103 49636 : rel_split_select(visitor *v, global_props *gp, sql_rel *rel)
104 : {
105 49636 : (void) gp;
106 49636 : return rel_visitor_bottomup(v, rel, &rel_split_select_);
107 : }
108 :
109 : run_optimizer
110 601083 : bind_split_select(visitor *v, global_props *gp)
111 : {
112 601083 : int flag = v->sql->sql_optimizer;
113 538549 : return gp->opt_cycle == 0 && gp->opt_level == 1 && (flag & split_select)
114 1139631 : && gp->cnt[op_select] ? rel_split_select : NULL;
115 : }
116 :
117 :
118 : /*
119 : * Remove a redundant join
120 : *
121 : * join (L, Distinct Project(join(L,P) [ p.key == l.lkey]) [p.key]) [ p.key == l.lkey]
122 : * =>
123 : * join(L, P) [p.key==l.lkey]
124 : */
125 : static sql_rel *
126 1540050 : rel_remove_redundant_join_(visitor *v, sql_rel *rel)
127 : {
128 1540050 : if ((is_join(rel->op) || is_semi(rel->op)) && !list_empty(rel->exps)) {
129 219785 : sql_rel *l = rel->l, *r = rel->r, *b, *p = NULL, *j;
130 :
131 219785 : if (is_basetable(l->op) && is_simple_project(r->op) && need_distinct(r)) {
132 13 : b = l;
133 13 : p = r;
134 13 : j = p->l;
135 219772 : } else if (is_basetable(r->op) && is_simple_project(l->op) && need_distinct(l)) {
136 2309 : b = r;
137 2309 : p = l;
138 2309 : j = p->l;
139 : }
140 219785 : if (!p || !j || j->op != rel->op)
141 : return rel;
142 : /* j must have b->l (ie table) */
143 6 : sql_rel *jl = j->l, *jr = j->r;
144 6 : if ((is_basetable(jl->op) && jl->l == b->l) ||
145 2 : (is_basetable(jr->op) && jr->l == b->l)) {
146 5 : int left = 0;
147 5 : if (is_basetable(jl->op) && jl->l == b->l)
148 5 : left = 1;
149 5 : if (!list_empty(p->exps)) {
150 7 : for (node *n=p->exps->h; n; n = n->next) { /* all exps of 'p' must be bound to the opposite side */
151 5 : sql_exp *e = n->data;
152 :
153 6 : if (!rel_rebind_exp(v->sql, left ? jr : jl, e))
154 : return rel;
155 : }
156 : }
157 2 : if (exp_match_list(j->exps, rel->exps)) {
158 1 : p->l = (left)?rel_dup(jr):rel_dup(jl);
159 1 : rel_destroy(j);
160 1 : set_nodistinct(p);
161 1 : v->changes++;
162 1 : return rel;
163 : }
164 : }
165 : }
166 : return rel;
167 : }
168 :
169 : static sql_rel *
170 40182 : rel_remove_redundant_join(visitor *v, global_props *gp, sql_rel *rel)
171 : {
172 40182 : (void) gp;
173 40182 : return rel_visitor_bottomup(v, rel, &rel_remove_redundant_join_); /* this optimizer has to run before rel_first_level_optimizations */
174 : }
175 :
176 : run_optimizer
177 601076 : bind_remove_redundant_join(visitor *v, global_props *gp)
178 : {
179 601076 : int flag = v->sql->sql_optimizer;
180 538646 : return gp->opt_cycle == 0 && gp->opt_level == 1 && (gp->cnt[op_left] || gp->cnt[op_right]
181 525505 : || gp->cnt[op_full] || gp->cnt[op_join] || gp->cnt[op_semi] || gp->cnt[op_anti]) &&
182 640972 : (flag & remove_redundant_join) ? rel_remove_redundant_join : NULL;
183 : }
184 :
185 :
186 : static list *
187 1187681 : exp_merge_range(visitor *v, sql_rel *rel, list *exps)
188 : {
189 1187681 : node *n, *m;
190 2573064 : for (n=exps->h; n; n = n->next) {
191 1387238 : sql_exp *e = n->data;
192 1387238 : sql_exp *le = e->l;
193 1387238 : sql_exp *re = e->r;
194 :
195 : /* handle the and's in the or lists */
196 1387238 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
197 44221 : e->l = exp_merge_range(v, rel, e->l);
198 44221 : e->r = exp_merge_range(v, rel, e->r);
199 : /* only look for gt, gte, lte, lt */
200 1343017 : } else if (n->next &&
201 195819 : e->type == e_cmp && e->flag < cmp_equal && !e->f &&
202 6816 : re->card == CARD_ATOM && !is_anti(e)) {
203 1357 : for (m=n->next; m; m = m->next) {
204 805 : sql_exp *f = m->data;
205 805 : sql_exp *lf = f->l;
206 805 : sql_exp *rf = f->r;
207 805 : int c_le = is_numeric_upcast(le), c_lf = is_numeric_upcast(lf);
208 :
209 805 : if (f->type == e_cmp && f->flag < cmp_equal && !f->f &&
210 572 : rf->card == CARD_ATOM && !is_anti(f) &&
211 286 : exp_match_exp(c_le?le->l:le, c_lf?lf->l:lf)) {
212 129 : sql_exp *ne;
213 129 : int swap = 0, lt = 0, gt = 0;
214 129 : sql_subtype super;
215 : /* for now only c1 <[=] x <[=] c2 */
216 :
217 129 : swap = lt = (e->flag == cmp_lt || e->flag == cmp_lte);
218 129 : gt = !lt;
219 :
220 129 : if (gt &&
221 107 : (f->flag == cmp_gt ||
222 : f->flag == cmp_gte))
223 9 : continue;
224 22 : if (lt &&
225 22 : (f->flag == cmp_lt ||
226 : f->flag == cmp_lte))
227 0 : continue;
228 :
229 120 : cmp_supertype(&super, exp_subtype(le), exp_subtype(lf));
230 120 : if (!(rf = exp_check_type(v->sql, &super, rel, rf, type_equal)) ||
231 120 : !(le = exp_check_type(v->sql, &super, rel, le, type_equal)) ||
232 120 : !(re = exp_check_type(v->sql, &super, rel, re, type_equal))) {
233 0 : v->sql->session->status = 0;
234 0 : v->sql->errstr[0] = 0;
235 0 : continue;
236 : }
237 120 : if (!swap)
238 98 : ne = exp_compare2(v->sql->sa, le, re, rf, compare2range(e->flag, f->flag), 0);
239 : else
240 22 : ne = exp_compare2(v->sql->sa, le, rf, re, compare2range(f->flag, e->flag), 0);
241 :
242 120 : list_remove_data(exps, NULL, e);
243 120 : list_remove_data(exps, NULL, f);
244 120 : list_append(exps, ne);
245 120 : v->changes++;
246 120 : return exp_merge_range(v, rel, exps);
247 : }
248 : }
249 1342345 : } else if (n->next &&
250 195147 : e->type == e_cmp && e->flag < cmp_equal && !e->f &&
251 6144 : re->card > CARD_ATOM && !is_anti(e)) {
252 11150 : for (m=n->next; m; m = m->next) {
253 6741 : sql_exp *f = m->data;
254 6741 : sql_exp *lf = f->l;
255 6741 : sql_exp *rf = f->r;
256 :
257 6741 : if (f->type == e_cmp && f->flag < cmp_equal && !f->f &&
258 4953 : rf->card > CARD_ATOM && !is_anti(f)) {
259 4953 : sql_exp *ne, *t;
260 4953 : int swap = 0, lt = 0, gt = 0;
261 4953 : comp_type ef = (comp_type) e->flag, ff = (comp_type) f->flag;
262 4953 : int c_re = is_numeric_upcast(re), c_rf = is_numeric_upcast(rf);
263 4953 : int c_le = is_numeric_upcast(le), c_lf = is_numeric_upcast(lf), c;
264 4953 : sql_subtype super;
265 :
266 : /* both swapped ? */
267 4953 : if (exp_match_exp(c_re?re->l:re, c_rf?rf->l:rf)) {
268 14 : t = re;
269 14 : re = le;
270 14 : le = t;
271 14 : c = c_re; c_re = c_le; c_le = c;
272 14 : ef = swap_compare(ef);
273 14 : t = rf;
274 14 : rf = lf;
275 14 : lf = t;
276 14 : c = c_rf; c_rf = c_lf; c_lf = c;
277 14 : ff = swap_compare(ff);
278 : }
279 :
280 : /* is left swapped ? */
281 4953 : if (exp_match_exp(c_re?re->l:re, c_lf?lf->l:lf)) {
282 105 : t = re;
283 105 : re = le;
284 105 : le = t;
285 105 : c = c_re; c_re = c_le; c_le = c;
286 105 : ef = swap_compare(ef);
287 : }
288 :
289 : /* is right swapped ? */
290 4953 : if (exp_match_exp(c_le?le->l:le, c_rf?rf->l:rf)) {
291 176 : t = rf;
292 176 : rf = lf;
293 176 : lf = t;
294 176 : c = c_rf; c_rf = c_lf; c_lf = c;
295 176 : ff = swap_compare(ff);
296 : }
297 :
298 4953 : if (!exp_match_exp(c_le?le->l:le, c_lf?lf->l:lf))
299 3218 : continue;
300 :
301 : /* for now only c1 <[=] x <[=] c2 */
302 1814 : swap = lt = (ef == cmp_lt || ef == cmp_lte);
303 1814 : gt = !lt;
304 :
305 1814 : if (gt && (ff == cmp_gt || ff == cmp_gte))
306 73 : continue;
307 1741 : if (lt && (ff == cmp_lt || ff == cmp_lte))
308 6 : continue;
309 :
310 1735 : cmp_supertype(&super, exp_subtype(le), exp_subtype(lf));
311 1735 : if (!(rf = exp_check_type(v->sql, &super, rel, rf, type_equal)) ||
312 1735 : !(le = exp_check_type(v->sql, &super, rel, le, type_equal)) ||
313 1735 : !(re = exp_check_type(v->sql, &super, rel, re, type_equal))) {
314 0 : v->sql->session->status = 0;
315 0 : v->sql->errstr[0] = 0;
316 0 : continue;
317 : }
318 1735 : if (!swap)
319 1635 : ne = exp_compare2(v->sql->sa, le, re, rf, compare2range(ef, ff), 0);
320 : else
321 100 : ne = exp_compare2(v->sql->sa, le, rf, re, compare2range(ff, ef), 0);
322 :
323 1735 : list_remove_data(exps, NULL, e);
324 1735 : list_remove_data(exps, NULL, f);
325 1735 : list_append(exps, ne);
326 1735 : v->changes++;
327 1735 : return exp_merge_range(v, rel, exps);
328 : }
329 : }
330 : }
331 : }
332 : return exps;
333 : }
334 :
335 : static int
336 40726 : exps_cse( mvc *sql, list *oexps, list *l, list *r )
337 : {
338 40726 : list *nexps;
339 40726 : node *n, *m;
340 40726 : char *lu, *ru;
341 40726 : int lc = 0, rc = 0, match = 0, res = 0;
342 :
343 40726 : if (list_length(l) == 0 || list_length(r) == 0)
344 0 : return 0;
345 :
346 : /* first recusive exps_cse */
347 40726 : nexps = new_exp_list(sql->sa);
348 86228 : for (n = l->h; n; n = n->next) {
349 45502 : sql_exp *e = n->data;
350 :
351 45502 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
352 15983 : res = exps_cse(sql, nexps, e->l, e->r);
353 : } else {
354 29519 : append(nexps, e);
355 : }
356 : }
357 40726 : l = nexps;
358 :
359 40726 : nexps = new_exp_list(sql->sa);
360 88881 : for (n = r->h; n; n = n->next) {
361 48155 : sql_exp *e = n->data;
362 :
363 48155 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
364 3283 : res = exps_cse(sql, nexps, e->l, e->r);
365 : } else {
366 44872 : append(nexps, e);
367 : }
368 : }
369 40726 : r = nexps;
370 :
371 : /* simplify true or .. and .. or true */
372 40726 : if (list_length(l) == list_length(r) && list_length(l) == 1) {
373 35715 : sql_exp *le = l->h->data, *re = r->h->data;
374 :
375 35715 : if (exp_is_true(le)) {
376 7 : append(oexps, le);
377 7 : return 1;
378 : }
379 35708 : if (exp_is_true(re)) {
380 4 : append(oexps, re);
381 4 : return 1;
382 : }
383 : }
384 :
385 40715 : lu = SA_ZNEW_ARRAY(sql->ta, char, list_length(l));
386 40715 : ru = SA_ZNEW_ARRAY(sql->ta, char, list_length(r));
387 86932 : for (n = l->h, lc = 0; n; n = n->next, lc++) {
388 46217 : sql_exp *le = n->data;
389 :
390 104051 : for ( m = r->h, rc = 0; m; m = m->next, rc++) {
391 57834 : sql_exp *re = m->data;
392 :
393 57834 : if (!ru[rc] && exp_match_exp(le,re)) {
394 793 : lu[lc] = 1;
395 793 : ru[rc] = 1;
396 793 : match = 1;
397 : }
398 : }
399 : }
400 40715 : if (match) {
401 414 : list *nl = new_exp_list(sql->sa);
402 414 : list *nr = new_exp_list(sql->sa);
403 :
404 1623 : for (n = l->h, lc = 0; n; n = n->next, lc++)
405 1209 : if (!lu[lc])
406 418 : append(nl, n->data);
407 1638 : for (n = r->h, rc = 0; n; n = n->next, rc++)
408 1224 : if (!ru[rc])
409 431 : append(nr, n->data);
410 :
411 414 : if (list_length(nl) && list_length(nr))
412 387 : append(oexps, exp_or(sql->sa, nl, nr, 0));
413 :
414 1623 : for (n = l->h, lc = 0; n; n = n->next, lc++) {
415 1209 : if (lu[lc])
416 791 : append(oexps, n->data);
417 : }
418 : res = 1;
419 : } else {
420 40301 : append(oexps, exp_or(sql->sa, list_dup(l, (fdup)NULL),
421 : list_dup(r, (fdup)NULL), 0));
422 : }
423 : return res;
424 : }
425 :
426 : static int
427 92531 : are_equality_exps( list *exps, sql_exp **L)
428 : {
429 92531 : sql_exp *l = *L;
430 :
431 92531 : if (list_length(exps) == 1) {
432 88482 : sql_exp *e = exps->h->data, *le = e->l, *re = e->r;
433 :
434 88482 : if (e->type == e_cmp && e->flag == cmp_equal && le->card != CARD_ATOM && re->card == CARD_ATOM && !is_semantics(e)) {
435 22222 : if (!l) {
436 10896 : *L = l = le;
437 10896 : if (!is_column(le->type))
438 : return 0;
439 : }
440 22222 : return (exp_match(l, le));
441 : }
442 66260 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e) && !is_semantics(e))
443 72767 : return (are_equality_exps(e->l, L) && are_equality_exps(e->r, L));
444 : }
445 : return 0;
446 : }
447 :
448 : static void
449 4281 : get_exps( list *n, list *l )
450 : {
451 5994 : sql_exp *e = l->h->data, *re = e->r;
452 :
453 5994 : if (e->type == e_cmp && e->flag == cmp_equal && re->card == CARD_ATOM)
454 4281 : list_append(n, re);
455 5994 : if (e->type == e_cmp && e->flag == cmp_or) {
456 1713 : get_exps(n, e->l);
457 1713 : get_exps(n, e->r);
458 : }
459 4281 : }
460 :
461 : static sql_exp *
462 1284 : equality_exps_2_in( mvc *sql, sql_exp *ce, list *l, list *r)
463 : {
464 1284 : list *nl = new_exp_list(sql->sa);
465 :
466 1284 : get_exps(nl, l);
467 1284 : get_exps(nl, r);
468 :
469 1284 : return exp_in( sql->sa, ce, nl, cmp_in);
470 : }
471 :
472 : static list *
473 609951 : merge_ors(mvc *sql, list *exps, int *changes)
474 : {
475 609951 : list *nexps = NULL;
476 609951 : int needed = 0;
477 :
478 1320064 : for (node *n = exps->h; n && !needed; n = n->next) {
479 710113 : sql_exp *e = n->data;
480 :
481 710113 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e))
482 710113 : needed = 1;
483 : }
484 :
485 609951 : if (needed) {
486 41119 : nexps = new_exp_list(sql->sa);
487 90453 : for (node *n = exps->h; n; n = n->next) {
488 49334 : sql_exp *e = n->data, *l = NULL;
489 :
490 49334 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e) && are_equality_exps(e->l, &l) && are_equality_exps(e->r, &l) && l) {
491 1284 : (*changes)++;
492 1284 : append(nexps, equality_exps_2_in(sql, l, e->l, e->r));
493 : } else {
494 48050 : append(nexps, e);
495 : }
496 : }
497 : } else {
498 : nexps = exps;
499 : }
500 :
501 1327248 : for (node *n = nexps->h; n ; n = n->next) {
502 717297 : sql_exp *e = n->data;
503 :
504 717297 : if (e->type == e_cmp && e->flag == cmp_or) {
505 40934 : e->l = merge_ors(sql, e->l, changes);
506 40934 : e->r = merge_ors(sql, e->r, changes);
507 : }
508 : }
509 :
510 609951 : return nexps;
511 : }
512 :
513 : #define TRIVIAL_NOT_EQUAL_CMP(e) \
514 : ((e)->type == e_cmp && (e)->flag == cmp_notequal && !is_anti((e)) && !is_semantics((e)) && ((sql_exp*)(e)->l)->card != CARD_ATOM && ((sql_exp*)(e)->r)->card == CARD_ATOM)
515 :
516 : static list *
517 609951 : merge_notequal(mvc *sql, list *exps, int *changes)
518 : {
519 609951 : list *inequality_groups = NULL, *nexps = NULL;
520 609951 : int needed = 0;
521 :
522 1327248 : for (node *n = exps->h; n; n = n->next) {
523 717297 : sql_exp *e = n->data;
524 :
525 717297 : if (TRIVIAL_NOT_EQUAL_CMP(e)) {
526 93967 : bool appended = false;
527 :
528 93967 : if (inequality_groups) {
529 8987 : for (node *m = inequality_groups->h; m && !appended; m = m->next) {
530 5152 : list *next = m->data;
531 5152 : sql_exp *first = (sql_exp*) next->h->data;
532 :
533 5152 : if (exp_match(first->l, e->l)) {
534 1156 : list_append(next, e);
535 1156 : appended = true;
536 : }
537 : }
538 : }
539 3835 : if (!appended) {
540 92811 : if (!inequality_groups)
541 90132 : inequality_groups = new_exp_list(sql->sa);
542 92811 : list_append(inequality_groups, list_append(new_exp_list(sql->sa), e));
543 : }
544 : }
545 : }
546 :
547 609951 : if (inequality_groups) { /* if one list of inequalities has more than one entry, then the re-write is needed */
548 182943 : for (node *n = inequality_groups->h; n; n = n->next) {
549 92811 : list *next = n->data;
550 :
551 92811 : if (list_length(next) > 1)
552 1071 : needed = 1;
553 : }
554 : }
555 :
556 90132 : if (needed) {
557 989 : nexps = new_exp_list(sql->sa);
558 2594 : for (node *n = inequality_groups->h; n; n = n->next) {
559 1605 : list *next = n->data;
560 1605 : sql_exp *first = (sql_exp*) next->h->data;
561 :
562 1605 : if (list_length(next) > 1) {
563 1071 : list *notin = new_exp_list(sql->sa);
564 :
565 3298 : for (node *m = next->h; m; m = m->next) {
566 2227 : sql_exp *e = m->data;
567 2227 : list_append(notin, e->r);
568 : }
569 1071 : list_append(nexps, exp_in(sql->sa, first->l, notin, cmp_notin));
570 : } else {
571 534 : list_append(nexps, first);
572 : }
573 : }
574 :
575 4108 : for (node *n = exps->h; n; n = n->next) {
576 3119 : sql_exp *e = n->data;
577 :
578 3119 : if (!TRIVIAL_NOT_EQUAL_CMP(e))
579 358 : list_append(nexps, e);
580 : }
581 989 : (*changes)++;
582 : } else {
583 : nexps = exps;
584 : }
585 :
586 1326092 : for (node *n = nexps->h; n ; n = n->next) {
587 716141 : sql_exp *e = n->data;
588 :
589 716141 : if (e->type == e_cmp && e->flag == cmp_or) {
590 40934 : e->l = merge_notequal(sql, e->l, changes);
591 40934 : e->r = merge_notequal(sql, e->r, changes);
592 : }
593 : }
594 :
595 609951 : return nexps;
596 : }
597 :
598 : int
599 169986 : is_numeric_upcast(sql_exp *e)
600 : {
601 169986 : if (is_convert(e->type)) {
602 5592 : sql_subtype *f = exp_fromtype(e);
603 5592 : sql_subtype *t = exp_totype(e);
604 :
605 5592 : if (f->type->eclass == t->type->eclass && EC_COMPUTE(f->type->eclass)) {
606 1625 : if (f->type->localtype < t->type->localtype)
607 1603 : return 1;
608 : }
609 : }
610 : return 0;
611 : }
612 :
613 : /* optimize (a = b) or (a is null and b is null) -> a = b with null semantics */
614 : static sql_exp *
615 43456 : try_rewrite_equal_or_is_null(mvc *sql, sql_rel *rel, sql_exp *or, list *l1, list *l2)
616 : {
617 43456 : if (list_length(l1) == 1) {
618 39941 : bool valid = true, first_is_null_found = false, second_is_null_found = false;
619 39941 : sql_exp *cmp = l1->h->data;
620 39941 : sql_exp *first = cmp->l, *second = cmp->r;
621 :
622 39941 : if (is_compare(cmp->type) && !is_anti(cmp) && !cmp->f && cmp->flag == cmp_equal) {
623 7138 : int fupcast = is_numeric_upcast(first), supcast = is_numeric_upcast(second);
624 14546 : for(node *n = l2->h ; n && valid; n = n->next) {
625 7408 : sql_exp *e = n->data, *l = e->l, *r = e->r;
626 :
627 7408 : if (is_compare(e->type) && e->flag == cmp_equal && !e->f &&
628 4531 : !is_anti(e) && is_semantics(e)) {
629 1834 : int lupcast = is_numeric_upcast(l);
630 1834 : int rupcast = is_numeric_upcast(r);
631 1834 : sql_exp *rr = rupcast ? r->l : r;
632 :
633 1834 : if (rr->type == e_atom && rr->l && atom_null(rr->l)) {
634 1834 : if (exp_match_exp(fupcast?first->l:first, lupcast?l->l:l))
635 : first_is_null_found = true;
636 413 : else if (exp_match_exp(supcast?second->l:second, lupcast?l->l:l))
637 : second_is_null_found = true;
638 : else
639 5717 : valid = false;
640 : } else {
641 : valid = false;
642 : }
643 : } else {
644 : valid = false;
645 : }
646 : }
647 7138 : if (valid && first_is_null_found && second_is_null_found) {
648 268 : sql_subtype super;
649 :
650 268 : cmp_supertype(&super, exp_subtype(first), exp_subtype(second)); /* first and second must have the same type */
651 268 : if (!(first = exp_check_type(sql, &super, rel, first, type_equal)) ||
652 268 : !(second = exp_check_type(sql, &super, rel, second, type_equal))) {
653 0 : sql->session->status = 0;
654 0 : sql->errstr[0] = 0;
655 0 : return or;
656 : }
657 268 : sql_exp *res = exp_compare(sql->sa, first, second, cmp->flag);
658 268 : set_semantics(res);
659 268 : if (exp_name(or))
660 0 : exp_prop_alias(sql->sa, res, or);
661 268 : return res;
662 : }
663 : }
664 : }
665 : return or;
666 : }
667 :
668 : static list *
669 1097384 : merge_cmp_or_null(mvc *sql, sql_rel *rel, list *exps, int *changes)
670 : {
671 2380088 : for (node *n = exps->h; n ; n = n->next) {
672 1282704 : sql_exp *e = n->data;
673 :
674 1282704 : if (is_compare(e->type) && e->flag == cmp_or && !is_anti(e)) {
675 21728 : sql_exp *ne = try_rewrite_equal_or_is_null(sql, rel, e, e->l, e->r);
676 21728 : if (ne != e) {
677 266 : (*changes)++;
678 266 : n->data = ne;
679 : }
680 21728 : ne = try_rewrite_equal_or_is_null(sql, rel, e, e->r, e->l);
681 21728 : if (ne != e) {
682 2 : (*changes)++;
683 2 : n->data = ne;
684 : }
685 : }
686 : }
687 1097384 : return exps;
688 : }
689 :
690 : static list *
691 1097384 : cleanup_equal_exps(mvc *sql, sql_rel *rel, list *exps, int *changes)
692 : {
693 1097384 : if (list_length(exps) <= 1)
694 : return exps;
695 168808 : if (is_join(rel->op)) {
696 245840 : for(node *n = exps->h; n; n = n->next) {
697 165626 : sql_exp *e = n->data;
698 331084 : if (e->type == e_cmp && !e->f && e->flag <= cmp_notequal &&
699 206292 : !rel_find_exp(rel->l, e->l) && !rel_find_exp(rel->r, e->r) &&
700 40697 : !find_prop(e->p, PROP_HASHCOL) && !find_prop(e->p, PROP_JOINIDX)) {
701 20289 : exp_swap(e);
702 : }
703 : }
704 : }
705 168808 : bool needed = false;
706 524018 : for(node *n = exps->h; !needed && n; n = n->next) {
707 680739 : for (node *m = n->next; !needed && m; m = m->next) {
708 325529 : if (exp_match_exp_semantics(n->data, m->data, false))
709 69 : needed = true;
710 : }
711 : }
712 168808 : if (needed) {
713 69 : list *nexps = sa_list(sql->sa);
714 :
715 233 : for(node *n = exps->h; n; n = n->next) {
716 164 : bool done = false;
717 532 : for (node *m = exps->h; m && !done; m = m->next) {
718 368 : if (n != m && exp_match_exp_semantics(n->data, m->data, false)) {
719 138 : sql_exp *e1 = n->data, *e2 = m->data;
720 138 : if ((is_any(e1) || is_semantics(e1)) || (!is_any(e2) && !is_semantics(e2))) {
721 124 : append(nexps, e1);
722 124 : if ((!is_any(e2) && !is_semantics(e2)) && is_left(rel->op) && list_length(rel->attr) == 1) {
723 : /* nil is false */
724 14 : sql_exp *m = rel->attr->h->data;
725 14 : if (exp_is_atom(m))
726 14 : set_no_nil(m);
727 : }
728 : }
729 : done = true;
730 : }
731 : }
732 164 : if (!done)
733 26 : append(nexps, n->data);
734 : }
735 : return nexps;
736 : }
737 : (void)changes;
738 : return exps;
739 : }
740 :
741 : static inline sql_rel *
742 1097384 : rel_select_cse(visitor *v, sql_rel *rel)
743 : {
744 1097384 : if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && rel->exps) /* cleanup equal expressions */
745 1097384 : rel->exps = cleanup_equal_exps(v->sql, rel, rel->exps, &v->changes); /* (a = b) and (a += b) */
746 :
747 1097384 : if (is_select(rel->op) && rel->exps)
748 528083 : rel->exps = merge_ors(v->sql, rel->exps, &v->changes); /* x = 1 or x = 2 => x in (1, 2)*/
749 :
750 1097384 : if (is_select(rel->op) && rel->exps)
751 528083 : rel->exps = merge_notequal(v->sql, rel->exps, &v->changes); /* x <> 1 and x <> 2 => x not in (1, 2)*/
752 :
753 1097384 : if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && rel->exps)
754 1097384 : rel->exps = merge_cmp_or_null(v->sql, rel, rel->exps, &v->changes); /* (a = b) or (a is null and b is null) -> a = b with null semantics */
755 :
756 1097384 : if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && rel->exps) {
757 1097384 : node *n;
758 1097384 : list *nexps;
759 1097384 : int needed = 0;
760 :
761 2373607 : for (n=rel->exps->h; n && !needed; n = n->next) {
762 1276223 : sql_exp *e = n->data;
763 :
764 1276223 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e))
765 1276223 : needed = 1;
766 : }
767 1097384 : if (!needed)
768 : return rel;
769 20660 : nexps = new_exp_list(v->sql->sa);
770 49164 : for (n=rel->exps->h; n; n = n->next) {
771 28504 : sql_exp *e = n->data;
772 :
773 28504 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
774 : /* split the common expressions */
775 21460 : v->changes += exps_cse(v->sql, nexps, e->l, e->r);
776 : } else {
777 7044 : append(nexps, e);
778 : }
779 : }
780 20660 : rel->exps = nexps;
781 : }
782 : return rel;
783 : }
784 :
785 : static list *
786 14004 : exps_merge_select_rse( mvc *sql, list *l, list *r, bool *merged)
787 : {
788 14004 : node *n, *m, *o;
789 14004 : list *nexps = NULL, *lexps, *rexps;
790 14004 : bool lmerged = true, rmerged = true;
791 :
792 14004 : lexps = new_exp_list(sql->sa);
793 29740 : for (n = l->h; n; n = n->next) {
794 15736 : sql_exp *e = n->data;
795 :
796 15736 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e) && !is_semantics(e)) {
797 5494 : lmerged = false;
798 5494 : list *nexps = exps_merge_select_rse(sql, e->l, e->r, &lmerged);
799 6010 : for (o = nexps->h; o; o = o->next)
800 516 : append(lexps, o->data);
801 : } else {
802 10242 : append(lexps, e);
803 : }
804 : }
805 14004 : if (lmerged)
806 8569 : lmerged = (list_length(lexps) == 1);
807 14004 : rexps = new_exp_list(sql->sa);
808 30352 : for (n = r->h; n; n = n->next) {
809 16348 : sql_exp *e = n->data;
810 :
811 16348 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e) && !is_semantics(e)) {
812 1105 : rmerged = false;
813 1105 : list *nexps = exps_merge_select_rse(sql, e->l, e->r, &rmerged);
814 1128 : for (o = nexps->h; o; o = o->next)
815 23 : append(rexps, o->data);
816 : } else {
817 15243 : append(rexps, e);
818 : }
819 : }
820 14004 : if (rmerged)
821 12917 : rmerged = (list_length(r) == 1);
822 :
823 14004 : nexps = new_exp_list(sql->sa);
824 :
825 : /* merge merged lists first ? */
826 24762 : for (n = lexps->h; n; n = n->next) {
827 10758 : sql_exp *le = n->data, *re, *fnd = NULL;
828 :
829 10758 : if (le->type != e_cmp || le->flag == cmp_or || is_anti(le) || is_semantics(le) || is_symmetric(le))
830 644 : continue;
831 20657 : for (m = rexps->h; !fnd && m; m = m->next) {
832 10543 : re = m->data;
833 10543 : if (exps_match_col_exps(le, re))
834 1725 : fnd = re;
835 : }
836 10114 : if (fnd && (is_anti(fnd) || is_semantics(fnd)))
837 59 : continue;
838 : /* cases
839 : * 1) 2 values (cmp_equal)
840 : * 2) 1 value (cmp_equal), and cmp_in
841 : * (also cmp_in, cmp_equal)
842 : * 3) 2 cmp_in
843 : * 4) ranges
844 : */
845 1666 : if (fnd) {
846 1666 : re = fnd;
847 1666 : fnd = NULL;
848 1666 : if (is_anti(le) || is_anti(re) || is_symmetric(re))
849 0 : continue;
850 2218 : if (le->flag == cmp_equal && re->flag == cmp_equal) {
851 552 : list *exps = new_exp_list(sql->sa);
852 :
853 552 : append(exps, le->r);
854 552 : append(exps, re->r);
855 552 : fnd = exp_in(sql->sa, le->l, exps, cmp_in);
856 1284 : } else if (le->flag == cmp_equal && re->flag == cmp_in){
857 170 : list *exps = new_exp_list(sql->sa);
858 :
859 170 : append(exps, le->r);
860 170 : list_merge(exps, re->r, NULL);
861 170 : fnd = exp_in(sql->sa, le->l, exps, cmp_in);
862 1275 : } else if (le->flag == cmp_in && re->flag == cmp_equal){
863 331 : list *exps = new_exp_list(sql->sa);
864 :
865 331 : append(exps, re->r);
866 331 : list_merge(exps, le->r, NULL);
867 331 : fnd = exp_in(sql->sa, le->l, exps, cmp_in);
868 733 : } else if (le->flag == cmp_in && re->flag == cmp_in){
869 120 : list *exps = new_exp_list(sql->sa);
870 :
871 120 : list_merge(exps, le->r, NULL);
872 120 : list_merge(exps, re->r, NULL);
873 120 : fnd = exp_in(sql->sa, le->l, exps, cmp_in);
874 493 : } else if (le->f && re->f && /* merge ranges */
875 19 : le->flag == re->flag && le->flag <= cmp_lt) {
876 19 : sql_exp *mine = NULL, *maxe = NULL;
877 :
878 19 : if (!(mine = rel_binop_(sql, NULL, exp_copy(sql, le->r), exp_copy(sql, re->r), "sys", "sql_min", card_value, true))) {
879 0 : sql->session->status = 0;
880 0 : sql->errstr[0] = '\0';
881 0 : continue;
882 : }
883 19 : if (!(maxe = rel_binop_(sql, NULL, exp_copy(sql, le->f), exp_copy(sql, re->f), "sys", "sql_max", card_value, true))) {
884 0 : sql->session->status = 0;
885 0 : sql->errstr[0] = '\0';
886 0 : continue;
887 : }
888 19 : fnd = exp_compare2(sql->sa, exp_copy(sql, le->l), mine, maxe, le->flag, 0);
889 19 : lmerged = false;
890 : }
891 1192 : if (fnd) {
892 1192 : append(nexps, fnd);
893 2228 : *merged = (fnd && lmerged && rmerged);
894 : }
895 : }
896 : }
897 14004 : return nexps;
898 : }
899 :
900 : /* merge related sub expressions
901 : *
902 : * ie (x = a and y > 1 and y < 5) or
903 : * (x = c and y > 1 and y < 10) or
904 : * (x = e and y > 1 and y < 20)
905 : * ->
906 : * ((x = a and y > 1 and y < 5) or
907 : * (x = c and y > 1 and y < 10) or
908 : * (x = e and y > 1 and y < 20)) and
909 : * x in (a,c,e) and
910 : * y > 1 and y < 20
911 : *
912 : * for single expression or's we can do better
913 : * x in (a, b, c) or x in (d, e, f)
914 : * ->
915 : * x in (a, b, c, d, e, f)
916 : * */
917 : static inline sql_rel *
918 393654 : rel_merge_select_rse(visitor *v, sql_rel *rel)
919 : {
920 : /* only execute once per select */
921 393654 : if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && rel->exps && !is_rel_merge_select_rse_used(rel->used)) {
922 133656 : node *n, *o;
923 133656 : list *nexps = new_exp_list(v->sql->sa);
924 :
925 287805 : for (n=rel->exps->h; n; n = n->next) {
926 154149 : sql_exp *e = n->data;
927 :
928 161554 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e) && !is_semantics(e)) {
929 : /* possibly merge related expressions */
930 7405 : bool merged = false;
931 :
932 7405 : list *ps = exps_merge_select_rse(v->sql, e->l, e->r, &merged);
933 8058 : for (o = ps->h; o; o = o->next)
934 653 : append(nexps, o->data);
935 7405 : if (merged)
936 79 : v->changes++;
937 : else
938 7326 : append(nexps, e);
939 : } else {
940 146744 : append(nexps, e);
941 : }
942 : }
943 133656 : rel->exps = nexps;
944 133656 : rel->used |= rel_merge_select_rse_used;
945 : }
946 393654 : return rel;
947 : }
948 :
949 : /* pack optimizers into a single function call to avoid iterations in the AST */
950 : static sql_rel *
951 3873488 : rel_optimize_select_and_joins_bottomup_(visitor *v, sql_rel *rel)
952 : {
953 3873488 : if (!rel || (!is_join(rel->op) && !is_semi(rel->op) && !is_select(rel->op)) || list_empty(rel->exps))
954 2776104 : return rel;
955 1097384 : uint8_t cycle = *(uint8_t*) v->data;
956 :
957 1097384 : rel->exps = exp_merge_range(v, rel, rel->exps);
958 1097384 : rel = rel_select_cse(v, rel);
959 1097384 : if (cycle == 1)
960 393654 : rel = rel_merge_select_rse(v, rel);
961 1097384 : rel = rewrite_simplify(v, cycle, v->value_based_opt, rel);
962 1097384 : return rel;
963 : }
964 :
965 : static sql_rel *
966 101963 : rel_optimize_select_and_joins_bottomup(visitor *v, global_props *gp, sql_rel *rel)
967 : {
968 101963 : v->data = &gp->opt_cycle;
969 101963 : rel = rel_visitor_bottomup(v, rel, &rel_optimize_select_and_joins_bottomup_);
970 101963 : v->data = gp;
971 101963 : return rel;
972 : }
973 :
974 : run_optimizer
975 601012 : bind_optimize_select_and_joins_bottomup(visitor *v, global_props *gp)
976 : {
977 601012 : int flag = v->sql->sql_optimizer;
978 600812 : return gp->opt_level == 1 && (gp->cnt[op_join] || gp->cnt[op_left] || gp->cnt[op_right] ||
979 534730 : gp->cnt[op_full] || gp->cnt[op_semi] || gp->cnt[op_anti] ||
980 1201824 : gp->cnt[op_select]) && (flag & optimize_select_and_joins_bottomup) ? rel_optimize_select_and_joins_bottomup : NULL;
981 : }
982 :
983 :
984 : static inline sql_rel *
985 3694949 : rel_push_join_exps_down(visitor *v, sql_rel *rel)
986 : {
987 : /* push select exps part of join expressions down */
988 : /* TODO CHECK WHY not semi enabled */
989 3694949 : if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) /*|| is_semi(rel->op)*/) && !list_empty(rel->exps)) {
990 543958 : int left = is_innerjoin(rel->op) || is_right(rel->op) || rel->op == op_semi;
991 543958 : int right = is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op);
992 543958 : sql_rel *jl = rel->l, *ojl = jl, *jr = rel->r, *ojr = jr;
993 :
994 543958 : set_processed(jl);
995 543958 : set_processed(jr);
996 1173719 : for (node *n = rel->exps->h; n;) {
997 629761 : node *next = n->next;
998 629761 : sql_exp *e = n->data;
999 :
1000 629761 : if (left && rel_rebind_exp(v->sql, jl, e) && !is_any(e)) { /* select expressions on left */
1001 1296 : if (!is_select(jl->op) || rel_is_ref(jl))
1002 1285 : rel->l = jl = rel_select(v->sql->sa, jl, NULL);
1003 1296 : rel_select_add_exp(v->sql->sa, jl, e);
1004 1296 : list_remove_node(rel->exps, NULL, n);
1005 1296 : v->changes++;
1006 628465 : } else if (right && rel_rebind_exp(v->sql, jr, e) && !is_any(e)) { /* select expressions on right */
1007 33 : if (!is_select(jr->op) || rel_is_ref(jr))
1008 32 : rel->r = jr = rel_select(v->sql->sa, jr, NULL);
1009 33 : rel_select_add_exp(v->sql->sa, jr, e);
1010 33 : list_remove_node(rel->exps, NULL, n);
1011 33 : v->changes++;
1012 : }
1013 : n = next;
1014 : }
1015 543958 : if (ojl != jl)
1016 1285 : set_processed(jl);
1017 543958 : if (ojr != jr)
1018 32 : set_processed(jr);
1019 : }
1020 3694949 : return rel;
1021 : }
1022 :
1023 : static inline bool
1024 3694949 : is_non_trivial_select_applied_to_outer_join(sql_rel *rel)
1025 : {
1026 3694949 : return is_select(rel->op) && rel->exps && is_outerjoin(((sql_rel*) rel->l)->op);
1027 : }
1028 :
1029 : extern list *list_append_before(list *l, node *n, void *data);
1030 :
1031 : static void replace_column_references_with_nulls_2(mvc *sql, list* crefs, sql_exp* e);
1032 :
1033 : static void
1034 5615 : replace_column_references_with_nulls_1(mvc *sql, list* crefs, list* exps) {
1035 5615 : if (list_empty(exps))
1036 : return;
1037 14252 : for(node* n = exps->h; n; n=n->next) {
1038 8637 : sql_exp* e = n->data;
1039 8637 : replace_column_references_with_nulls_2(sql, crefs, e);
1040 : }
1041 : }
1042 :
1043 : static void
1044 29548 : replace_column_references_with_nulls_2(mvc *sql, list* crefs, sql_exp* e) {
1045 38462 : if (e == NULL) {
1046 : return;
1047 : }
1048 :
1049 31806 : switch (e->type) {
1050 9404 : case e_column:
1051 : {
1052 9404 : sql_exp *c = NULL;
1053 9404 : if (e->l) {
1054 9404 : c = exps_bind_column2(crefs, e->l, e->r, NULL);
1055 : } else {
1056 0 : c = exps_bind_column(crefs, e->r, NULL, NULL, 1);
1057 : }
1058 9404 : if (c) {
1059 2285 : e->type = e_atom;
1060 2285 : e->l = atom_general(sql->sa, &e->tpe, NULL, 0);
1061 2285 : e->r = e->f = NULL;
1062 : }
1063 : }
1064 : break;
1065 9763 : case e_cmp:
1066 9763 : switch (e->flag) {
1067 7104 : case cmp_gt:
1068 : case cmp_gte:
1069 : case cmp_lte:
1070 : case cmp_lt:
1071 : case cmp_equal:
1072 : case cmp_notequal:
1073 : {
1074 7104 : sql_exp* l = e->l;
1075 7104 : sql_exp* r = e->r;
1076 7104 : sql_exp* f = e->f;
1077 :
1078 7104 : replace_column_references_with_nulls_2(sql, crefs, l);
1079 7104 : replace_column_references_with_nulls_2(sql, crefs, r);
1080 7104 : replace_column_references_with_nulls_2(sql, crefs, f);
1081 7104 : break;
1082 : }
1083 1928 : case cmp_filter:
1084 : case cmp_or:
1085 : {
1086 1928 : list* l = e->l;
1087 1928 : list* r = e->r;
1088 1928 : replace_column_references_with_nulls_1(sql, crefs, l);
1089 1928 : replace_column_references_with_nulls_1(sql, crefs, r);
1090 1928 : break;
1091 : }
1092 731 : case cmp_in:
1093 : case cmp_notin:
1094 : {
1095 731 : sql_exp* l = e->l;
1096 731 : list* r = e->r;
1097 731 : replace_column_references_with_nulls_2(sql, crefs, l);
1098 731 : replace_column_references_with_nulls_1(sql, crefs, r);
1099 731 : break;
1100 : }
1101 : default:
1102 : break;
1103 : }
1104 : break;
1105 1028 : case e_func:
1106 : {
1107 1028 : list* l = e->l;
1108 1028 : replace_column_references_with_nulls_1(sql, crefs, l);
1109 1028 : break;
1110 : }
1111 1810 : case e_convert:
1112 : {
1113 1810 : sql_exp* l = e->l;
1114 1810 : replace_column_references_with_nulls_2(sql, crefs, l);
1115 1810 : break;
1116 : }
1117 : default:
1118 : break;
1119 : }
1120 : }
1121 :
1122 : static sql_rel *
1123 5242 : out2inner(visitor *v, sql_rel* sel, sql_rel* join, sql_rel* inner_join_side, operator_type new_type) {
1124 :
1125 : /* handle inner_join relations with a simple select */
1126 5242 : if (is_select(inner_join_side->op) && inner_join_side->l)
1127 5242 : inner_join_side = inner_join_side->l;
1128 5242 : if (!is_base(inner_join_side->op) && !is_simple_project(inner_join_side->op)) {
1129 : // Nothing to do here.
1130 : return sel;
1131 : }
1132 :
1133 5139 : list* inner_join_column_references = inner_join_side->exps;
1134 5139 : list* select_predicates = exps_copy(v->sql, sel->exps);
1135 :
1136 11045 : for(node* n = select_predicates->h; n; n=n->next) {
1137 5972 : sql_exp* e = n->data;
1138 5972 : replace_column_references_with_nulls_2(v->sql, inner_join_column_references, e);
1139 :
1140 5972 : if (exp_is_false(e)) {
1141 66 : join->op = new_type;
1142 66 : v->changes++;
1143 66 : break;
1144 : }
1145 : }
1146 :
1147 : return sel;
1148 : }
1149 :
1150 : static inline sql_rel *
1151 3694949 : rel_out2inner(visitor *v, sql_rel *rel) {
1152 :
1153 3694949 : if (!is_non_trivial_select_applied_to_outer_join(rel)) {
1154 : // Nothing to do here.
1155 : return rel;
1156 : }
1157 :
1158 5235 : sql_rel* join = (sql_rel*) rel->l;
1159 :
1160 5235 : if (rel_is_ref(join)) {
1161 : /* Do not alter a multi-referenced join relation.
1162 : * This is problematic (e.g. in the case of the plan of a merge statement)
1163 : * basically because there are no guarantees on the other container relations.
1164 : * In particular there is no guarantee that the other referencing relations are
1165 : * select relations with null-rejacting predicates on the inner join side.
1166 : */
1167 : return rel;
1168 : }
1169 :
1170 5219 : sql_rel* inner_join_side;
1171 5219 : if (is_left(join->op)) {
1172 5172 : inner_join_side = join->r;
1173 5172 : return out2inner(v, rel, join, inner_join_side, op_join);
1174 : }
1175 47 : else if (is_right(join->op)) {
1176 24 : inner_join_side = join->l;
1177 24 : return out2inner(v, rel, join, inner_join_side, op_join);
1178 : }
1179 : else /*full outer join*/ {
1180 : // First check if left side can degenerate from full outer join to just right outer join.
1181 23 : inner_join_side = join->r;
1182 23 : rel = out2inner(v, rel, join, inner_join_side, op_right);
1183 : /* Now test if the right side can degenerate to
1184 : * a normal inner join or a left outer join
1185 : * depending on the result of previous call to out2inner.
1186 : */
1187 :
1188 23 : inner_join_side = join->l;
1189 38 : return out2inner(v, rel, join, inner_join_side, is_right(join->op)? op_join: op_left);
1190 : }
1191 : }
1192 :
1193 : static bool
1194 4300 : exps_uses_any(list *exps, list *l)
1195 : {
1196 4300 : bool uses_any = false;
1197 :
1198 4300 : if (list_empty(exps) || list_empty(l))
1199 9 : return false;
1200 8588 : for (node *n = l->h; n && !uses_any; n = n->next) {
1201 4297 : sql_exp *e = n->data;
1202 4297 : uses_any |= list_exps_uses_exp(exps, exp_relname(e), exp_name(e)) != NULL;
1203 : }
1204 :
1205 : return uses_any;
1206 : }
1207 :
1208 : /* TODO At the moment I have to disable the new join2semi because the join order optimizer doesn't take semi-joins into account,
1209 : so plans get deteriorated if more joins are optimized into semi-joins. Later I will review the join order with semi-joins and hopefully,
1210 : I will be able to re-enable the new join2semi. */
1211 : #if 0
1212 : #define NO_EXP_FOUND 0
1213 : #define FOUND_WITH_DUPLICATES 1
1214 : #define MAY_HAVE_DUPLICATE_NULLS 2
1215 : #define ALL_VALUES_DISTINCT 3
1216 :
1217 : static int
1218 : find_projection_for_join2semi(sql_rel *rel, sql_exp *jc)
1219 : {
1220 : sql_rel *res = NULL;
1221 : sql_exp *e = NULL;
1222 : bool underjoin = false;
1223 :
1224 : if ((e = rel_find_exp_and_corresponding_rel(rel, jc, &res, &underjoin))) {
1225 : if (underjoin || e->type != e_column)
1226 : return FOUND_WITH_DUPLICATES;
1227 : /* if just one groupby column is projected or the relation needs distinct values and one column is projected or is a primary key, it will be distinct */
1228 : if (is_unique(e) ||
1229 : (is_groupby(res->op) && list_length(res->r) == 1 && exps_find_exp(res->r, e)) ||
1230 : ((is_project(res->op) || is_base(res->op)) && ((need_distinct(res) && list_length(res->exps) == 1) || res->card < CARD_AGGR)))
1231 : return has_nil(e) ? MAY_HAVE_DUPLICATE_NULLS : ALL_VALUES_DISTINCT;
1232 : return FOUND_WITH_DUPLICATES;
1233 : }
1234 : return NO_EXP_FOUND;
1235 : }
1236 :
1237 : static int
1238 : subrel_uses_exp_outside_subrel(visitor *v, sql_rel *rel, list *l, sql_rel *j)
1239 : {
1240 : if (rel == j)
1241 : return 0;
1242 : if (mvc_highwater(v->sql))
1243 : return 1;
1244 : switch(rel->op){
1245 : case op_join:
1246 : case op_left:
1247 : case op_right:
1248 : case op_full:
1249 : return exps_uses_any(rel->exps, l) ||
1250 : subrel_uses_exp_outside_subrel(v, rel->l, l, j) || subrel_uses_exp_outside_subrel(v, rel->r, l, j);
1251 : case op_semi:
1252 : case op_anti:
1253 : case op_select:
1254 : return exps_uses_any(rel->exps, l) ||
1255 : subrel_uses_exp_outside_subrel(v, rel->l, l, j);
1256 : case op_project:
1257 : case op_groupby:
1258 : return exps_uses_any(rel->exps, l) || exps_uses_any(rel->r, l);
1259 : case op_basetable:
1260 : case op_table:
1261 : case op_union:
1262 : case op_except:
1263 : case op_inter:
1264 : return exps_uses_any(rel->exps, l);
1265 : case op_topn:
1266 : case op_sample:
1267 : return subrel_uses_exp_outside_subrel(v, rel->l, l, j);
1268 : default:
1269 : return 1;
1270 : }
1271 : }
1272 :
1273 : static int
1274 : projrel_uses_exp_outside_subrel(visitor *v, sql_rel *rel, list *l, sql_rel *j)
1275 : {
1276 : /* test if projecting relation uses any of the join expressions */
1277 : assert((is_simple_project(rel->op) || is_groupby(rel->op)) && rel->l);
1278 : return exps_uses_any(rel->exps, l) || exps_uses_any(rel->r, l) || subrel_uses_exp_outside_subrel(v, rel->l, l, j);
1279 : }
1280 :
1281 : static sql_rel *
1282 : rewrite_joins2semi(visitor *v, sql_rel *proj, sql_rel *rel)
1283 : {
1284 : /* generalize possibility : we need the visitor 'step' here */
1285 : if (rel_is_ref(rel) || mvc_highwater(v->sql)) /* if the join has multiple references, it's dangerous to convert it into a semijoin */
1286 : return rel;
1287 : if (is_innerjoin(rel->op) && !list_empty(rel->exps)) {
1288 : sql_rel *l = rel->l, *r = rel->r;
1289 : bool left_unique = true, right_unique = true;
1290 :
1291 : /* these relations don't project anything, so skip them */
1292 : while (is_topn(l->op) || is_sample(l->op) || is_select(l->op) || is_semi(l->op))
1293 : l = l->l;
1294 : /* joins will expand values, so don't search on those */
1295 : if (!is_base(l->op) && !is_project(l->op))
1296 : left_unique = false;
1297 : while (is_topn(r->op) || is_sample(r->op) || is_select(r->op) || is_semi(r->op))
1298 : r = r->l;
1299 : if (!is_base(r->op) && !is_project(r->op))
1300 : right_unique = false;
1301 : /* if all columns used in equi-joins from one of the sides are unique, the join can be rewritten into a semijoin */
1302 : for (node *n=rel->exps->h; n && (left_unique || right_unique); n = n->next) {
1303 : sql_exp *e = n->data, *el = e->l, *er = e->r;
1304 :
1305 : if (!is_compare(e->type) || e->flag != cmp_equal || exp_has_func(el) || exp_has_func(er)) {
1306 : left_unique = right_unique = false;
1307 : } else {
1308 : int found = 0;
1309 :
1310 : if (left_unique && (found = find_projection_for_join2semi(l, el)) > NO_EXP_FOUND)
1311 : left_unique &= (found == ALL_VALUES_DISTINCT || (found == MAY_HAVE_DUPLICATE_NULLS && (!is_semantics(e) || !has_nil(er))));
1312 : if (left_unique && (found = find_projection_for_join2semi(l, er)) > NO_EXP_FOUND)
1313 : left_unique &= (found == ALL_VALUES_DISTINCT || (found == MAY_HAVE_DUPLICATE_NULLS && (!is_semantics(e) || !has_nil(el))));
1314 : if (right_unique && (found = find_projection_for_join2semi(r, el)) > NO_EXP_FOUND)
1315 : right_unique &= (found == ALL_VALUES_DISTINCT || (found == MAY_HAVE_DUPLICATE_NULLS && (!is_semantics(e) || !has_nil(er))));
1316 : if (right_unique && (found = find_projection_for_join2semi(r, er)) > NO_EXP_FOUND)
1317 : right_unique &= (found == ALL_VALUES_DISTINCT || (found == MAY_HAVE_DUPLICATE_NULLS && (!is_semantics(e) || !has_nil(el))));
1318 : }
1319 : }
1320 :
1321 : /* now we need to check relation's expressions are not used */
1322 : if (left_unique && !projrel_uses_exp_outside_subrel(v, proj, l->exps, rel)) {
1323 : sql_rel *tmp = rel->r;
1324 : rel->r = rel->l;
1325 : rel->l = tmp;
1326 : rel->op = op_semi;
1327 : v->changes++;
1328 : } else if (right_unique && !projrel_uses_exp_outside_subrel(v, proj, r->exps, rel)) {
1329 : rel->op = op_semi;
1330 : v->changes++;
1331 : }
1332 : }
1333 : if (is_join(rel->op)) {
1334 : rel->l = rewrite_joins2semi(v, proj, rel->l);
1335 : rel->r = rewrite_joins2semi(v, proj, rel->r);
1336 : } else if (is_topn(rel->op) || is_sample(rel->op) || is_select(rel->op) || is_semi(rel->op)) {
1337 : rel->l = rewrite_joins2semi(v, proj, rel->l);
1338 : }
1339 : return rel;
1340 : }
1341 :
1342 : static inline sql_rel *
1343 : rel_join2semijoin(visitor *v, sql_rel *rel)
1344 : {
1345 : if ((is_simple_project(rel->op) || is_groupby(rel->op)) && rel->l)
1346 : rel->l = rewrite_joins2semi(v, rel, rel->l);
1347 : return rel;
1348 : }
1349 : #endif
1350 :
1351 : #define NO_PROJECTION_FOUND 0
1352 : #define MAY_HAVE_DUPLICATE_NULLS 1
1353 : #define ALL_VALUES_DISTINCT 2
1354 :
1355 : static int
1356 784685 : find_projection_for_join2semi(sql_rel *rel)
1357 : {
1358 784685 : if (is_simple_project(rel->op) || is_groupby(rel->op) || is_inter(rel->op) || is_except(rel->op) || is_base(rel->op) || (is_union(rel->op) && need_distinct(rel))) {
1359 353424 : if (rel->card < CARD_AGGR) /* const or groupby without group by exps */
1360 : return ALL_VALUES_DISTINCT;
1361 352288 : if (list_length(rel->exps) == 1) {
1362 6676 : sql_exp *e = rel->exps->h->data;
1363 : /* a single group by column in the projection list from a group by relation is guaranteed to be unique, but not an aggregate */
1364 6676 : if (e->type == e_column) {
1365 6634 : sql_rel *res = NULL;
1366 6634 : sql_exp *found = NULL;
1367 6634 : bool underjoin = false;
1368 :
1369 : /* if just one groupby column is projected or the relation needs distinct values and one column is projected or is a primary key, it will be distinct */
1370 6634 : if ((is_groupby(rel->op) && list_length(rel->r) == 1 && exps_find_exp(rel->r, e)) || (need_distinct(rel) && list_length(rel->exps) == 1))
1371 9536 : return ALL_VALUES_DISTINCT;
1372 2945 : if (is_unique(e))
1373 2154 : return has_nil(e) ? MAY_HAVE_DUPLICATE_NULLS : ALL_VALUES_DISTINCT;
1374 :
1375 791 : if ((is_simple_project(rel->op) || is_groupby(rel->op) || is_inter(rel->op) || is_except(rel->op)) &&
1376 370 : (found = rel_find_exp_and_corresponding_rel(rel->l, e, false, &res, &underjoin)) && !underjoin) { /* grouping column on inner relation */
1377 166 : if (need_distinct(res) && list_length(res->exps) == 1)
1378 : return ALL_VALUES_DISTINCT;
1379 166 : if (is_unique(found))
1380 3 : return has_nil(e) ? MAY_HAVE_DUPLICATE_NULLS : ALL_VALUES_DISTINCT;
1381 163 : if (found->type == e_column && found->card <= CARD_AGGR) {
1382 1 : if (!is_groupby(res->op) && list_length(res->exps) != 1)
1383 : return NO_PROJECTION_FOUND;
1384 3 : for (node *n = res->exps->h ; n ; n = n->next) { /* must be the single column in the group by expression list */
1385 2 : sql_exp *e = n->data;
1386 2 : if (e != found && e->type == e_column)
1387 : return NO_PROJECTION_FOUND;
1388 : }
1389 : return ALL_VALUES_DISTINCT;
1390 : }
1391 : }
1392 : }
1393 : }
1394 : }
1395 : return NO_PROJECTION_FOUND;
1396 : }
1397 :
1398 : static sql_rel *
1399 2426386 : find_candidate_join2semi(visitor *v, sql_rel *rel, bool *swap)
1400 : {
1401 : /* generalize possibility : we need the visitor 'step' here */
1402 2426948 : if (rel_is_ref(rel)) /* if the join has multiple references, it's dangerous to convert it into a semijoin */
1403 : return NULL;
1404 2321735 : if (rel->op == op_join && !list_empty(rel->exps) && list_empty(rel->attr)) {
1405 394177 : sql_rel *l = rel->l, *r = rel->r;
1406 394177 : int foundr = NO_PROJECTION_FOUND, foundl = NO_PROJECTION_FOUND, found = NO_PROJECTION_FOUND;
1407 394177 : bool ok = false;
1408 :
1409 394177 : foundr = find_projection_for_join2semi(r);
1410 394177 : if (foundr < ALL_VALUES_DISTINCT)
1411 390508 : foundl = find_projection_for_join2semi(l);
1412 394177 : if (foundr && foundr > foundl) {
1413 4248 : *swap = false;
1414 4248 : found = foundr;
1415 389929 : } else if (foundl) {
1416 2694 : *swap = true;
1417 2694 : found = foundl;
1418 : }
1419 :
1420 6942 : if (found > NO_PROJECTION_FOUND) {
1421 : /* if all join expressions can be pushed down or have function calls, then it cannot be rewritten into a semijoin */
1422 13888 : for (node *n=rel->exps->h; n && !ok; n = n->next) {
1423 6946 : sql_exp *e = n->data;
1424 :
1425 11258 : ok |= e->type == e_cmp && e->flag == cmp_equal && !exp_has_func(e) && !rel_rebind_exp(v->sql, l, e) && !rel_rebind_exp(v->sql, r, e) &&
1426 605 : (found == ALL_VALUES_DISTINCT || !is_semantics(e) || !has_nil((sql_exp *)e->l) || !has_nil((sql_exp *)e->r));
1427 : }
1428 : }
1429 :
1430 394177 : if (ok)
1431 : return rel;
1432 : }
1433 2318498 : if (is_join(rel->op) || is_semi(rel->op)) {
1434 607794 : sql_rel *c;
1435 :
1436 607794 : if ((c=find_candidate_join2semi(v, rel->l, swap)) != NULL ||
1437 607258 : (c=find_candidate_join2semi(v, rel->r, swap)) != NULL)
1438 551 : if (list_empty(c->attr))
1439 : return c;
1440 : }
1441 2317947 : if (is_topn(rel->op) || is_sample(rel->op))
1442 562 : return find_candidate_join2semi(v, rel->l, swap);
1443 : return NULL;
1444 : }
1445 :
1446 : static int
1447 2813 : subrel_uses_exp_outside_subrel(sql_rel *rel, list *l, sql_rel *c)
1448 : {
1449 2814 : if (rel == c)
1450 : return 0;
1451 : /* for subrel only expect joins (later possibly selects) */
1452 1070 : if (is_join(rel->op) || is_semi(rel->op)) {
1453 537 : if (exps_uses_any(rel->exps, l))
1454 : return 1;
1455 1066 : if (subrel_uses_exp_outside_subrel(rel->l, l, c) ||
1456 532 : subrel_uses_exp_outside_subrel(rel->r, l, c))
1457 3 : return 1;
1458 : }
1459 1064 : if (is_topn(rel->op) || is_sample(rel->op))
1460 1 : return subrel_uses_exp_outside_subrel(rel->l, l, c);
1461 : return 0;
1462 : }
1463 :
1464 : static int
1465 3237 : rel_uses_exp_outside_subrel(sql_rel *rel, list *l, sql_rel *c)
1466 : {
1467 : /* for now we only expect sub relations of type project, selects (rel) or join/semi */
1468 3237 : if (is_simple_project(rel->op) || is_groupby(rel->op) || is_select(rel->op)) {
1469 3237 : if (!list_empty(rel->exps) && exps_uses_any(rel->exps, l))
1470 : return 1;
1471 1747 : if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r) && exps_uses_any(rel->r, l))
1472 : return 1;
1473 1747 : if (rel->l)
1474 1747 : return subrel_uses_exp_outside_subrel(rel->l, l, c);
1475 : }
1476 0 : if (is_topn(rel->op) || is_sample(rel->op))
1477 0 : return subrel_uses_exp_outside_subrel(rel->l, l, c);
1478 : return 1;
1479 : }
1480 :
1481 : static inline sql_rel *
1482 3694949 : rel_join2semijoin(visitor *v, sql_rel *rel)
1483 : {
1484 3694949 : if ((is_simple_project(rel->op) || is_groupby(rel->op)) && rel->l) {
1485 1211334 : bool swap = false;
1486 1211334 : sql_rel *l = rel->l;
1487 1211334 : sql_rel *c = find_candidate_join2semi(v, l, &swap);
1488 :
1489 1211334 : if (c) {
1490 : /* 'p' is a project */
1491 3237 : sql_rel *p = swap ? c->l : c->r;
1492 :
1493 : /* now we need to check if ce is only used at the level of c */
1494 3237 : if (!rel_uses_exp_outside_subrel(rel, p->exps, c)) {
1495 1744 : c->op = op_semi;
1496 1744 : if (swap) {
1497 521 : sql_rel *tmp = c->r;
1498 521 : c->r = c->l;
1499 521 : c->l = tmp;
1500 : }
1501 1744 : v->changes++;
1502 : }
1503 : }
1504 : }
1505 3694949 : return rel;
1506 : }
1507 :
1508 : static inline sql_rel *
1509 3694949 : rel_push_join_down_outer(visitor *v, sql_rel *rel)
1510 : {
1511 3694949 : if (is_join(rel->op) && !is_outerjoin(rel->op) && !is_single(rel) && !list_empty(rel->exps) && !rel_is_ref(rel)) {
1512 404293 : sql_rel *l = rel->l, *r = rel->r;
1513 :
1514 404293 : if (is_left(r->op) && (is_select(l->op) || (is_join(l->op) && !is_outerjoin(l->op))) && !rel_is_ref(l) &&
1515 1083 : !rel_is_ref(r)) {
1516 1083 : sql_rel *rl = r->l;
1517 1083 : sql_rel *rr = r->r;
1518 1083 : if (rel_is_ref(rl) || rel_is_ref(rr))
1519 : return rel;
1520 : /* join exps should only include l and r.l */
1521 1083 : list *njexps = sa_list(v->sql->sa);
1522 2640 : for(node *n = rel->exps->h; n; n = n->next) {
1523 1557 : sql_exp *je = n->data;
1524 :
1525 1557 : assert(je->type == e_cmp);
1526 1557 : if (je->f)
1527 : return rel;
1528 1557 : if ((rel_find_exp(l, je->l) && rel_find_exp(rl, je->r)) || (rel_find_exp(l, je->r) && rel_find_exp(rl, je->l))) {
1529 1557 : list_append(njexps, je);
1530 : } else {
1531 0 : return rel;
1532 : }
1533 : }
1534 1083 : sql_rel *nl = rel_crossproduct(v->sql->sa, rel_dup(l), rl, rel->op);
1535 1083 : r->l = nl;
1536 1083 : nl->exps = njexps;
1537 1083 : nl->attr = rel->attr;
1538 1083 : rel->attr = NULL;
1539 1083 : set_processed(nl);
1540 1083 : rel_dup(r);
1541 1083 : rel_destroy(rel);
1542 1083 : rel = r;
1543 1083 : v->changes++;
1544 : }
1545 : }
1546 : return rel;
1547 : }
1548 :
1549 : static sql_rel *
1550 3694949 : rel_optimize_joins_(visitor *v, sql_rel *rel)
1551 : {
1552 3694949 : rel = rel_push_join_exps_down(v, rel);
1553 3694949 : rel = rel_out2inner(v, rel);
1554 3694949 : rel = rel_join2semijoin(v, rel);
1555 3694949 : rel = rel_push_join_down_outer(v, rel);
1556 3694949 : return rel;
1557 : }
1558 :
1559 : static sql_rel *
1560 71048 : rel_optimize_joins(visitor *v, global_props *gp, sql_rel *rel)
1561 : {
1562 71048 : (void) gp;
1563 71048 : return rel_visitor_topdown(v, rel, &rel_optimize_joins_);
1564 : }
1565 :
1566 : run_optimizer
1567 601118 : bind_optimize_joins(visitor *v, global_props *gp)
1568 : {
1569 601118 : int flag = v->sql->sql_optimizer;
1570 600923 : return gp->opt_level == 1 && (gp->cnt[op_join] || gp->cnt[op_left] || gp->cnt[op_right]
1571 1202041 : || gp->cnt[op_full] || gp->cnt[op_semi] || gp->cnt[op_anti]) && (flag & optimize_joins) ? rel_optimize_joins : NULL;
1572 : }
1573 :
1574 :
1575 : static sql_rel *rel_join_order_(visitor *v, sql_rel *rel);
1576 :
1577 : static void
1578 595360 : get_relations(visitor *v, sql_rel *rel, list *rels)
1579 : {
1580 837816 : if (list_empty(rel->attr) && !rel_is_ref(rel) && rel->op == op_join && rel->exps == NULL) {
1581 242456 : sql_rel *l = rel->l;
1582 242456 : sql_rel *r = rel->r;
1583 :
1584 242456 : get_relations(v, l, rels);
1585 242456 : get_relations(v, r, rels);
1586 242456 : rel->l = NULL;
1587 242456 : rel->r = NULL;
1588 242456 : rel_destroy(rel);
1589 : } else {
1590 352904 : rel = rel_join_order_(v, rel);
1591 352904 : append(rels, rel);
1592 : }
1593 595360 : }
1594 :
1595 : static void
1596 163156 : get_inner_relations(mvc *sql, sql_rel *rel, list *rels)
1597 : {
1598 254796 : if (!rel_is_ref(rel) && is_join(rel->op)) {
1599 91640 : sql_rel *l = rel->l;
1600 91640 : sql_rel *r = rel->r;
1601 :
1602 91640 : get_inner_relations(sql, l, rels);
1603 91640 : get_inner_relations(sql, r, rels);
1604 : } else {
1605 163156 : append(rels, rel);
1606 : }
1607 163156 : }
1608 :
1609 : static int
1610 1168378 : exp_count(int *cnt, sql_exp *e)
1611 : {
1612 1168378 : int flag;
1613 1168378 : if (!e)
1614 : return 0;
1615 1168378 : if (find_prop(e->p, PROP_JOINIDX))
1616 3156 : *cnt += 100;
1617 1168378 : if (find_prop(e->p, PROP_HASHCOL))
1618 33893 : *cnt += 100;
1619 1168378 : if (find_prop(e->p, PROP_HASHIDX))
1620 0 : *cnt += 100;
1621 1168378 : switch(e->type) {
1622 433622 : case e_cmp:
1623 433622 : if (!is_complex_exp(e->flag)) {
1624 365864 : exp_count(cnt, e->l);
1625 365864 : exp_count(cnt, e->r);
1626 365864 : if (e->f)
1627 3022 : exp_count(cnt, e->f);
1628 : }
1629 433622 : flag = e->flag;
1630 433622 : switch (flag) {
1631 334233 : case cmp_equal:
1632 334233 : *cnt += 90;
1633 334233 : return 90;
1634 25027 : case cmp_notequal:
1635 25027 : *cnt += 7;
1636 25027 : return 7;
1637 6604 : case cmp_gt:
1638 : case cmp_gte:
1639 : case cmp_lt:
1640 : case cmp_lte:
1641 6604 : *cnt += 6;
1642 6604 : if (e->l) {
1643 6604 : sql_exp *l = e->l;
1644 6604 : sql_subtype *t = exp_subtype(l);
1645 6604 : if (EC_TEMP(t->type->eclass)) /* give preference to temporal ranges */
1646 182 : *cnt += 90;
1647 : }
1648 6604 : if (e->f){ /* range */
1649 3022 : *cnt += 6;
1650 3022 : return 12;
1651 : }
1652 : return 6;
1653 1205 : case cmp_filter:
1654 1205 : if (exps_card(e->r) > CARD_AGGR) {
1655 : /* filters for joins are special */
1656 12 : *cnt += 1000;
1657 12 : return 1000;
1658 : }
1659 1193 : *cnt += 2;
1660 1193 : return 2;
1661 61879 : case cmp_in:
1662 : case cmp_notin: {
1663 61879 : list *l = e->r;
1664 61879 : int c = 9 - 10*list_length(l);
1665 61879 : *cnt += c;
1666 61879 : return c;
1667 : }
1668 4674 : case cmp_or: /* prefer or over functions */
1669 4674 : *cnt += 3;
1670 4674 : return 3;
1671 : default:
1672 : return 0;
1673 : }
1674 582844 : case e_column:
1675 582844 : *cnt += 20;
1676 582844 : return 20;
1677 137412 : case e_atom:
1678 137412 : *cnt += 10;
1679 137412 : return 10;
1680 3414 : case e_func:
1681 : /* functions are more expensive, depending on the number of columns involved. */
1682 3414 : if (e->card == CARD_ATOM)
1683 : return 0;
1684 3114 : *cnt -= 5*list_length(e->l);
1685 3114 : return 5*list_length(e->l);
1686 11086 : case e_convert:
1687 : /* functions are more expensive, depending on the number of columns involved. */
1688 11086 : if (e->card == CARD_ATOM)
1689 : return 0;
1690 : /* fall through */
1691 : default:
1692 10441 : *cnt -= 5;
1693 10441 : return -5;
1694 : }
1695 : }
1696 :
1697 : int
1698 291110 : exp_keyvalue(sql_exp *e)
1699 : {
1700 291110 : int cnt = 0;
1701 65859 : exp_count(&cnt, e);
1702 291110 : return cnt;
1703 : }
1704 :
1705 : static sql_exp *
1706 670256 : joinexp_col(sql_exp *e, sql_rel *r)
1707 : {
1708 670256 : if (e->type == e_cmp) {
1709 670256 : if (rel_has_exp(r, e->l, false) >= 0)
1710 367624 : return e->l;
1711 302632 : return e->r;
1712 : }
1713 0 : assert(0);
1714 : return NULL;
1715 : }
1716 :
1717 : static sql_column *
1718 416862 : table_colexp(sql_exp *e, sql_rel *r)
1719 : {
1720 416862 : sql_table *t = r->l;
1721 :
1722 416862 : if (e->type == e_column) {
1723 408518 : const char *name = exp_name(e);
1724 408518 : node *cn;
1725 :
1726 408518 : if (r->exps) { /* use alias */
1727 728654 : for (cn = r->exps->h; cn; cn = cn->next) {
1728 726314 : sql_exp *ce = cn->data;
1729 726314 : if (strcmp(exp_name(ce), name) == 0) {
1730 406178 : name = ce->r;
1731 406178 : break;
1732 : }
1733 : }
1734 : }
1735 846870 : for (cn = ol_first_node(t->columns); cn; cn = cn->next) {
1736 844530 : sql_column *c = cn->data;
1737 844530 : if (strcmp(c->base.name, name) == 0)
1738 406178 : return c;
1739 : }
1740 : }
1741 : return NULL;
1742 : }
1743 :
1744 : static list *
1745 318266 : matching_joins(allocator *sa, list *rels, list *exps, sql_exp *je)
1746 : {
1747 318266 : sql_rel *l, *r;
1748 :
1749 318266 : assert (je->type == e_cmp);
1750 :
1751 318266 : l = find_rel(rels, je->l);
1752 318266 : r = find_rel(rels, je->r);
1753 318266 : if (l && r) {
1754 318121 : list *res;
1755 318121 : list *n_rels = sa_list(sa);
1756 :
1757 318121 : append(n_rels, l);
1758 318121 : append(n_rels, r);
1759 318121 : res = list_select(exps, n_rels, (fcmp) &exp_joins_rels, (fdup)NULL);
1760 318121 : return res;
1761 : }
1762 145 : return sa_list(sa);
1763 : }
1764 :
1765 : static int
1766 6315 : sql_column_kc_cmp(sql_column *c, sql_kc *kc)
1767 : {
1768 : /* return on equality */
1769 6315 : return (c->colnr - kc->c->colnr);
1770 : }
1771 :
1772 : static sql_idx *
1773 402533 : find_fk_index(mvc *sql, sql_table *l, list *lcols, sql_table *r, list *rcols)
1774 : {
1775 402533 : sql_trans *tr = sql->session->tr;
1776 :
1777 402533 : if (l->idxs) {
1778 402533 : node *in;
1779 482291 : for (in = ol_first_node(l->idxs); in; in = in->next){
1780 80554 : sql_idx *li = in->data;
1781 80554 : if (li->type == join_idx) {
1782 8396 : sql_key *rk = (sql_key*)os_find_id(tr->cat->objects, tr, ((sql_fkey*)li->key)->rkey);
1783 8396 : fcmp cmp = (fcmp)&sql_column_kc_cmp;
1784 :
1785 9294 : if (rk->t == r &&
1786 1694 : list_match(lcols, li->columns, cmp) == 0 &&
1787 796 : list_match(rcols, rk->columns, cmp) == 0) {
1788 796 : return li;
1789 : }
1790 : }
1791 : }
1792 : }
1793 : return NULL;
1794 : }
1795 :
1796 : static sql_rel *
1797 636532 : find_basetable( sql_rel *r)
1798 : {
1799 938537 : if (!r)
1800 : return NULL;
1801 933576 : switch(r->op) {
1802 495172 : case op_basetable:
1803 495172 : if (!r->l)
1804 0 : return NULL;
1805 : return r;
1806 302005 : case op_semi:
1807 : case op_anti:
1808 : case op_project:
1809 : case op_select:
1810 : case op_topn:
1811 : case op_sample:
1812 302005 : return find_basetable(r->l);
1813 : default:
1814 : return NULL;
1815 : }
1816 : }
1817 :
1818 : static int
1819 106697 : exps_count(list *exps)
1820 : {
1821 106697 : node *n;
1822 106697 : int cnt = 0;
1823 :
1824 106697 : if (!exps)
1825 : return 0;
1826 249215 : for (n = exps->h; n; n=n->next)
1827 142518 : exp_count(&cnt, n->data);
1828 106697 : return cnt;
1829 : }
1830 :
1831 : static list *
1832 208185 : order_join_expressions(mvc *sql, list *dje, list *rels)
1833 : {
1834 208185 : node *n;
1835 208185 : int cnt = list_length(dje);
1836 :
1837 208185 : if (cnt <= 1)
1838 : return dje;
1839 :
1840 70661 : list *res = sa_list(sql->sa);
1841 70661 : int i, *keys = SA_NEW_ARRAY(sql->ta, int, cnt);
1842 70661 : void **data = SA_NEW_ARRAY(sql->ta, void*, cnt);
1843 :
1844 295912 : for (n = dje->h, i = 0; n; n = n->next, i++) {
1845 225251 : sql_exp *e = n->data;
1846 :
1847 225251 : keys[i] = exp_keyvalue(e);
1848 : /* add some weight for the selections */
1849 225251 : if (e->type == e_cmp && !is_complex_exp(e->flag)) {
1850 225218 : sql_rel *l = find_rel(rels, e->l);
1851 225218 : sql_rel *r = find_rel(rels, e->r);
1852 :
1853 225218 : if (l && is_select(l->op) && l->exps)
1854 60240 : keys[i] += list_length(l->exps)*10 + exps_count(l->exps);
1855 225218 : if (r && is_select(r->op) && r->exps)
1856 46457 : keys[i] += list_length(r->exps)*10 + exps_count(r->exps);
1857 : }
1858 225251 : data[i] = n->data;
1859 : }
1860 : /* sort descending */
1861 70661 : GDKqsort(keys, data, NULL, cnt, sizeof(int), sizeof(void *), TYPE_int, true, true);
1862 366573 : for(i=0; i<cnt; i++) {
1863 225251 : list_append(res, data[i]);
1864 : }
1865 : return res;
1866 : }
1867 :
1868 : static int
1869 188034 : find_join_rels(list **L, list **R, list *exps, list *rels)
1870 : {
1871 188034 : node *n;
1872 :
1873 188034 : *L = sa_list(exps->sa);
1874 188034 : *R = sa_list(exps->sa);
1875 188034 : if (!exps || list_length(exps) <= 1)
1876 : return -1;
1877 313726 : for(n = exps->h; n; n = n->next) {
1878 237617 : sql_exp *e = n->data;
1879 237617 : sql_rel *l = NULL, *r = NULL;
1880 :
1881 237617 : if (!is_complex_exp(e->flag)){
1882 237607 : l = find_rel(rels, e->l);
1883 237607 : r = find_rel(rels, e->r);
1884 : }
1885 237607 : if (l<r) {
1886 149602 : list_append(*L, l);
1887 149602 : list_append(*R, r);
1888 : } else {
1889 88015 : list_append(*L, r);
1890 88015 : list_append(*R, l);
1891 : }
1892 : }
1893 : return 0;
1894 : }
1895 :
1896 : static list *
1897 76109 : distinct_join_exps(list *aje, list *lrels, list *rrels)
1898 : {
1899 76109 : node *n, *m, *o, *p;
1900 76109 : int len = list_length(aje), i, j;
1901 76109 : char *used = SA_ZNEW_ARRAY(aje->sa, char, len);
1902 76109 : list *res = sa_list(aje->sa);
1903 :
1904 76109 : assert(len == list_length(lrels));
1905 313726 : for(n = lrels->h, m = rrels->h, j = 0; n && m;
1906 237617 : n = n->next, m = m->next, j++) {
1907 237617 : if (n->data && m->data)
1908 990489 : for(o = n->next, p = m->next, i = j+1; o && p;
1909 752933 : o = o->next, p = p->next, i++) {
1910 752933 : if (o->data == n->data && p->data == m->data)
1911 31407 : used[i] = 1;
1912 : }
1913 : }
1914 313726 : for (i = 0, n = aje->h; i < len; n = n->next, i++) {
1915 237617 : if (!used[i])
1916 209810 : list_append(res, n->data);
1917 : }
1918 76109 : return res;
1919 : }
1920 :
1921 : static list *
1922 188034 : find_fk( mvc *sql, list *rels, list *exps)
1923 : {
1924 188034 : node *djn;
1925 188034 : list *sdje, *aje, *dje;
1926 188034 : list *lrels, *rrels;
1927 :
1928 : /* first find the distinct join expressions */
1929 188034 : aje = list_select(exps, rels, (fcmp) &exp_is_join, (fdup)NULL);
1930 : /* add left/right relation */
1931 188034 : if (find_join_rels(&lrels, &rrels, aje, rels) < 0)
1932 : dje = aje;
1933 : else
1934 76109 : dje = distinct_join_exps(aje, lrels, rrels);
1935 509447 : for(djn=dje->h; djn; djn = djn->next) {
1936 : /* equal join expressions */
1937 321497 : sql_idx *idx = NULL;
1938 321497 : sql_exp *je = djn->data, *le = je->l, *re = je->r;
1939 :
1940 321497 : if (is_complex_exp(je->flag))
1941 : break;
1942 321413 : if (!find_prop(je->p, PROP_JOINIDX)) {
1943 318266 : int swapped = 0;
1944 318266 : list *aaje = matching_joins(sql->sa, rels, aje, je);
1945 318266 : list *eje = list_select(aaje, (void*)1, (fcmp) &exp_is_eqjoin, (fdup)NULL);
1946 318266 : sql_rel *lr = find_rel(rels, le), *olr = lr;
1947 318266 : sql_rel *rr = find_rel(rels, re), *orr = rr;
1948 318266 : sql_rel *bt = NULL;
1949 318266 : char *iname;
1950 :
1951 318266 : sql_table *l, *r;
1952 318266 : list *lexps = list_map(eje, lr, (fmap) &joinexp_col);
1953 318266 : list *rexps = list_map(eje, rr, (fmap) &joinexp_col);
1954 318266 : list *lcols, *rcols;
1955 :
1956 318266 : lr = find_basetable(lr);
1957 318266 : rr = find_basetable(rr);
1958 318266 : if (!lr || !rr)
1959 222900 : continue;
1960 212049 : l = lr->l;
1961 212049 : r = rr->l;
1962 212049 : lcols = list_map(lexps, lr, (fmap) &table_colexp);
1963 212049 : rcols = list_map(rexps, rr, (fmap) &table_colexp);
1964 212049 : lcols->destroy = NULL;
1965 212049 : rcols->destroy = NULL;
1966 212049 : if (list_length(lcols) != list_length(rcols))
1967 10465 : continue;
1968 :
1969 201584 : idx = find_fk_index(sql, l, lcols, r, rcols);
1970 201584 : if (!idx) {
1971 200949 : idx = find_fk_index(sql, r, rcols, l, lcols);
1972 200949 : swapped = 1;
1973 : }
1974 :
1975 201584 : if (idx && (iname = sa_strconcat( sql->sa, "%", idx->base.name)) != NULL &&
1976 635 : ((!swapped && name_find_column(olr, NULL, iname, -2, &bt) == NULL) ||
1977 161 : ( swapped && name_find_column(orr, NULL, iname, -2, &bt) == NULL)))
1978 : idx = NULL;
1979 :
1980 : if (idx) {
1981 737 : prop *p;
1982 737 : node *n;
1983 737 : sql_exp *t = NULL, *i = NULL;
1984 :
1985 737 : if (list_length(lcols) > 1 || !mvc_debug_on(sql, 512)) {
1986 :
1987 : /* Add join between idx and TID */
1988 737 : if (swapped) {
1989 143 : sql_exp *s = je->l, *l = je->r;
1990 :
1991 143 : t = rel_find_column(sql->sa, olr, s->l, TID);
1992 143 : i = rel_find_column(sql->sa, orr, l->l, iname);
1993 143 : if (!t || !i)
1994 1 : continue;
1995 142 : je = exp_compare(sql->sa, i, t, cmp_equal);
1996 : } else {
1997 594 : sql_exp *s = je->r, *l = je->l;
1998 :
1999 594 : t = rel_find_column(sql->sa, orr, s->l, TID);
2000 594 : i = rel_find_column(sql->sa, olr, l->l, iname);
2001 594 : if (!t || !i)
2002 0 : continue;
2003 594 : je = exp_compare(sql->sa, i, t, cmp_equal);
2004 : }
2005 :
2006 : /* Remove all join expressions */
2007 1474 : for (n = eje->h; n; n = n->next)
2008 738 : list_remove_data(exps, NULL, n->data);
2009 736 : append(exps, je);
2010 736 : djn->data = je;
2011 0 : } else if (swapped) { /* else keep je for single column expressions */
2012 0 : je = exp_compare(sql->sa, je->r, je->l, cmp_equal);
2013 : /* Remove all join expressions */
2014 0 : for (n = eje->h; n; n = n->next)
2015 0 : list_remove_data(exps, NULL, n->data);
2016 0 : append(exps, je);
2017 0 : djn->data = je;
2018 : }
2019 736 : je->p = p = prop_create(sql->sa, PROP_JOINIDX, je->p);
2020 736 : p->value.pval = idx;
2021 : }
2022 : }
2023 : }
2024 :
2025 : /* sort expressions on weighted number of reducing operators */
2026 188034 : sdje = order_join_expressions(sql, dje, rels);
2027 188034 : return sdje;
2028 : }
2029 :
2030 : static int
2031 486683 : rels_find_one_rel( sql_rel **rels, int nr, sql_exp *e)
2032 : {
2033 486683 : int fnd = 0;
2034 :
2035 4310595 : for(int i = 1; i<=nr; i++) {
2036 3823982 : if (rel_has_exp(rels[i], e, false) == 0) {
2037 486486 : if (fnd)
2038 : return 0;
2039 : fnd = i;
2040 : }
2041 : }
2042 : return fnd;
2043 : }
2044 :
2045 : /* TODO move popcount and popcount64 into gdk_*.h, used in gdk_cand, strimps and here */
2046 : static inline int
2047 : popcount64(uint64_t x)
2048 : {
2049 : #if defined(__GNUC__)
2050 : return (uint32_t) __builtin_popcountll(x);
2051 : #elif defined(_MSC_VER)
2052 : #if SIZEOF_OID == 4
2053 : /* no __popcnt64 on 32 bit Windows */
2054 : return (int) (__popcnt((uint32_t) x) + __popcnt((uint32_t) (x >> 32)));
2055 : #else
2056 : return (uint32_t) __popcnt64(x);
2057 : #endif
2058 : #else
2059 : x = (x & UINT64_C(0x5555555555555555)) + ((x >> 1) & UINT64_C(0x5555555555555555));
2060 : x = (x & UINT64_C(0x3333333333333333)) + ((x >> 2) & UINT64_C(0x3333333333333333));
2061 : x = (x & UINT64_C(0x0F0F0F0F0F0F0F0F)) + ((x >> 4) & UINT64_C(0x0F0F0F0F0F0F0F0F));
2062 : return (x * UINT64_C(0x0101010101010101)) >> 56;
2063 : #endif
2064 : }
2065 :
2066 : static sql_rel *
2067 110448 : order_joins(visitor *v, list *rels, list *exps)
2068 : {
2069 110448 : sql_rel *top = NULL, *l = NULL, *r = NULL;
2070 110448 : sql_exp *cje;
2071 110448 : node *djn;
2072 110448 : list *sdje, *n_rels = NULL;
2073 110448 : int fnd = 0;
2074 110448 : unsigned int rsingle;
2075 110448 : int direct = 1;
2076 :
2077 : /* find foreign keys and reorder the expressions on reducing quality */
2078 110448 : sdje = find_fk(v->sql, rels, exps);
2079 :
2080 353708 : for(djn = sdje->h; djn; djn = djn->next ) {
2081 243260 : sql_exp *e = djn->data;
2082 243260 : list_remove_data(exps, NULL, e);
2083 : }
2084 110448 : if (list_length(rels) > 2 && mvc_debug_on(v->sql, 256)) {
2085 0 : top = rel_planner(v->sql, rels, sdje, exps);
2086 0 : return top;
2087 : }
2088 :
2089 110448 : int nr_exps = list_length(sdje), nr_rels = list_length(rels), ci = 1;
2090 110448 : if (nr_rels > 64) {
2091 1 : direct = 0;
2092 1 : n_rels = sa_list(v->sql->ta);
2093 : }
2094 110448 : sql_rel **rels_a = SA_NEW_ARRAY(v->sql->ta, sql_rel*, nr_rels+1); /* don't use slot 0 */
2095 110448 : rels_a[0] = NULL;
2096 463352 : for (node *n = rels->h; n; n = n->next, ci++) {
2097 352904 : rels_a[ci] = n->data;
2098 : }
2099 110448 : ulng *h = SA_NEW_ARRAY(v->sql->ta, ulng, nr_exps), rel_mask = 0; /* bit field (for > 64 its an imprint) */
2100 110448 : uint16_t *r1 = SA_NEW_ARRAY(v->sql->ta, uint16_t, nr_exps);
2101 110448 : uint16_t *r2 = SA_NEW_ARRAY(v->sql->ta, uint16_t, nr_exps);
2102 : /* change r3 into rest list's */
2103 110448 : int *r3 = SA_NEW_ARRAY(v->sql->ta, int, nr_exps);
2104 :
2105 110448 : ci = 0;
2106 353708 : for (node *n = sdje->h; n; n = n->next, ci++) {
2107 243260 : sql_exp *cje = n->data;
2108 :
2109 243260 : h[ci] = r1[ci] = r2[ci] = 0;
2110 243260 : r3[ci] = 0;
2111 : /* h[ci] = exp_find_rels(cje, rels) */
2112 243260 : if (cje->type != e_cmp || is_complex_exp(cje->flag) || !find_prop(cje->p, PROP_HASHCOL) ||
2113 29 : (cje->type == e_cmp && cje->f == NULL)) {
2114 243260 : cje->tmp = ci;
2115 243260 : r1[ci] = rels_find_one_rel(rels_a, nr_rels, cje->l);
2116 243260 : r2[ci] = rels_find_one_rel(rels_a, nr_rels, cje->r);
2117 243260 : if (r1[ci])
2118 243140 : h[ci] |= ((ulng)1)<<((r1[ci]-1)%64);
2119 243260 : if (r2[ci])
2120 243116 : h[ci] |= ((ulng)1)<<((r2[ci]-1)%64);
2121 243260 : if (cje->f) {
2122 163 : r3[ci] = rels_find_one_rel(rels_a, nr_rels, cje->f);
2123 163 : if (r3[ci] == r2[ci])
2124 148 : r3[ci] = 0;
2125 163 : if (r3[ci])
2126 5 : h[ci] |= ((ulng)1)<<((r3[ci]-1)%64);
2127 : }
2128 : }
2129 : }
2130 : /* open problem, some expressions use more than 2 relations */
2131 : /* For example a.x = b.y * c.z; */
2132 110448 : if (list_length(rels) >= 2 && sdje->h) {
2133 : /* get the first expression */
2134 110425 : cje = sdje->h->data;
2135 :
2136 : /* find the involved relations */
2137 :
2138 : /* complex expressions may touch multiple base tables
2139 : * Should be pushed up to extra selection.
2140 : * */
2141 110425 : if (0 && popcount64(h[cje->tmp]) > 2)
2142 : assert(0);
2143 110425 : if (cje->type != e_cmp || is_complex_exp(cje->flag) || !find_prop(cje->p, PROP_HASHCOL) ||
2144 29 : (cje->type == e_cmp && cje->f == NULL)) {
2145 110425 : l = rels_a[r1[cje->tmp]];
2146 110425 : r = rels_a[r2[cje->tmp]];
2147 110425 : rel_mask |= h[cje->tmp];
2148 : }
2149 110425 : cje->tmp = 0;
2150 :
2151 110425 : if (l && r && l != r)
2152 110311 : list_remove_data(sdje, NULL, cje);
2153 : }
2154 110448 : if (l && r && l != r) {
2155 110311 : list_remove_data(rels, NULL, l);
2156 110311 : list_remove_data(rels, NULL, r);
2157 110311 : if (!direct) {
2158 1 : list_append(n_rels, l);
2159 1 : list_append(n_rels, r);
2160 : }
2161 :
2162 : /* Create a relation between l and r. Since the calling
2163 : functions rewrote the join tree, into a list of expressions
2164 : and a list of (simple) relations, there are no outer joins
2165 : involved, we can simply do a crossproduct here.
2166 : */
2167 110311 : rsingle = is_single(r);
2168 110311 : reset_single(r);
2169 110311 : top = rel_crossproduct(v->sql->sa, l, r, op_join);
2170 110311 : if (rsingle)
2171 7172 : set_single(r);
2172 110311 : rel_join_add_exp(v->sql->sa, top, cje);
2173 :
2174 : /* all other join expressions on these 2 relations */
2175 114239 : for (node *en = exps->h; en; ) {
2176 3928 : node *next = en->next;
2177 3928 : sql_exp *e = en->data;
2178 3928 : if (rel_rebind_exp(v->sql, top, e)) {
2179 3809 : rel_join_add_exp(v->sql->sa, top, e);
2180 3809 : list_remove_data(exps, NULL, e);
2181 : }
2182 : en = next;
2183 : }
2184 : fnd = 1;
2185 : }
2186 : /* build join tree using the ordered list */
2187 239955 : while(list_length(sdje) && fnd) {
2188 129507 : fnd = 0;
2189 : /* find the first expression which could be added */
2190 914681 : for(djn = sdje->h; djn && !fnd && rels->h; djn = (!fnd)?djn->next:NULL) {
2191 392587 : node *en;
2192 392587 : l = r = NULL;
2193 :
2194 392587 : cje = djn->data;
2195 392587 : if ((h[cje->tmp] & rel_mask) > 0) {
2196 129429 : if (rel_mask & (((ulng)1)<<((r1[cje->tmp]-1)%64)))
2197 73832 : l = rels_a[r1[cje->tmp]];
2198 129429 : if (rel_mask & (((ulng)1)<<((r2[cje->tmp]-1)%64)))
2199 55598 : r = rels_a[r2[cje->tmp]];
2200 : }
2201 392587 : if (!direct) { /* check if atleast one side in n_rels */
2202 1040 : if (l && !list_find(n_rels, l, NULL))
2203 1007 : l = NULL;
2204 1040 : if (r && !list_find(n_rels, r, NULL))
2205 1 : r = NULL;
2206 : }
2207 :
2208 392587 : if (l && r) {
2209 0 : assert(0);
2210 : /* create a selection on the current */
2211 : rel_join_add_exp(v->sql->sa, top, cje);
2212 : fnd = 1;
2213 392587 : } else if (l || r) {
2214 : /* TODO: handle case for joins which need > 2 relations, ie where the current 'top' of the
2215 : * join tree needs to add more then one relation */
2216 129429 : rel_mask |= h[cje->tmp];
2217 129429 : if (l) {
2218 73832 : r = rels_a[r2[cje->tmp]];
2219 : } else {
2220 55597 : l = r;
2221 55597 : r = rels_a[r1[cje->tmp]];
2222 : }
2223 129429 : if (!r) {
2224 0 : fnd = 1; /* not really, but this bails out */
2225 0 : list_remove_data(sdje, NULL, cje); /* handle later as select */
2226 0 : continue;
2227 : }
2228 :
2229 : /* remove the expression from the lists */
2230 129429 : list_remove_data(sdje, NULL, cje);
2231 :
2232 129429 : list_remove_data(rels, NULL, r);
2233 129429 : if (!direct)
2234 63 : append(n_rels, r);
2235 :
2236 : /* create a join using the current expression */
2237 129429 : rsingle = is_single(r);
2238 129429 : reset_single(r);
2239 129429 : top = rel_crossproduct(v->sql->sa, top, r, op_join);
2240 129429 : if (rsingle)
2241 0 : set_single(r);
2242 129429 : rel_join_add_exp(v->sql->sa, top, cje);
2243 :
2244 : /* all join expressions on these tables */
2245 129767 : for (en = exps->h; en; ) {
2246 338 : node *next = en->next;
2247 338 : sql_exp *e = en->data;
2248 338 : if (rel_rebind_exp(v->sql, top, e)) {
2249 117 : rel_join_add_exp(v->sql->sa, top, e);
2250 117 : list_remove_data(exps, NULL, e);
2251 : }
2252 : en = next;
2253 : }
2254 : /* Remove other joins on the current 'n_rels'
2255 : set in the distinct list too */
2256 714110 : for (en = sdje->h; en; ) {
2257 584681 : node *next = en->next;
2258 584681 : sql_exp *e = en->data;
2259 584681 : if ((direct && ((e->flag <= cmp_notequal && (h[e->tmp] & rel_mask) == h[e->tmp]) || (e->flag > cmp_notequal && rel_rebind_exp(v->sql, top, e)))) ||
2260 1953 : (!direct && rel_rebind_exp(v->sql, top, e))) {
2261 3316 : rel_join_add_exp(v->sql->sa, top, e);
2262 3316 : list_remove_data(sdje, NULL, en->data);
2263 : }
2264 : en = next;
2265 : }
2266 : fnd = 1;
2267 : }
2268 : }
2269 : }
2270 110448 : if (list_length(rels)) { /* more relations */
2271 1173 : node *n;
2272 4026 : for(n=rels->h; n; n = n->next) {
2273 2853 : sql_rel *nr = n->data;
2274 :
2275 2853 : if (top) {
2276 2716 : rsingle = is_single(nr);
2277 2716 : reset_single(nr);
2278 2716 : top = rel_crossproduct(v->sql->sa, top, nr, op_join);
2279 2716 : if (rsingle)
2280 0 : set_single(nr);
2281 : } else
2282 : top = nr;
2283 : }
2284 : }
2285 110448 : if (list_length(sdje)) {
2286 192 : if (list_empty(exps))
2287 : exps = sdje;
2288 : else
2289 0 : exps = list_merge(exps, sdje, (fdup)NULL);
2290 : }
2291 110448 : if (list_length(exps)) { /* more expressions (add selects) */
2292 217 : top = rel_select(v->sql->sa, top, NULL);
2293 446 : for(node *n=exps->h; n; n = n->next) {
2294 229 : sql_exp *e = n->data;
2295 :
2296 229 : if (exp_is_join_exp(e) == 0) {
2297 216 : sql_rel *nr = NULL;
2298 216 : if (is_theta_exp(e->flag)) {
2299 141 : nr = rel_push_join(v->sql, top->l, e->l, e->r, e->f, e, 0);
2300 75 : } else if (e->flag == cmp_filter || e->flag == cmp_or) {
2301 75 : sql_exp *l = exps_find_one_multi_exp(e->l), *r = exps_find_one_multi_exp(e->r);
2302 75 : if (l && r)
2303 71 : nr = rel_push_join(v->sql, top->l, l, r, NULL, e, 0);
2304 : }
2305 212 : if (!nr)
2306 4 : rel_join_add_exp(v->sql->sa, top->l, e);
2307 : } else
2308 13 : rel_select_add_exp(v->sql->sa, top, e);
2309 : }
2310 217 : if (list_empty(top->exps)) { /* empty select */
2311 204 : sql_rel *l = top->l;
2312 204 : top->l = NULL;
2313 204 : rel_destroy(top);
2314 204 : top = l;
2315 : }
2316 : }
2317 : return top;
2318 : }
2319 :
2320 : static int
2321 352904 : rel_neg_in_size(sql_rel *r)
2322 : {
2323 352904 : if ((is_union(r->op) /*|| is_munion(r->op)*/) && r->nrcols == 0)
2324 0 : return -1 + rel_neg_in_size(r->l);
2325 352904 : if (is_project(r->op) && r->nrcols == 0)
2326 0 : return -1;
2327 : return 0;
2328 : }
2329 :
2330 352904 : static void _rel_destroy(void *dummy, sql_rel *rel)
2331 : {
2332 352904 : (void)dummy;
2333 352904 : rel_destroy(rel);
2334 352904 : }
2335 :
2336 : static list *
2337 110448 : push_in_join_down(mvc *sql, list *rels, list *exps)
2338 : {
2339 110448 : node *n;
2340 110448 : int restart = 1;
2341 110448 : list *nrels;
2342 :
2343 : /* we should sort these first, ie small in's before large one's */
2344 110448 : nrels = list_sort(rels, (fkeyvalue)&rel_neg_in_size, (fdup)&rel_dup);
2345 :
2346 : /* we need to cleanup, the new refs ! */
2347 110448 : rels->destroy = (fdestroy)_rel_destroy;
2348 110448 : list_destroy(rels);
2349 110448 : rels = nrels;
2350 :
2351 : /* one of the rels should be a op_union with nrcols == 0 */
2352 331344 : while (restart) {
2353 463352 : for (n = rels->h; n; n = n->next) {
2354 352904 : sql_rel *r = n->data;
2355 :
2356 352904 : restart = 0;
2357 352904 : if (is_project(r->op) && r->nrcols == 0) {
2358 : /* next step find expression on this relation */
2359 0 : node *m;
2360 0 : sql_rel *l = NULL;
2361 0 : sql_exp *je = NULL;
2362 :
2363 0 : for(m = exps->h; !je && m; m = m->next) {
2364 0 : sql_exp *e = m->data;
2365 :
2366 0 : if (e->type == e_cmp && e->flag == cmp_equal) {
2367 : /* in values are on
2368 : the right of the join */
2369 0 : if (rel_has_exp(r, e->r, false) >= 0)
2370 0 : je = e;
2371 : }
2372 : }
2373 : /* with this expression find other relation */
2374 0 : if (je && (l = find_rel(rels, je->l)) != NULL) {
2375 0 : unsigned int rsingle = is_single(r);
2376 0 : reset_single(r);
2377 0 : sql_rel *nr = rel_crossproduct(sql->sa, l, r, op_join);
2378 0 : if (rsingle)
2379 0 : set_single(r);
2380 0 : rel_join_add_exp(sql->sa, nr, je);
2381 0 : list_append(rels, nr);
2382 0 : list_remove_data(rels, NULL, l);
2383 0 : list_remove_data(rels, NULL, r);
2384 0 : list_remove_data(exps, NULL, je);
2385 0 : restart = 1;
2386 0 : break;
2387 : }
2388 :
2389 : }
2390 : }
2391 : }
2392 110448 : return rels;
2393 : }
2394 :
2395 : static list *
2396 623832 : push_up_join_exps( mvc *sql, sql_rel *rel)
2397 : {
2398 623832 : if (rel_is_ref(rel))
2399 : return NULL;
2400 :
2401 614810 : switch(rel->op) {
2402 254693 : case op_join: {
2403 254693 : sql_rel *rl = rel->l;
2404 254693 : sql_rel *rr = rel->r;
2405 254693 : list *l, *r;
2406 :
2407 254693 : if (rel_is_ref(rl) && rel_is_ref(rr)) {
2408 10 : l = rel->exps;
2409 10 : rel->exps = NULL;
2410 10 : return l;
2411 : }
2412 254683 : l = push_up_join_exps(sql, rl);
2413 254683 : r = push_up_join_exps(sql, rr);
2414 254683 : if (l && r) {
2415 258 : l = list_merge(l, r, (fdup)NULL);
2416 258 : r = NULL;
2417 254425 : } else if (!l) {
2418 131275 : l = r;
2419 131275 : r = NULL;
2420 : }
2421 254683 : if (rel->exps) {
2422 223965 : if (l && !r)
2423 113233 : r = l;
2424 223965 : l = list_merge(rel->exps, r, (fdup)NULL);
2425 : }
2426 254683 : rel->exps = NULL;
2427 254683 : return l;
2428 : }
2429 : default:
2430 : return NULL;
2431 : }
2432 : }
2433 :
2434 : static sql_rel *
2435 233435 : reorder_join(visitor *v, sql_rel *rel)
2436 : {
2437 233435 : list *exps, *rels;
2438 :
2439 233435 : if (is_innerjoin(rel->op) && !is_single(rel) && !rel_is_ref(rel) && list_empty(rel->attr)) {
2440 135099 : if (list_empty(rel->exps)) {
2441 25449 : sql_rel *l = rel->l, *r = rel->r;
2442 25449 : if (!is_innerjoin(l->op) && !is_innerjoin(r->op))
2443 : return rel;
2444 : }
2445 114466 : rel->exps = push_up_join_exps(v->sql, rel);
2446 : }
2447 :
2448 212802 : if (!is_innerjoin(rel->op) || is_single(rel) || rel_is_ref(rel) || list_empty(rel->exps) || !list_empty(rel->attr)) {
2449 102354 : if (!list_empty(rel->exps)) { /* cannot add join idxs to cross products */
2450 71516 : exps = rel->exps;
2451 71516 : rel->exps = NULL; /* should be all crosstables by now */
2452 71516 : rels = sa_list(v->sql->ta);
2453 : /* try to use an join index also for outer joins */
2454 71516 : get_inner_relations(v->sql, rel, rels);
2455 71516 : int cnt = list_length(exps);
2456 71516 : rel->exps = find_fk(v->sql, rels, exps);
2457 71516 : if (list_length(rel->exps) != cnt)
2458 20151 : rel->exps = order_join_expressions(v->sql, exps, rels);
2459 : }
2460 102354 : rel->l = rel_join_order_(v, rel->l);
2461 102354 : rel->r = rel_join_order_(v, rel->r);
2462 : } else {
2463 110448 : exps = rel->exps;
2464 110448 : rel->exps = NULL; /* should be all crosstables by now */
2465 110448 : rels = sa_list(v->sql->ta);
2466 110448 : get_relations(v, rel, rels);
2467 110448 : if (list_length(rels) > 1) {
2468 110448 : rels = push_in_join_down(v->sql, rels, exps);
2469 110448 : rel = order_joins(v, rels, exps);
2470 : } else {
2471 0 : rel->exps = exps;
2472 : }
2473 : }
2474 : return rel;
2475 : }
2476 :
2477 : static sql_rel *
2478 1933068 : rel_join_order_(visitor *v, sql_rel *rel)
2479 : {
2480 1933068 : if (!rel)
2481 : return rel;
2482 :
2483 1922148 : switch (rel->op) {
2484 : case op_basetable:
2485 : break;
2486 3175 : case op_table:
2487 3175 : if (IS_TABLE_PROD_FUNC(rel->flag) || rel->flag == TABLE_FROM_RELATION)
2488 3175 : rel->l = rel_join_order_(v, rel->l);
2489 : break;
2490 : case op_join:
2491 : case op_left:
2492 : case op_right:
2493 : case op_full:
2494 : break;
2495 :
2496 26042 : case op_semi:
2497 : case op_anti:
2498 :
2499 : case op_union:
2500 : case op_inter:
2501 : case op_except:
2502 : case op_merge:
2503 26042 : rel->l = rel_join_order_(v, rel->l);
2504 26042 : rel->r = rel_join_order_(v, rel->r);
2505 26042 : break;
2506 134074 : case op_munion:
2507 134074 : assert(rel->l);
2508 402222 : for (node *n = ((list*)rel->l)->h; n; n = n->next)
2509 268148 : n->data = rel_join_order_(v, n->data);
2510 : break;
2511 985798 : case op_project:
2512 : case op_select:
2513 : case op_groupby:
2514 : case op_topn:
2515 : case op_sample:
2516 985798 : rel->l = rel_join_order_(v, rel->l);
2517 985798 : break;
2518 4017 : case op_ddl:
2519 4017 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
2520 0 : rel->l = rel_join_order_(v, rel->l);
2521 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
2522 47 : rel->l = rel_join_order_(v, rel->l);
2523 47 : rel->r = rel_join_order_(v, rel->r);
2524 : }
2525 : break;
2526 1243 : case op_insert:
2527 : case op_update:
2528 : case op_delete:
2529 1243 : rel->r = rel_join_order_(v, rel->r);
2530 1243 : break;
2531 : case op_truncate:
2532 : break;
2533 : }
2534 1922148 : if (is_join(rel->op))
2535 233435 : rel = reorder_join(v, rel);
2536 : return rel;
2537 : }
2538 :
2539 : static sql_rel *
2540 64914 : rel_join_order(visitor *v, global_props *gp, sql_rel *rel)
2541 : {
2542 64914 : (void) gp;
2543 64914 : sql_rel *r = rel_join_order_(v, rel);
2544 64914 : sa_reset(v->sql->ta);
2545 64914 : return r;
2546 : }
2547 :
2548 : run_optimizer
2549 601138 : bind_join_order(visitor *v, global_props *gp)
2550 : {
2551 601138 : int flag = v->sql->sql_optimizer;
2552 600953 : return gp->opt_level == 1 && gp->opt_cycle < 10 && !gp->cnt[op_update] && (gp->cnt[op_join] || gp->cnt[op_left] ||
2553 1196135 : gp->cnt[op_right] || gp->cnt[op_full]) && (flag & join_order) ? rel_join_order : NULL;
2554 : }
2555 :
2556 : /* this join order is to be done once after statistics are gathered */
2557 : run_optimizer
2558 698834 : bind_join_order2(visitor *v, global_props *gp)
2559 : {
2560 : /*int flag = v->sql->sql_optimizer;
2561 : return gp->opt_level == 1 && !gp->has_special_modify && !gp->cnt[op_update] && (gp->cnt[op_join] || gp->cnt[op_left] ||
2562 : gp->cnt[op_right] || gp->cnt[op_full]) && (flag & join_order) ? rel_join_order : NULL;*/
2563 : /* TODO we have to propagate count statistics here */
2564 698834 : (void) v;
2565 698834 : (void) gp;
2566 698834 : return NULL;
2567 : }
2568 :
2569 :
2570 : static int
2571 11949 : is_identity_of(sql_exp *e, sql_rel *l)
2572 : {
2573 11949 : if (e->type != e_cmp)
2574 : return 0;
2575 11922 : if (!is_identity(e->l, l) || !is_identity(e->r, l) || (e->f && !is_identity(e->f, l)))
2576 11922 : return 0;
2577 : return 1;
2578 : }
2579 :
2580 : static inline sql_rel *
2581 16233 : rel_rewrite_semijoin(visitor *v, sql_rel *rel)
2582 : {
2583 16233 : assert(is_semi(rel->op));
2584 : {
2585 16233 : sql_rel *l = rel->l;
2586 16233 : sql_rel *r = rel->r;
2587 16233 : sql_rel *rl = (r->l)?r->l:NULL;
2588 16233 : int on_identity = 1;
2589 :
2590 16233 : if (!rel->exps || list_length(rel->exps) != 1 || !is_identity_of(rel->exps->h->data, l))
2591 : on_identity = 0;
2592 :
2593 : /* rewrite {semi,anti}join (A, join(A,B)) into {semi,anti}join (A,B)
2594 : * and {semi,anti}join (A, join(B,A)) into {semi,anti}join (A,B)
2595 : * Where the semi/anti join is done using the identity */
2596 0 : if (on_identity && l->ref.refcnt == 2 && ((is_join(r->op) && (l == r->l || l == r->r)) ||
2597 0 : (is_project(r->op) && rl && is_join(rl->op) && (l == rl->l || l == rl->r)))){
2598 0 : sql_rel *or = r;
2599 :
2600 0 : if (is_project(r->op))
2601 0 : r = rl;
2602 :
2603 0 : if (l == r->r)
2604 0 : rel->r = rel_dup(r->l);
2605 : else
2606 0 : rel->r = rel_dup(r->r);
2607 :
2608 0 : rel->exps = r->exps;
2609 0 : r->exps = NULL;
2610 0 : rel->attr = r->attr;
2611 0 : r->attr = NULL;
2612 0 : rel_destroy(or);
2613 0 : v->changes++;
2614 : }
2615 : }
2616 : {
2617 16233 : sql_rel *l = rel->l, *rl = NULL;
2618 16233 : sql_rel *r = rel->r, *or = r;
2619 :
2620 16233 : if (r)
2621 16233 : rl = r->l;
2622 16233 : if (r && is_project(r->op)) {
2623 13679 : r = rl;
2624 13679 : if (r)
2625 13560 : rl = r->l;
2626 : }
2627 :
2628 : /* More general case is (join reduction)
2629 : {semi,anti}join (A, join(A,B) [A.c1 == B.c1]) [ A.c1 == B.c1 ]
2630 : into {semi,anti}join (A,B) [ A.c1 == B.c1 ]
2631 :
2632 : for semijoin also A.c1 == B.k1 ] [ A.c1 == B.k2 ] could be rewriten
2633 : */
2634 16233 : if (l && r && rl &&
2635 15845 : is_basetable(l->op) && is_basetable(rl->op) &&
2636 2723 : is_join(r->op) && l->l == rl->l)
2637 : {
2638 26 : node *n, *m;
2639 26 : list *exps;
2640 :
2641 51 : if (!rel->exps || !r->exps ||
2642 25 : list_length(rel->exps) != list_length(r->exps))
2643 1 : return rel;
2644 25 : exps = new_exp_list(v->sql->sa);
2645 :
2646 : /* are the join conditions equal */
2647 25 : for (n = rel->exps->h, m = r->exps->h;
2648 25 : n && m; n = n->next, m = m->next)
2649 : {
2650 25 : sql_exp *le = NULL, *oe = n->data;
2651 25 : sql_exp *re = NULL, *ne = m->data;
2652 25 : sql_column *cl;
2653 25 : int equal = 0;
2654 :
2655 25 : if (oe->type != e_cmp || ne->type != e_cmp ||
2656 25 : oe->flag != cmp_equal ||
2657 25 : ne->flag != cmp_equal || is_anti(oe) || is_anti(ne))
2658 : return rel;
2659 :
2660 25 : if ((cl = exp_find_column(rel->l, oe->l, -2)) != NULL) {
2661 25 : le = oe->l;
2662 25 : re = oe->r;
2663 0 : } else if ((cl = exp_find_column(rel->l, oe->r, -2)) != NULL) {
2664 0 : le = oe->r;
2665 0 : re = oe->l;
2666 : } else
2667 : return rel;
2668 :
2669 25 : if (exp_find_column(rl, ne->l, -2) == cl) {
2670 25 : sql_exp *e = (or != r)?rel_find_exp(or, re):re;
2671 :
2672 25 : if (e)
2673 24 : equal = exp_match_exp(ne->r, e);
2674 25 : if (!e || !equal)
2675 : return rel;
2676 0 : re = ne->r;
2677 0 : } else if (exp_find_column(rl, ne->r, -2) == cl) {
2678 0 : sql_exp *e = (or != r)?rel_find_exp(or, re):re;
2679 :
2680 0 : if (e)
2681 0 : equal = exp_match_exp(ne->l, e);
2682 0 : if (!e || !equal)
2683 : return rel;
2684 0 : re = ne->l;
2685 : } else
2686 : return rel;
2687 :
2688 0 : ne = exp_compare(v->sql->sa, le, re, cmp_equal);
2689 0 : append(exps, ne);
2690 : }
2691 :
2692 0 : rel->r = rel_dup(r->r);
2693 0 : rel->exps = exps;
2694 0 : rel_destroy(or);
2695 0 : v->changes++;
2696 : }
2697 : }
2698 : return rel;
2699 : }
2700 :
2701 : /*
2702 : * Push semijoins down, pushes the semijoin through a join.
2703 : *
2704 : * semijoin( join(A, B) [ A.x == B.y ], C ) [ A.z == C.c ]
2705 : * ->
2706 : * join( semijoin(A, C) [ A.z == C.c ], B ) [ A.x == B.y ]
2707 : *
2708 : * also push simple expressions of a semijoin down if they only
2709 : * involve the left sided of the semijoin.
2710 : *
2711 : * in some cases the other way is useful, ie push join down
2712 : * semijoin. When the join reduces (ie when there are selects on it).
2713 : *
2714 : * At the moment, we only flag changes by this optimizer on the first level of optimization
2715 : */
2716 : static inline sql_rel *
2717 507299 : rel_push_semijoin_down_or_up(visitor *v, sql_rel *rel)
2718 : {
2719 507299 : uint8_t cycle = *(uint8_t*) v->data;
2720 :
2721 507299 : if (rel->op == op_join && rel->exps && rel->l) {
2722 470904 : sql_rel *l = rel->l, *r = rel->r;
2723 :
2724 470904 : if (is_semi(l->op) && !rel_is_ref(l) && is_select(r->op) && !rel_is_ref(r)) {
2725 32 : rel->l = l->l;
2726 32 : l->l = rel;
2727 32 : if (cycle <= 0)
2728 0 : v->changes++;
2729 32 : return l;
2730 : }
2731 : }
2732 : /* also case with 2 joins */
2733 : /* join ( join ( semijoin(), table), select (table)); */
2734 507267 : if (rel->op == op_join && rel->exps && rel->l) {
2735 470872 : sql_rel *l = rel->l, *r = rel->r;
2736 470872 : sql_rel *ll;
2737 :
2738 470872 : if (is_join(l->op) && !rel_is_ref(l) && is_select(r->op) && !rel_is_ref(r)) {
2739 10144 : ll = l->l;
2740 10144 : if (is_semi(ll->op) && !rel_is_ref(ll)) {
2741 43 : l->l = ll->l;
2742 43 : ll->l = rel;
2743 43 : if (cycle <= 0)
2744 12 : v->changes++;
2745 43 : return ll;
2746 : }
2747 : }
2748 : }
2749 : /* first push down the expressions involving only A */
2750 507224 : if (rel->op == op_semi && rel->exps && rel->l) {
2751 10586 : sql_rel *jl = rel->l, *ojl = jl;
2752 :
2753 10586 : set_processed(jl);
2754 24697 : for (node *n = rel->exps->h; n;) {
2755 14111 : node *next = n->next;
2756 14111 : sql_exp *e = n->data;
2757 :
2758 14111 : if (n != rel->exps->h && e->type == e_cmp && rel_rebind_exp(v->sql, jl, e)) {
2759 15 : if (!is_select(jl->op) || rel_is_ref(jl))
2760 12 : rel->l = jl = rel_select(v->sql->sa, jl, NULL);
2761 15 : rel_select_add_exp(v->sql->sa, jl, e);
2762 15 : list_remove_node(rel->exps, NULL, n);
2763 15 : if (cycle <= 0)
2764 15 : v->changes++;
2765 : }
2766 : n = next;
2767 : }
2768 10586 : if (ojl != jl)
2769 12 : set_processed(jl);
2770 : }
2771 507224 : if (rel->op == op_semi && rel->exps && rel->l) {
2772 10586 : operator_type op = rel->op, lop;
2773 10586 : node *n;
2774 10586 : sql_rel *l = rel->l, *ll = NULL, *lr = NULL;
2775 10586 : sql_rel *r = rel->r;
2776 10586 : list *exps = rel->exps, *nsexps, *njexps, *nsattr, *njattr;
2777 10586 : int left = 1, right = 1;
2778 :
2779 : /* handle project
2780 : if (l->op == op_project && !need_distinct(l))
2781 : l = l->l;
2782 : */
2783 :
2784 10586 : if (!is_join(l->op) || is_full(l->op) || rel_is_ref(l) || is_single(l))
2785 : return rel;
2786 :
2787 1131 : lop = l->op;
2788 1131 : ll = l->l;
2789 1131 : lr = l->r;
2790 :
2791 : /* check which side is used and other exps are atoms or from right of semijoin */
2792 2177 : for(n = exps->h; n; n = n->next) {
2793 1148 : sql_exp *sje = n->data;
2794 :
2795 1148 : if (sje->type != e_cmp || is_complex_exp(sje->flag))
2796 : return rel;
2797 : /* sje->l from ll and sje->r/f from semijoin r ||
2798 : * sje->l from semijoin r and sje->r/f from ll ||
2799 : * sje->l from lr and sje->r/f from semijoin r ||
2800 : * sje->l from semijoin r and sje->r/f from lr */
2801 2239 : if (left &&
2802 2239 : ((rel_rebind_exp(v->sql, ll, sje->l) && rel_rebind_exp(v->sql, rel->r, sje->r) && (!sje->f || rel_rebind_exp(v->sql, rel->r, sje->f))) ||
2803 1171 : (rel_rebind_exp(v->sql, rel->r, sje->l) && rel_rebind_exp(v->sql, ll, sje->r) && (!sje->f || rel_rebind_exp(v->sql, ll, sje->f)))))
2804 : right = 0;
2805 : else
2806 838 : left = 0;
2807 1662 : if (right &&
2808 1649 : ((rel_rebind_exp(v->sql, lr, sje->l) && rel_rebind_exp(v->sql, rel->r, sje->r) && (!sje->f || rel_rebind_exp(v->sql, rel->r, sje->f))) ||
2809 574 : (rel_rebind_exp(v->sql, rel->r, sje->l) && rel_rebind_exp(v->sql, lr, sje->r) && (!sje->f || rel_rebind_exp(v->sql, lr, sje->f)))))
2810 : left = 0;
2811 : else
2812 : right = 0;
2813 1120 : if (!right && !left)
2814 : return rel;
2815 : }
2816 1029 : if (left && is_right(lop))
2817 : return rel;
2818 1028 : if (right && is_left(lop))
2819 : return rel;
2820 1026 : nsexps = exps_copy(v->sql, rel->exps);
2821 1026 : nsattr = exps_copy(v->sql, rel->attr);
2822 1026 : njexps = exps_copy(v->sql, l->exps);
2823 1026 : njattr = exps_copy(v->sql, l->attr);
2824 1026 : if (left)
2825 265 : l = rel_crossproduct(v->sql->sa, rel_dup(ll), rel_dup(r), op);
2826 : else
2827 761 : l = rel_crossproduct(v->sql->sa, rel_dup(lr), rel_dup(r), op);
2828 1026 : l->exps = nsexps;
2829 1026 : l->attr = nsattr;
2830 1026 : set_processed(l);
2831 1026 : if (left)
2832 265 : l = rel_crossproduct(v->sql->sa, l, rel_dup(lr), lop);
2833 : else
2834 761 : l = rel_crossproduct(v->sql->sa, rel_dup(ll), l, lop);
2835 1026 : l->exps = njexps;
2836 1026 : l->attr = njattr;
2837 1026 : set_processed(l);
2838 1026 : rel_destroy(rel);
2839 1026 : rel = l;
2840 1026 : if (cycle <= 0)
2841 575 : v->changes++;
2842 : }
2843 : return rel;
2844 : }
2845 :
2846 : /* antijoin(a, union(b,c)) -> antijoin(antijoin(a,b), c) */
2847 : static inline sql_rel *
2848 5591 : rel_rewrite_antijoin(visitor *v, sql_rel *rel)
2849 : {
2850 5591 : sql_rel *l = rel->l;
2851 5591 : sql_rel *r = rel->r;
2852 :
2853 5591 : assert(rel->op == op_anti);
2854 5591 : if (l && !rel_is_ref(l) && r && !rel_is_ref(r) && is_union(r->op) && !is_single(r)) {
2855 2 : sql_rel *rl = rel_dup(r->l), *nl;
2856 2 : sql_rel *rr = rel_dup(r->r);
2857 :
2858 2 : if (!is_project(rl->op))
2859 0 : rl = rel_project(v->sql->sa, rl,
2860 : rel_projections(v->sql, rl, NULL, 1, 1));
2861 2 : if (!is_project(rr->op))
2862 0 : rr = rel_project(v->sql->sa, rr,
2863 : rel_projections(v->sql, rr, NULL, 1, 1));
2864 2 : rel_rename_exps(v->sql, r->exps, rl->exps);
2865 2 : rel_rename_exps(v->sql, r->exps, rr->exps);
2866 :
2867 2 : nl = rel_crossproduct(v->sql->sa, rel->l, rl, op_anti);
2868 2 : nl->exps = exps_copy(v->sql, rel->exps);
2869 2 : nl->attr = exps_copy(v->sql, rel->attr);
2870 2 : set_processed(nl);
2871 2 : rel->l = nl;
2872 2 : rel->r = rr;
2873 2 : rel_destroy(r);
2874 2 : v->changes++;
2875 2 : return rel;
2876 : }
2877 : return rel;
2878 : }
2879 :
2880 : static sql_rel *
2881 3694966 : rel_optimize_semi_and_anti_(visitor *v, sql_rel *rel)
2882 : {
2883 : /* rewrite semijoin (A, join(A,B)) into semijoin (A,B) */
2884 3694966 : if (rel && is_semi(rel->op))
2885 16233 : rel = rel_rewrite_semijoin(v, rel);
2886 : /* push semijoin through join */
2887 3694966 : if (rel && (is_semi(rel->op) || is_innerjoin(rel->op)))
2888 507299 : rel = rel_push_semijoin_down_or_up(v, rel);
2889 : /* antijoin(a, union(b,c)) -> antijoin(antijoin(a,b), c) */
2890 3694966 : if (rel && rel->op == op_anti)
2891 5591 : rel = rel_rewrite_antijoin(v, rel);
2892 3694966 : return rel;
2893 : }
2894 :
2895 : static sql_rel *
2896 71048 : rel_optimize_semi_and_anti(visitor *v, global_props *gp, sql_rel *rel)
2897 : {
2898 71048 : v->data = &gp->opt_cycle;
2899 71048 : rel = rel_visitor_bottomup(v, rel, &rel_optimize_semi_and_anti_);
2900 71048 : v->data = gp;
2901 71048 : return rel;
2902 : }
2903 :
2904 : run_optimizer
2905 601245 : bind_optimize_semi_and_anti(visitor *v, global_props *gp)
2906 : {
2907 : /* Important -> Re-write semijoins after rel_join_order */
2908 601245 : int flag = v->sql->sql_optimizer;
2909 601049 : return gp->opt_level == 1 && (gp->cnt[op_join] || gp->cnt[op_left] || gp->cnt[op_right]
2910 1202294 : || gp->cnt[op_full] || gp->cnt[op_semi] || gp->cnt[op_anti]) && (flag & optimize_semi_and_anti) ? rel_optimize_semi_and_anti : NULL;
2911 : }
2912 :
2913 :
2914 : static sql_rel *
2915 1648739 : rel_semijoin_use_fk(visitor *v, sql_rel *rel)
2916 : {
2917 1648739 : if (is_semi(rel->op) && rel->exps) {
2918 6070 : list *exps = rel->exps;
2919 6070 : list *rels = sa_list(v->sql->sa);
2920 :
2921 6070 : rel->exps = NULL;
2922 6070 : append(rels, rel->l);
2923 6070 : append(rels, rel->r);
2924 6070 : (void) find_fk( v->sql, rels, exps);
2925 :
2926 6070 : rel->exps = exps;
2927 : }
2928 1648739 : return rel;
2929 : }
2930 :
2931 : /*
2932 : * Push {semi}joins down, pushes the joins through group by expressions.
2933 : * When the join is on the group by columns, we can push the joins left
2934 : * under the group by. This should only be done, iff the new semijoin would
2935 : * reduce the input table to the groupby. So there should be a reduction
2936 : * (selection) on the table A and this should be propagated to the groupby via
2937 : * for example a primary key.
2938 : *
2939 : * {semi}join( A, groupby( B ) [gbe][aggrs] ) [ gbe == A.x ]
2940 : * ->
2941 : * {semi}join( A, groupby( semijoin(B,A) [gbe == A.x] ) [gbe][aggrs] ) [ gbe == A.x ]
2942 : */
2943 : static inline sql_rel *
2944 1648739 : rel_push_join_down(visitor *v, sql_rel *rel)
2945 : {
2946 1648739 : if (!rel_is_ref(rel) && ((is_left(rel->op) || rel->op == op_join || is_semi(rel->op)) && rel->l && rel->exps)) {
2947 232592 : sql_rel *gb = rel->r, *ogb = gb, *l = NULL, *rell = rel->l;
2948 :
2949 232592 : if (is_simple_project(gb->op) && !rel_is_ref(gb))
2950 45883 : gb = gb->l;
2951 :
2952 232592 : if (rel_is_ref(rell) || !gb || rel_is_ref(gb))
2953 : return rel;
2954 :
2955 223751 : if (is_groupby(gb->op) && gb->r && list_length(gb->r)) {
2956 181 : list *exps = rel->exps, *jes = new_exp_list(v->sql->sa), *gbes = gb->r;
2957 181 : node *n, *m;
2958 : /* find out if all group by expressions are used in the join */
2959 182 : for(n = gbes->h; n; n = n->next) {
2960 181 : sql_exp *gbe = n->data;
2961 181 : int fnd = 0;
2962 181 : const char *rname = NULL, *name = NULL;
2963 :
2964 : /* project in between, ie find alias */
2965 : /* first find expression in expression list */
2966 181 : gbe = exps_uses_exp( gb->exps, gbe);
2967 181 : if (!gbe)
2968 0 : continue;
2969 181 : if (ogb != gb)
2970 171 : gbe = exps_uses_exp( ogb->exps, gbe);
2971 181 : if (gbe) {
2972 140 : rname = exp_find_rel_name(gbe);
2973 140 : name = exp_name(gbe);
2974 : }
2975 :
2976 140 : if (!name)
2977 41 : return rel;
2978 :
2979 352 : for (m = exps->h; m && !fnd; m = m->next) {
2980 212 : sql_exp *je = m->data;
2981 :
2982 212 : if (je->card >= CARD_ATOM && je->type == e_cmp &&
2983 212 : !is_complex_exp(je->flag)) {
2984 : /* expect right expression to match */
2985 207 : sql_exp *r = je->r;
2986 :
2987 207 : if (r == 0 || r->type != e_column)
2988 9 : continue;
2989 198 : if (r->l && rname && strcmp(r->l, rname) == 0 && strcmp(r->r, name)==0) {
2990 : fnd = 1;
2991 74 : } else if (!r->l && !rname && strcmp(r->r, name)==0) {
2992 : fnd = 1;
2993 : }
2994 : if (fnd) {
2995 124 : sql_exp *le = je->l;
2996 124 : sql_exp *re = exp_push_down_prj(v->sql, r, gb, gb->l);
2997 124 : if (!re || (list_length(jes) == 0 && !find_prop(le->p, PROP_HASHCOL))) {
2998 : fnd = 0;
2999 : } else {
3000 1 : int anti = is_anti(je), semantics = is_semantics(je);
3001 :
3002 1 : je = exp_compare(v->sql->sa, le, re, je->flag);
3003 1 : if (anti) set_anti(je);
3004 1 : if (semantics) set_semantics(je);
3005 1 : list_append(jes, je);
3006 : }
3007 : }
3008 : }
3009 : }
3010 140 : if (!fnd)
3011 : return rel;
3012 : }
3013 1 : l = rel_dup(rel->l);
3014 :
3015 : /* push join's left side (as semijoin) down group by */
3016 1 : l = gb->l = rel_crossproduct(v->sql->sa, gb->l, l, op_semi);
3017 1 : l->exps = jes;
3018 1 : set_processed(l);
3019 1 : v->changes++;
3020 1 : return rel;
3021 : }
3022 : }
3023 : return rel;
3024 : }
3025 :
3026 : static bool
3027 692 : check_projection_on_foreignside(sql_rel *r, list *pexps, int fk_left)
3028 : {
3029 : /* projection columns from the foreign side */
3030 692 : if (list_empty(pexps))
3031 : return true;
3032 2476 : for (node *n = pexps->h; n; n = n->next) {
3033 2410 : sql_exp *pe = n->data;
3034 :
3035 2410 : if (pe && is_atom(pe->type))
3036 23 : continue;
3037 2387 : if (pe && !is_alias(pe->type))
3038 : return false;
3039 : /* check for columns from the pk side, then keep the join with the pk */
3040 2377 : if ((fk_left && rel_find_exp(r->r, pe)) || (!fk_left && rel_find_exp(r->l, pe)))
3041 567 : return false;
3042 : }
3043 : return true;
3044 : }
3045 :
3046 : static sql_rel *
3047 233811 : rel_simplify_project_fk_join(mvc *sql, sql_rel *r, list *pexps, list *orderexps, int *changes)
3048 : {
3049 233811 : sql_rel *rl = r->l, *rr = r->r, *nr = NULL;
3050 233811 : sql_exp *je, *le, *nje, *re;
3051 233811 : int fk_left = 1;
3052 :
3053 : /* check for foreign key join */
3054 233811 : if (list_length(r->exps) != 1 || !list_empty(r->attr))
3055 4520 : return r;
3056 229291 : if (!(je = exps_find_prop(r->exps, PROP_JOINIDX)) || je->flag != cmp_equal)
3057 : return r;
3058 : /* je->l == foreign expression, je->r == primary expression */
3059 1226 : if (rel_find_exp(r->l, je->l)) {
3060 : fk_left = 1;
3061 33 : } else if (rel_find_exp(r->r, je->l)) {
3062 : fk_left = 0;
3063 : } else { /* not found */
3064 : return r;
3065 : }
3066 :
3067 : /* primary side must be a full table */
3068 1193 : if ((fk_left && (!is_left(r->op) && !is_full(r->op)) && !is_basetable(rr->op)) ||
3069 33 : (!fk_left && (!is_right(r->op) && !is_full(r->op)) && !is_basetable(rl->op)))
3070 613 : return r;
3071 :
3072 613 : if (!check_projection_on_foreignside(r, pexps, fk_left) || !check_projection_on_foreignside(r, orderexps, fk_left))
3073 575 : return r;
3074 :
3075 : /* rewrite, ie remove pkey side if possible */
3076 38 : le = (sql_exp*)je->l, re = (sql_exp*)je->l;
3077 :
3078 : /* both have NULL and there are semantics, the join cannot be removed */
3079 38 : if (is_semantics(je) && has_nil(le) && has_nil(re))
3080 : return r;
3081 :
3082 38 : (*changes)++;
3083 : /* if the foreign key column doesn't have NULL values, then return it */
3084 38 : if (!has_nil(le) || is_full(r->op) || (fk_left && is_left(r->op)) || (!fk_left && is_right(r->op))) {
3085 : /* if ->attr, introduce group by on index */
3086 32 : if (fk_left) {
3087 24 : nr = rel_dup(r->l);
3088 : } else {
3089 8 : nr = rel_dup(r->r);
3090 : }
3091 32 : if (!list_empty(r->attr)) {
3092 0 : nr = rel_groupby(sql, nr, NULL);
3093 0 : if (nr) {
3094 : // printf("# introduced groupby \n");
3095 0 : nr->r = append(sa_list(sql->sa), le);
3096 0 : nr->exps = r->attr;
3097 : }
3098 : }
3099 32 : return nr;
3100 : }
3101 :
3102 : /* remove NULL values, ie generate a select not null */
3103 6 : nje = exp_compare(sql->sa, exp_ref(sql, le), exp_atom(sql->sa, atom_general(sql->sa, exp_subtype(le), NULL, 0)), cmp_equal);
3104 6 : set_anti(nje);
3105 6 : set_has_no_nil(nje);
3106 6 : set_semantics(nje);
3107 6 : if (fk_left) {
3108 6 : nr = rel_dup(r->l);
3109 : } else {
3110 0 : nr = rel_dup(r->r);
3111 : }
3112 6 : nr = rel_select(sql->sa, nr, nje);
3113 6 : set_processed(nr);
3114 6 : return nr;
3115 : }
3116 :
3117 : static sql_rel *
3118 1736 : rel_simplify_count_fk_join(mvc *sql, sql_rel *r, list *gexps, list *gcols, int *changes)
3119 : {
3120 1736 : sql_rel *rl = r->l, *rr = r->r, *nr = NULL;
3121 1736 : sql_exp *je, *le, *nje, *re, *oce;
3122 1736 : int fk_left = 1;
3123 :
3124 : /* check for foreign key join */
3125 1736 : if (list_length(r->exps) != 1)
3126 : return r;
3127 1735 : if (!(je = exps_find_prop(r->exps, PROP_JOINIDX)) || je->flag != cmp_equal)
3128 : return r;
3129 : /* je->l == foreign expression, je->r == primary expression */
3130 61 : if (rel_find_exp(r->l, je->l)) {
3131 : fk_left = 1;
3132 7 : } else if (rel_find_exp(r->r, je->l)) {
3133 : fk_left = 0;
3134 : } else { /* not found */
3135 : return r;
3136 : }
3137 :
3138 61 : oce = gexps->h->data;
3139 61 : if (oce->l) /* we only handle COUNT(*) */
3140 : return r;
3141 :
3142 : /* primary side must be a full table */
3143 60 : if ((fk_left && (!is_left(r->op) && !is_full(r->op)) && !is_basetable(rr->op)) ||
3144 6 : (!fk_left && (!is_right(r->op) && !is_full(r->op)) && !is_basetable(rl->op)))
3145 : return r;
3146 :
3147 33 : if (fk_left && is_join(rl->op) && !rel_is_ref(rl)) {
3148 12 : r->l = rel_simplify_count_fk_join(sql, rl, gexps, gcols, changes);
3149 12 : if (rl != r->l)
3150 9 : rel_destroy(rl);
3151 : }
3152 39 : if (!fk_left && is_join(rr->op) && !rel_is_ref(rr)) {
3153 2 : r->r = rel_simplify_count_fk_join(sql, rr, gexps, gcols, changes);
3154 2 : if (rr != r->r)
3155 2 : rel_destroy(rr);
3156 : }
3157 :
3158 39 : if (!check_projection_on_foreignside(r, gcols, fk_left))
3159 : return r;
3160 :
3161 : /* rewrite, ie remove pkey side if possible */
3162 37 : le = (sql_exp*)je->l, re = (sql_exp*)je->l;
3163 :
3164 : /* both have NULL and there are semantics, the join cannot be removed */
3165 37 : if (is_semantics(je) && has_nil(le) && has_nil(re))
3166 : return r;
3167 :
3168 37 : (*changes)++;
3169 : /* if the foreign key column doesn't have NULL values, then return it */
3170 37 : if (!has_nil(le) || is_full(r->op) || (fk_left && is_left(r->op)) || (!fk_left && is_right(r->op))) {
3171 31 : if (fk_left) {
3172 25 : nr = rel_dup(r->l);
3173 : } else {
3174 6 : nr = rel_dup(r->r);
3175 : }
3176 31 : return nr;
3177 : }
3178 :
3179 : /* remove NULL values, ie generate a select not null */
3180 6 : nje = exp_compare(sql->sa, exp_ref(sql, le), exp_atom(sql->sa, atom_general(sql->sa, exp_subtype(le), NULL, 0)), cmp_equal);
3181 6 : set_anti(nje);
3182 6 : set_has_no_nil(nje);
3183 6 : set_semantics(nje);
3184 6 : if (fk_left) {
3185 6 : nr = rel_dup(r->l);
3186 : } else {
3187 0 : nr = rel_dup(r->r);
3188 : }
3189 6 : nr = rel_select(sql->sa, nr, nje);
3190 6 : set_processed(nr);
3191 6 : return nr;
3192 : }
3193 :
3194 : /*
3195 : * Handle (left/right/outer/natural) join fk-pk rewrites
3196 : * 1 group by ( fk-pk-join () ) [ count(*) ] -> group by ( fk )
3197 : * 2 project ( fk-pk-join () ) [ fk-column ] -> project (fk table)[ fk-column ]
3198 : * 3 project ( fk1-pk1-join( fk2-pk2-join()) [ fk-column, pk1 column ] -> project (fk1-pk1-join)[ fk-column, pk1 column ]
3199 : */
3200 : static inline sql_rel *
3201 3887543 : rel_simplify_fk_joins(visitor *v, sql_rel *rel)
3202 : {
3203 3887543 : sql_rel *r = NULL;
3204 :
3205 3887543 : if (is_simple_project(rel->op))
3206 1196452 : r = rel->l;
3207 :
3208 3887581 : while (is_simple_project(rel->op) && r && list_length(r->exps) == 1 && (is_join(r->op) || r->op == op_semi) && !(rel_is_ref(r))) {
3209 233811 : sql_rel *or = r;
3210 :
3211 233811 : r = rel_simplify_project_fk_join(v->sql, r, rel->exps, rel->r, &v->changes);
3212 233811 : if (r == or)
3213 : return rel;
3214 38 : rel_destroy(rel->l);
3215 38 : rel->l = r;
3216 : }
3217 :
3218 3653770 : if (!is_groupby(rel->op))
3219 : return rel;
3220 :
3221 126178 : r = rel->l;
3222 189207 : while(r && is_simple_project(r->op))
3223 63029 : r = r->l;
3224 :
3225 141246 : while (is_groupby(rel->op) && !rel_is_ref(rel) && r && (is_join(r->op) || r->op == op_semi) && list_length(r->exps) == 1 && !(rel_is_ref(r)) &&
3226 : /* currently only single count aggregation is handled, no other projects or aggregation */
3227 146733 : list_length(rel->exps) == 1 && exp_aggr_is_count(rel->exps->h->data)) {
3228 1722 : sql_rel *or = r;
3229 :
3230 1722 : r = rel_simplify_count_fk_join(v->sql, r, rel->exps, rel->r, &v->changes);
3231 1722 : if (r == or)
3232 : return rel;
3233 26 : rel_destroy(rel->l);
3234 26 : rel->l = r;
3235 : }
3236 : return rel;
3237 : }
3238 :
3239 : /*
3240 : * Gets the column expressions of a diff function and adds them to "columns".
3241 : * The diff function has two possible argument types: either a sql_exp representing a column
3242 : * or a sql_exp representing another diff function, therefore this function is recursive.
3243 : */
3244 : static void
3245 90 : get_diff_function_columns(sql_exp *diffExp, list *columns) {
3246 90 : list *args = diffExp->l;
3247 :
3248 184 : for (node *arg = args->h; arg; arg = arg->next) {
3249 94 : sql_exp *exp = arg->data;
3250 :
3251 : // diff function
3252 94 : if (exp->type == e_func) {
3253 4 : get_diff_function_columns(exp, columns);
3254 : }
3255 : // column
3256 : else {
3257 90 : list_append(columns, exp);
3258 : }
3259 : }
3260 90 : }
3261 :
3262 : /*
3263 : * Builds a list of aggregation key columns to be used by the select push down algorithm, namely for
3264 : * window functions. Returns NULL if the window function does not partition by any column
3265 : */
3266 : static list *
3267 5434 : get_aggregation_key_columns(allocator *sa, sql_rel *r) {
3268 46563 : for (node* n = r->exps->h; n; n = n->next) {
3269 41241 : sql_exp *e = n->data;
3270 :
3271 41241 : if (e->type == e_func) {
3272 9729 : sql_subfunc *f = e->f;
3273 :
3274 : // aggregation function
3275 9729 : if (!strcmp(f->func->base.name, "rank")) {
3276 112 : list* rankArguments = e->l;
3277 : // the partition key is the second argument
3278 112 : sql_exp *partitionExp = rankArguments->h->next->data;
3279 :
3280 : // check if the key contains any columns, i.e., is a diff function
3281 112 : if (partitionExp->type == e_func) {
3282 : // get columns to list
3283 86 : list *aggColumns = sa_list(sa);
3284 86 : get_diff_function_columns(partitionExp, aggColumns);
3285 86 : return aggColumns;
3286 : }
3287 : // the function has no aggregation columns (e_atom of boolean)
3288 : else {
3289 : return NULL;
3290 : }
3291 :
3292 : }
3293 : }
3294 : }
3295 : return NULL;
3296 : }
3297 :
3298 : /*
3299 : * Checks if a filter column is also used as an aggregation key, so it can be later safely pushed down.
3300 : */
3301 : static int
3302 139 : filter_column_in_aggregation_columns(sql_exp *column, list *aggColumns) {
3303 : /* check if it is a column or an e_convert, and get the actual column if it is the latter */
3304 139 : if (column->type == e_convert) {
3305 1 : column = column->l;
3306 : }
3307 :
3308 139 : char *tableName = column->l;
3309 139 : char *columnName = column->r;
3310 :
3311 278 : for (node *n = aggColumns->h; n; n = n->next) {
3312 148 : sql_exp *aggCol = n->data;
3313 148 : char *aggColTableName = aggCol->l;
3314 148 : char *aggColColumnName = aggCol->r;
3315 :
3316 148 : if (!strcmp(tableName, aggColTableName) && !strcmp(columnName, aggColColumnName)) {
3317 : /* match */
3318 : return 1;
3319 : }
3320 : }
3321 :
3322 : /* no matches found */
3323 : return 0;
3324 : }
3325 :
3326 :
3327 : /*
3328 : * Push select down, pushes the selects through (simple) projections. Also
3329 : * it cleans up the projections which become useless.
3330 : *
3331 : * WARNING - Make sure to call try_remove_empty_select macro before returning so we ensure
3332 : * possible generated empty selects won't never be generated
3333 : */
3334 : static sql_rel *
3335 7763353 : rel_push_select_down(visitor *v, sql_rel *rel)
3336 : {
3337 7763353 : list *exps = NULL;
3338 7763353 : sql_rel *r = NULL;
3339 7763353 : node *n;
3340 :
3341 7763353 : if (rel_is_ref(rel)) {
3342 378587 : if (is_select(rel->op) && rel->exps) {
3343 : /* add inplace empty select */
3344 1382 : sql_rel *l = rel_select(v->sql->sa, rel->l, NULL);
3345 :
3346 1382 : l->exps = rel->exps;
3347 1382 : set_processed(l);
3348 1382 : rel->exps = NULL;
3349 1382 : rel->l = l;
3350 1382 : v->changes++;
3351 : }
3352 378587 : return rel;
3353 : }
3354 :
3355 : /* don't make changes for empty selects */
3356 7384766 : if (is_select(rel->op) && list_empty(rel->exps))
3357 12 : return try_remove_empty_select(v, rel);
3358 :
3359 : /* merge 2 selects */
3360 7384754 : r = rel->l;
3361 7384754 : if (is_select(rel->op) && r && r->exps && is_select(r->op) && !(rel_is_ref(r)) && !exps_have_func(rel->exps)) {
3362 33 : (void)list_merge(r->exps, rel->exps, (fdup)NULL);
3363 33 : rel->l = NULL;
3364 33 : rel_destroy(rel);
3365 33 : v->changes++;
3366 33 : return try_remove_empty_select(v, r);
3367 : }
3368 : /*
3369 : * Push select through semi/anti join
3370 : * select (semi(A,B)) == semi(select(A), B)
3371 : */
3372 7384721 : if (is_select(rel->op) && r && is_semi(r->op) && !(rel_is_ref(r))) {
3373 272 : rel->l = r->l;
3374 272 : r->l = rel;
3375 272 : v->changes++;
3376 : /*
3377 : * if A has 2 references (ie used on both sides of
3378 : * the semi join), we also push the select into A.
3379 : */
3380 272 : if (rel_is_ref(rel->l) && rel->l == rel_find_ref(r->r)){
3381 0 : sql_rel *lx = rel->l;
3382 0 : sql_rel *rx = r->r;
3383 0 : if (lx->ref.refcnt == 2 && !rel_is_ref(rx)) {
3384 0 : while (rx->l && !rel_is_ref(rx->l) &&
3385 0 : (is_project(rx->op) ||
3386 : is_select(rx->op) ||
3387 : is_join(rx->op)))
3388 : rx = rx->l;
3389 : /* probably we need to introduce a project */
3390 0 : rel_destroy(rel->l);
3391 0 : lx = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
3392 0 : r->l = lx;
3393 0 : rx->l = rel_dup(lx);
3394 : }
3395 : }
3396 272 : return r;
3397 : }
3398 7384449 : exps = rel->exps;
3399 :
3400 : /* push select through join */
3401 7384449 : if (is_select(rel->op) && r && is_join(r->op) && !rel_is_ref(r) && !is_single(r)){
3402 21570 : sql_rel *jl = r->l, *ojl = jl, *jr = r->r, *ojr = jr;
3403 21570 : int left = r->op == op_join || r->op == op_left;
3404 21570 : int right = r->op == op_join || r->op == op_right;
3405 :
3406 21570 : if (r->op == op_full)
3407 : return rel;
3408 :
3409 : /* introduce selects under the join (if needed) */
3410 21544 : set_processed(jl);
3411 21544 : set_processed(jr);
3412 47892 : for (n = exps->h; n;) {
3413 26348 : node *next = n->next;
3414 26348 : sql_exp *e = n->data;
3415 :
3416 26348 : if (left && rel_rebind_exp(v->sql, jl, e)) {
3417 13961 : if (!is_select(jl->op) || rel_is_ref(jl))
3418 11209 : r->l = jl = rel_select(v->sql->sa, jl, NULL);
3419 13961 : rel_select_add_exp(v->sql->sa, jl, e);
3420 13961 : list_remove_node(exps, NULL, n);
3421 13961 : v->changes++;
3422 12387 : } else if (right && rel_rebind_exp(v->sql, jr, e)) {
3423 4781 : if (!is_select(jr->op) || rel_is_ref(jr))
3424 3961 : r->r = jr = rel_select(v->sql->sa, jr, NULL);
3425 4781 : rel_select_add_exp(v->sql->sa, jr, e);
3426 4781 : list_remove_node(exps, NULL, n);
3427 4781 : v->changes++;
3428 : }
3429 : n = next;
3430 : }
3431 21544 : if (ojl != jl)
3432 11209 : set_processed(jl);
3433 21544 : if (ojr != jr)
3434 3961 : set_processed(jr);
3435 : }
3436 :
3437 : /* merge select and cross product ? */
3438 7384423 : if (is_select(rel->op) && r && r->op == op_join && !rel_is_ref(r) && !is_single(r)){
3439 14254 : for (n = exps->h; n;) {
3440 1859 : node *next = n->next;
3441 1859 : sql_exp *e = n->data;
3442 :
3443 1859 : if (exp_is_join(e, NULL) == 0) {
3444 664 : if (!r->exps)
3445 89 : r->exps = sa_list(v->sql->sa);
3446 664 : append(r->exps, e);
3447 664 : list_remove_node(exps, NULL, n);
3448 664 : v->changes++;
3449 : }
3450 : n = next;
3451 : }
3452 : }
3453 :
3454 7384423 : if (is_select(rel->op) && r && (is_simple_project(r->op) || (is_groupby(r->op) && !list_empty(r->r))) && !rel_is_ref(r) && !is_single(r)){
3455 59506 : sql_rel *pl = r->l, *opl = pl;
3456 : /* we cannot push through window functions (for safety I disabled projects over DDL too) */
3457 59506 : if (pl && pl->op != op_ddl && !exps_have_unsafe(r->exps, 0)) {
3458 : /* introduce selects under the project (if needed) */
3459 38864 : set_processed(pl);
3460 38864 : if (!pl->exps)
3461 392 : pl->exps = sa_list(v->sql->sa);
3462 85498 : for (n = exps->h; n;) {
3463 46634 : node *next = n->next;
3464 46634 : sql_exp *e = n->data, *ne = NULL;
3465 :
3466 : /* can we move it down */
3467 46634 : if (e->type == e_cmp && (ne = exp_push_down_prj(v->sql, e, r, pl)) && ne != e) {
3468 25455 : if (!(is_select(pl->op) && is_join(pl->op) && is_semi(pl->op)) || rel_is_ref(pl))
3469 25455 : r->l = pl = rel_select(v->sql->sa, pl, NULL);
3470 25455 : rel_select_add_exp(v->sql->sa, pl, ne);
3471 25455 : list_remove_node(exps, NULL, n);
3472 25455 : v->changes++;
3473 : }
3474 : n = next;
3475 : }
3476 38864 : if (opl != pl)
3477 17586 : set_processed(pl);
3478 : }
3479 :
3480 : /* push filters if they match the aggregation key on a window function */
3481 5434 : else if (pl && pl->op != op_ddl && exps_have_unsafe(r->exps, 0)) {
3482 5434 : set_processed(pl);
3483 : /* list of aggregation key columns */
3484 5434 : list *aggColumns = get_aggregation_key_columns(v->sql->sa, r);
3485 :
3486 : /* aggregation keys found, check if any filter matches them */
3487 5434 : if (aggColumns) {
3488 229 : for (n = exps->h; n;) {
3489 143 : node *next = n->next;
3490 143 : sql_exp *e = n->data, *ne = NULL;
3491 :
3492 143 : if (e->type == e_cmp) {
3493 : /* simple comparison filter */
3494 143 : if (e->flag == cmp_gt || e->flag == cmp_gte || e->flag == cmp_lte || e->flag == cmp_lt
3495 143 : || e->flag == cmp_equal || e->flag == cmp_notequal || e->flag == cmp_in || e->flag == cmp_notin
3496 5 : || (e->flag == cmp_filter && ((list*)e->l)->cnt == 1)) {
3497 139 : sql_exp* column;
3498 : /* the column in 'like' filters is stored inside a list */
3499 139 : if (e->flag == cmp_filter) {
3500 1 : column = ((list*)e->l)->h->data;
3501 : } else {
3502 138 : column = e->l;
3503 : }
3504 :
3505 : /* check if the expression matches any aggregation key, meaning we can
3506 : try to safely push it down */
3507 139 : if (filter_column_in_aggregation_columns(column, aggColumns)) {
3508 9 : ne = exp_push_down_prj(v->sql, e, r, pl);
3509 :
3510 : /* can we move it down */
3511 9 : if (ne && ne != e && pl->exps) {
3512 9 : if (!is_select(pl->op) || rel_is_ref(pl))
3513 8 : r->l = pl = rel_select(v->sql->sa, pl, NULL);
3514 9 : rel_select_add_exp(v->sql->sa, pl, ne);
3515 9 : list_remove_node(exps, NULL, n);
3516 9 : v->changes++;
3517 : }
3518 : }
3519 : }
3520 : }
3521 : n = next;
3522 : }
3523 :
3524 : /* cleanup list */
3525 86 : list_destroy(aggColumns);
3526 : }
3527 : }
3528 : }
3529 :
3530 : /* try push select under set relation */
3531 7384423 : if (is_select(rel->op) && r && is_set(r->op) && !list_empty(r->exps) && !rel_is_ref(r) && !is_single(r) && !list_empty(exps)) {
3532 267 : sql_rel *u = r, *ul = u->l, *ur = u->r;
3533 :
3534 267 : ul = rel_dup(ul);
3535 267 : ur = rel_dup(ur);
3536 267 : if (!is_project(ul->op))
3537 0 : ul = rel_project(v->sql->sa, ul,
3538 : rel_projections(v->sql, ul, NULL, 1, 1));
3539 267 : if (!is_project(ur->op))
3540 0 : ur = rel_project(v->sql->sa, ur,
3541 : rel_projections(v->sql, ur, NULL, 1, 1));
3542 267 : rel_rename_exps(v->sql, u->exps, ul->exps);
3543 267 : rel_rename_exps(v->sql, u->exps, ur->exps);
3544 :
3545 : /* introduce selects under the set */
3546 267 : ul = rel_select(v->sql->sa, ul, NULL);
3547 267 : ul->exps = exps_copy(v->sql, exps);
3548 267 : set_processed(ul);
3549 267 : ur = rel_select(v->sql->sa, ur, NULL);
3550 267 : ur->exps = exps_copy(v->sql, exps);
3551 267 : set_processed(ur);
3552 :
3553 267 : rel = rel_inplace_setop(v->sql, rel, ul, ur, u->op, rel_projections(v->sql, rel, NULL, 1, 1));
3554 267 : if (need_distinct(u))
3555 1 : set_distinct(rel);
3556 267 : v->changes++;
3557 : }
3558 7384423 : if (is_select(rel->op) && r && is_munion(r->op) && !list_empty(r->exps) && !rel_is_ref(r) && !is_single(r) && !list_empty(exps)) {
3559 3908 : sql_rel *u = r;
3560 3908 : list *rels = u->l, *nrels = sa_list(v->sql->sa);
3561 11724 : for(node *n = rels->h; n; n = n->next) {
3562 7816 : sql_rel *ul = n->data;
3563 7816 : ul = rel_dup(ul);
3564 7816 : if (!is_project(ul->op))
3565 0 : ul = rel_project(v->sql->sa, ul,
3566 : rel_projections(v->sql, ul, NULL, 1, 1));
3567 7816 : rel_rename_exps(v->sql, u->exps, ul->exps);
3568 :
3569 : /* introduce selects under the set */
3570 7816 : ul = rel_select(v->sql->sa, ul, NULL);
3571 7816 : ul->exps = exps_copy(v->sql, exps);
3572 7816 : set_processed(ul);
3573 7816 : nrels = append(nrels, ul);
3574 : }
3575 :
3576 3908 : rel = rel_inplace_setop_n_ary(v->sql, rel, nrels, u->op, rel_projections(v->sql, rel, NULL, 1, 1));
3577 3908 : if (need_distinct(u))
3578 6 : set_distinct(rel);
3579 3908 : v->changes++;
3580 : }
3581 :
3582 7384423 : return try_remove_empty_select(v, rel);
3583 : }
3584 :
3585 : static int
3586 12092 : index_exp(sql_exp *e, sql_idx *i)
3587 : {
3588 12092 : if (e->type == e_cmp && !is_complex_exp(e->flag)) {
3589 6860 : switch(i->type) {
3590 6860 : case hash_idx:
3591 : case oph_idx:
3592 6860 : if (e->flag == cmp_equal)
3593 : return 0;
3594 : /* fall through */
3595 : case join_idx:
3596 : default:
3597 1281 : return -1;
3598 : }
3599 : }
3600 : return -1;
3601 : }
3602 :
3603 : /* find column for the select/join expression */
3604 : static sql_column *
3605 5579 : sjexp_col(sql_exp *e, sql_rel *r)
3606 : {
3607 5579 : sql_column *res = NULL;
3608 :
3609 5579 : if (e->type == e_cmp && !is_complex_exp(e->flag)) {
3610 5579 : res = exp_find_column(r, e->l, -2);
3611 5579 : if (!res)
3612 519 : res = exp_find_column(r, e->r, -2);
3613 : }
3614 5579 : return res;
3615 : }
3616 :
3617 : static sql_idx *
3618 1973813 : find_index(allocator *sa, sql_rel *rel, sql_rel *sub, list **EXPS)
3619 : {
3620 1973813 : node *n;
3621 :
3622 : /* any (partial) match of the expressions with the index columns */
3623 : /* Depending on the index type we may need full matches and only
3624 : limited number of cmp types (hash only equality etc) */
3625 : /* Depending on the index type we should (in the rel_bin) generate
3626 : more code, ie for spatial index add post filter etc, for hash
3627 : compute hash value and use index */
3628 :
3629 1973813 : if (sub->exps && rel->exps)
3630 7264573 : for(n = sub->exps->h; n; n = n->next) {
3631 5536948 : prop *p;
3632 5536948 : sql_exp *e = n->data;
3633 :
3634 5536948 : if ((p = find_prop(e->p, PROP_HASHIDX)) != NULL) {
3635 9457 : list *exps, *cols;
3636 9457 : sql_idx *i = p->value.pval;
3637 9457 : fcmp cmp = (fcmp)&sql_column_kc_cmp;
3638 :
3639 : /* join indices are only interesting for joins */
3640 9457 : if (i->type == join_idx || list_length(i->columns) <= 1)
3641 0 : continue;
3642 : /* based on the index type, find qualifying exps */
3643 9457 : exps = list_select(rel->exps, i, (fcmp) &index_exp, (fdup)NULL);
3644 9457 : if (list_empty(exps))
3645 4844 : continue;
3646 : /* now we obtain the columns, move into sql_column_kc_cmp! */
3647 4613 : cols = list_map(exps, sub, (fmap) &sjexp_col);
3648 :
3649 : /* TODO check that at most 2 relations are involved */
3650 :
3651 : /* Match the index columns with the expression columns.
3652 : TODO, Allow partial matches ! */
3653 4613 : if (list_match(cols, i->columns, cmp) == 0) {
3654 : /* re-order exps in index order */
3655 303 : node *n, *m;
3656 303 : list *es = sa_list(sa);
3657 :
3658 998 : for(n = i->columns->h; n; n = n->next) {
3659 695 : int i = 0;
3660 2170 : for(m = cols->h; m; m = m->next, i++) {
3661 2170 : if (cmp(m->data, n->data) == 0){
3662 695 : sql_exp *e = list_fetch(exps, i);
3663 695 : list_append(es, e);
3664 695 : break;
3665 : }
3666 : }
3667 : }
3668 : /* fix the destroy function */
3669 303 : cols->destroy = NULL;
3670 303 : *EXPS = es;
3671 303 : e->used = 1;
3672 303 : return i;
3673 : }
3674 4310 : cols->destroy = NULL;
3675 : }
3676 : }
3677 : return NULL;
3678 : }
3679 :
3680 : static inline sql_rel *
3681 1260151 : rel_use_index(visitor *v, sql_rel *rel)
3682 : {
3683 1260151 : list *exps = NULL;
3684 1260151 : sql_idx *i = find_index(v->sql->sa, rel, rel->l, &exps);
3685 1260151 : int left = 1;
3686 :
3687 1260151 : assert(is_select(rel->op) || is_join(rel->op));
3688 1260151 : if (!i && is_join(rel->op)) {
3689 713662 : left = 0;
3690 713662 : i = find_index(v->sql->sa, rel, rel->r, &exps);
3691 : }
3692 :
3693 1259943 : if (i) {
3694 303 : prop *p;
3695 303 : node *n;
3696 303 : int single_table = 1;
3697 303 : sql_exp *re = NULL;
3698 :
3699 993 : for( n = exps->h; n && single_table; n = n->next) {
3700 690 : sql_exp *e = n->data, *nre = e->l;
3701 :
3702 690 : if (!is_compare(e->type) || is_anti(e) || e->flag != cmp_equal)
3703 : return rel;
3704 690 : if (is_join(rel->op) && ((left && !rel_find_exp(rel->l, nre)) || (!left && rel_find_exp(rel->r, nre))))
3705 67 : nre = e->r;
3706 690 : single_table = (!re || (exp_relname(nre) && exp_relname(re) && strcmp(exp_relname(nre), exp_relname(re)) == 0));
3707 690 : re = nre;
3708 : }
3709 303 : if (single_table) { /* add PROP_HASHCOL to all column exps */
3710 972 : for( n = exps->h; n; n = n->next) {
3711 676 : sql_exp *e = n->data;
3712 :
3713 : /* swapped ? */
3714 676 : if (is_join(rel->op) && ((left && !rel_find_exp(rel->l, e->l)) || (!left && !rel_find_exp(rel->r, e->l)))) {
3715 165 : exp_swap(e);
3716 : }
3717 676 : p = find_prop(e->p, PROP_HASHCOL);
3718 676 : if (!p)
3719 285 : e->p = p = prop_create(v->sql->sa, PROP_HASHCOL, e->p);
3720 676 : p->value.pval = i;
3721 : }
3722 : }
3723 : /* add the remaining exps to the new exp list */
3724 303 : if (list_length(rel->exps) > list_length(exps)) {
3725 92 : for( n = rel->exps->h; n; n = n->next) {
3726 69 : sql_exp *e = n->data;
3727 69 : if (!list_find(exps, e, (fcmp)&exp_cmp))
3728 23 : list_append(exps, e);
3729 : }
3730 : }
3731 303 : rel->exps = exps;
3732 : }
3733 : return rel;
3734 : }
3735 :
3736 : static sql_rel *
3737 3887543 : rel_select_leftgroup_2_semi(visitor *v, sql_rel *rel)
3738 : {
3739 3887543 : if (rel_is_ref(rel) || !is_select(rel->op) || list_empty(rel->exps))
3740 3348213 : return rel;
3741 539330 : sql_rel *l = rel->l;
3742 :
3743 539330 : if (!l || rel_is_ref(l) || !is_left(l->op) || list_empty(l->attr))
3744 538313 : return rel;
3745 :
3746 2026 : for(node *n = rel->exps->h; n; n = n->next) {
3747 1017 : sql_exp *e = n->data;
3748 :
3749 1017 : if (e->type == e_cmp && !is_semantics(e) && !e->f) {
3750 1004 : list *attrs = l->attr;
3751 1004 : sql_exp *a = attrs->h->data;
3752 :
3753 1004 : if (exps_find_exp(l->attr, e->l) && exp_is_true(e->r) && e->flag == cmp_equal /*&& exp_is_true(a)*/) {
3754 : // printf("# optimize select leftgroup -> semi\n");
3755 8 : if (!list_empty(l->exps)) {
3756 12 : for(node *m = l->exps->h; m; m = m->next) {
3757 6 : sql_exp *j = m->data;
3758 6 : reset_any(j);
3759 : }
3760 : }
3761 8 : l->attr = NULL;
3762 8 : l->op = exp_is_true(a)?op_semi:op_anti;
3763 8 : list_remove_node(rel->exps, NULL, n);
3764 8 : rel = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
3765 8 : list_append(rel->exps, attrs->h->data);
3766 8 : v->changes++;
3767 8 : return rel;
3768 : }
3769 : }
3770 : }
3771 : return rel;
3772 : }
3773 :
3774 : static sql_rel *
3775 3887543 : rel_optimize_select_and_joins_topdown_(visitor *v, sql_rel *rel)
3776 : {
3777 : /* push_join_down introduces semijoins */
3778 3887543 : uint8_t cycle = *(uint8_t*) v->data;
3779 3887543 : if (cycle <= 0) {
3780 1648739 : rel = rel_semijoin_use_fk(v, rel);
3781 1648739 : rel = rel_push_join_down(v, rel);
3782 : }
3783 :
3784 3887543 : rel = rel_simplify_fk_joins(v, rel);
3785 3887543 : rel = rel_push_select_down(v, rel);
3786 3887543 : rel = rel_select_leftgroup_2_semi(v, rel);
3787 3887543 : if (rel && rel->l && (is_select(rel->op) || is_join(rel->op)))
3788 1260151 : rel = rel_use_index(v, rel);
3789 3887543 : return rel;
3790 : }
3791 :
3792 : static sql_rel *
3793 101963 : rel_optimize_select_and_joins_topdown(visitor *v, global_props *gp, sql_rel *rel)
3794 : {
3795 101963 : v->data = &gp->opt_cycle;
3796 101963 : rel = rel_visitor_topdown(v, rel, &rel_optimize_select_and_joins_topdown_);
3797 101963 : v->data = gp;
3798 101963 : return rel;
3799 : }
3800 :
3801 : run_optimizer
3802 601202 : bind_optimize_select_and_joins_topdown(visitor *v, global_props *gp)
3803 : {
3804 601202 : int flag = v->sql->sql_optimizer;
3805 601028 : return gp->opt_level == 1 && (gp->cnt[op_join] || gp->cnt[op_left] || gp->cnt[op_right]
3806 534749 : || gp->cnt[op_full] || gp->cnt[op_semi] || gp->cnt[op_anti] ||
3807 1202230 : gp->cnt[op_select]) && (flag & optimize_select_and_joins_topdown) ? rel_optimize_select_and_joins_topdown : NULL;
3808 : }
3809 :
3810 :
3811 : static int
3812 1475629 : can_push_func(sql_exp *e, sql_rel *rel, int *must, int depth)
3813 : {
3814 1514308 : switch(e->type) {
3815 1310292 : case e_cmp: {
3816 1310292 : sql_exp *l = e->l, *r = e->r, *f = e->f;
3817 :
3818 : /* don't push down functions inside attribute joins */
3819 1310292 : if (e->flag == cmp_or || e->flag == cmp_in || e->flag == cmp_notin || e->flag == cmp_filter || (is_join(rel->op) && is_any(e)))
3820 : return 0;
3821 1236388 : if (depth > 0) { /* for comparisons under the top ones, they become functions */
3822 53 : int lmust = 0;
3823 53 : int res = can_push_func(l, rel, &lmust, depth + 1) && can_push_func(r, rel, &lmust, depth + 1) &&
3824 19 : (!f || can_push_func(f, rel, &lmust, depth + 1));
3825 13 : if (res && !lmust)
3826 : return 1;
3827 53 : (*must) |= lmust;
3828 53 : return res;
3829 : } else {
3830 1236335 : int mustl = 0, mustr = 0, mustf = 0;
3831 1236335 : return ((l->type == e_column || can_push_func(l, rel, &mustl, depth + 1)) && (*must = mustl)) ||
3832 2465358 : ((r->type == e_column || can_push_func(r, rel, &mustr, depth + 1)) && (*must = mustr)) ||
3833 3538 : ((f && (f->type == e_column || can_push_func(f, rel, &mustf, depth + 1)) && (*must = mustf)));
3834 : }
3835 : }
3836 38679 : case e_convert:
3837 38679 : return can_push_func(e->l, rel, must, depth + 1);
3838 10204 : case e_aggr:
3839 : case e_func: {
3840 10204 : list *l = e->l;
3841 10204 : int res = 1, lmust = 0;
3842 :
3843 10204 : if (exp_unsafe(e, 0))
3844 : return 0;
3845 28462 : if (l) for (node *n = l->h; n && res; n = n->next)
3846 18262 : res &= can_push_func(n->data, rel, &lmust, depth + 1);
3847 10200 : if (res && !lmust)
3848 : return 1;
3849 9367 : (*must) |= lmust;
3850 9367 : return res;
3851 : }
3852 45730 : case e_column:
3853 45730 : if (rel && !rel_find_exp(rel, e))
3854 : return 0;
3855 27642 : (*must) = 1;
3856 : /* fall through */
3857 : default:
3858 : return 1;
3859 : }
3860 : }
3861 :
3862 : static int
3863 1042329 : exps_can_push_func(list *exps, sql_rel *rel)
3864 : {
3865 4035068 : for(node *n = exps->h; n; n = n->next) {
3866 3014156 : sql_exp *e = n->data;
3867 3014156 : int mustl = 0, mustr = 0;
3868 :
3869 3014156 : if ((is_joinop(rel->op) || is_select(rel->op)) && ((can_push_func(e, rel->l, &mustl, 0) && mustl)))
3870 21417 : return 1;
3871 3009629 : if (is_joinop(rel->op) && can_push_func(e, rel->r, &mustr, 0) && mustr)
3872 : return 1;
3873 : }
3874 : return 0;
3875 : }
3876 :
3877 : static int
3878 75916 : exp_needs_push_down(sql_rel *rel, sql_exp *e)
3879 : {
3880 97509 : switch(e->type) {
3881 25650 : case e_cmp:
3882 : /* don't push down functions inside attribute joins */
3883 25650 : if (e->flag == cmp_or || e->flag == cmp_in || e->flag == cmp_notin || e->flag == cmp_filter || (is_join(rel->op) && is_any(e)))
3884 : return 0;
3885 24694 : return exp_needs_push_down(rel, e->l) || exp_needs_push_down(rel, e->r) || (e->f && exp_needs_push_down(rel, e->f));
3886 21593 : case e_convert:
3887 21593 : return exp_needs_push_down(rel, e->l);
3888 2280 : case e_aggr:
3889 : case e_func:
3890 2280 : if (!e->l || exps_are_atoms(e->l))
3891 19 : return 0;
3892 : return 1;
3893 1802 : case e_atom:
3894 1802 : if (!e->f || exps_are_atoms(e->f))
3895 1802 : return 0;
3896 : return 1;
3897 : case e_column:
3898 : default:
3899 : return 0;
3900 : }
3901 : }
3902 :
3903 : static int
3904 21417 : exps_need_push_down(sql_rel *rel, list *exps )
3905 : {
3906 44795 : for(node *n = exps->h; n; n = n->next)
3907 25639 : if (exp_needs_push_down(rel, n->data))
3908 : return 1;
3909 : return 0;
3910 : }
3911 :
3912 : static sql_exp *exp_push_single_func_down(visitor *v, sql_rel *rel, sql_rel *ol, sql_rel *or, sql_exp *e, int depth);
3913 :
3914 : static list *
3915 2755 : exps_push_single_func_down(visitor *v, sql_rel *rel, sql_rel *ol, sql_rel *or, list *exps, int depth)
3916 : {
3917 5510 : if (mvc_highwater(v->sql))
3918 : return exps;
3919 :
3920 7842 : for (node *n = exps->h; n; n = n->next)
3921 5087 : if ((n->data = exp_push_single_func_down(v, rel, ol, or, n->data, depth)) == NULL)
3922 : return NULL;
3923 : return exps;
3924 : }
3925 :
3926 : static sql_exp *
3927 18132 : exp_push_single_func_down(visitor *v, sql_rel *rel, sql_rel *ol, sql_rel *or, sql_exp *e, int depth)
3928 : {
3929 31946 : if (mvc_highwater(v->sql))
3930 : return e;
3931 :
3932 18132 : switch(e->type) {
3933 5064 : case e_cmp: {
3934 5064 : if (e->flag == cmp_or || e->flag == cmp_filter) {
3935 243 : if ((e->l = exps_push_single_func_down(v, rel, ol, or, e->l, depth + 1)) == NULL)
3936 : return NULL;
3937 243 : if ((e->r = exps_push_single_func_down(v, rel, ol, or, e->r, depth + 1)) == NULL)
3938 : return NULL;
3939 4821 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
3940 8 : if ((e->l = exp_push_single_func_down(v, rel, ol, or, e->l, depth + 1)) == NULL)
3941 : return NULL;
3942 8 : if ((e->r = exps_push_single_func_down(v, rel, ol, or, e->r, depth + 1)) == NULL)
3943 : return NULL;
3944 : } else {
3945 4813 : if ((e->l = exp_push_single_func_down(v, rel, ol, or, e->l, depth + 1)) == NULL)
3946 : return NULL;
3947 4813 : if ((e->r = exp_push_single_func_down(v, rel, ol, or, e->r, depth + 1)) == NULL)
3948 : return NULL;
3949 4813 : if (e->f && (e->f = exp_push_single_func_down(v, rel, ol, or, e->f, depth + 1)) == NULL)
3950 : return NULL;
3951 : }
3952 : } break;
3953 1982 : case e_convert:
3954 1982 : if ((e->l = exp_push_single_func_down(v, rel, ol, or, e->l, depth + 1)) == NULL)
3955 : return NULL;
3956 : break;
3957 4344 : case e_aggr:
3958 : case e_func: {
3959 4344 : sql_rel *l = rel->l, *r = rel->r;
3960 4344 : int must = 0, mustl = 0, mustr = 0;
3961 :
3962 4344 : if (exp_unsafe(e, 0))
3963 26 : return e;
3964 4338 : if (!e->l || exps_are_atoms(e->l))
3965 20 : return e;
3966 4318 : if ((is_joinop(rel->op) && ((can_push_func(e, l, &mustl, depth + 1) && mustl) || (can_push_func(e, r, &mustr, depth + 1) && mustr))) ||
3967 3351 : (is_select(rel->op) && can_push_func(e, l, &must, depth + 1) && must)) {
3968 4303 : exp_label(v->sql->sa, e, ++v->sql->label);
3969 : /* we need a full projection, group by's and unions cannot be extended with more expressions */
3970 4303 : if (mustr) {
3971 342 : if (r == or) /* don't project twice */
3972 320 : rel->r = r = rel_project(v->sql->sa, r, rel_projections(v->sql, r, NULL, 1, 1));
3973 342 : list_append(r->exps, e);
3974 : } else {
3975 3961 : if (l == ol) /* don't project twice */
3976 2204 : rel->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
3977 3961 : list_append(l->exps, e);
3978 : }
3979 4303 : e = exp_ref(v->sql, e);
3980 4303 : v->changes++;
3981 : }
3982 4318 : } break;
3983 1963 : case e_atom: {
3984 1963 : if (e->f && (e->f = exps_push_single_func_down(v, rel, ol, or, e->f, depth + 1)) == NULL)
3985 : return NULL;
3986 : } break;
3987 : case e_column:
3988 : case e_psm:
3989 : break;
3990 : }
3991 : return e;
3992 : }
3993 :
3994 : static inline sql_rel *
3995 3875810 : rel_push_func_down(visitor *v, sql_rel *rel)
3996 : {
3997 3875810 : if ((is_select(rel->op) || is_joinop(rel->op)) && rel->l && rel->exps && !(rel_is_ref(rel))) {
3998 1139336 : int changes = v->changes;
3999 1139336 : sql_rel *l = rel->l, *r = rel->r;
4000 :
4001 : /* only push down when is useful */
4002 1139336 : if ((is_select(rel->op) && list_length(rel->exps) <= 1) || rel_is_ref(l) || (is_joinop(rel->op) && rel_is_ref(r)))
4003 473888 : return rel;
4004 665448 : if (exps_can_push_func(rel->exps, rel) && exps_need_push_down(rel, rel->exps) && !exps_push_single_func_down(v, rel, l, r, rel->exps, 0))
4005 : return NULL;
4006 665448 : if (v->changes > changes) /* once we get a better join order, we can try to remove this projection */
4007 2255 : return rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
4008 : }
4009 3399667 : if (is_simple_project(rel->op) && rel->l && rel->exps) {
4010 1142472 : sql_rel *pl = rel->l;
4011 :
4012 1142472 : if (is_joinop(pl->op) && exps_can_push_func(rel->exps, rel)) {
4013 0 : sql_rel *l = pl->l, *r = pl->r, *ol = l, *or = r;
4014 :
4015 0 : for (node *n = rel->exps->h; n; ) {
4016 0 : node *next = n->next;
4017 0 : sql_exp *e = n->data;
4018 0 : int mustl = 0, mustr = 0;
4019 :
4020 0 : if ((can_push_func(e, l, &mustl, 0) && mustl) || (can_push_func(e, r, &mustr, 0) && mustr)) {
4021 0 : if (mustl) {
4022 0 : if (l == ol) /* don't project twice */
4023 0 : pl->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
4024 0 : list_append(l->exps, e);
4025 0 : list_remove_node(rel->exps, NULL, n);
4026 0 : v->changes++;
4027 : } else {
4028 0 : if (r == or) /* don't project twice */
4029 0 : pl->r = r = rel_project(v->sql->sa, r, rel_projections(v->sql, r, NULL, 1, 1));
4030 0 : list_append(r->exps, e);
4031 0 : list_remove_node(rel->exps, NULL, n);
4032 0 : v->changes++;
4033 : }
4034 : }
4035 0 : n = next;
4036 : }
4037 : }
4038 : }
4039 : return rel;
4040 : }
4041 :
4042 : static sql_rel *
4043 3875810 : rel_push_func_and_select_down_(visitor *v, sql_rel *rel)
4044 : {
4045 3875810 : if (rel)
4046 3875810 : rel = rel_push_func_down(v, rel);
4047 3875810 : if (rel)
4048 3875810 : rel = rel_push_select_down(v, rel);
4049 3875810 : return rel;
4050 : }
4051 :
4052 : static sql_rel *
4053 101963 : rel_push_func_and_select_down(visitor *v, global_props *gp, sql_rel *rel)
4054 : {
4055 101963 : (void) gp;
4056 101963 : return rel_visitor_topdown(v, rel, &rel_push_func_and_select_down_);
4057 : }
4058 :
4059 : run_optimizer
4060 600894 : bind_push_func_and_select_down(visitor *v, global_props *gp)
4061 : {
4062 600894 : int flag = v->sql->sql_optimizer;
4063 600705 : return gp->opt_level == 1 && (gp->cnt[op_join] || gp->cnt[op_left] || gp->cnt[op_right]
4064 534634 : || gp->cnt[op_full] || gp->cnt[op_semi] || gp->cnt[op_anti] || gp->cnt[op_select])
4065 702407 : && (flag & push_func_and_select_down) ? rel_push_func_and_select_down : NULL;
4066 : }
|