Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer_private.h"
15 : #include "rel_planner.h"
16 : #include "rel_exp.h"
17 : #include "rel_select.h"
18 : #include "rel_rewriter.h"
19 :
20 : /* Split_select optimizer splits case statements in select expressions. This is a step needed for cse */
21 : static void select_split_exps(mvc *sql, list *exps, sql_rel *rel);
22 :
23 : static sql_exp *
24 220174 : select_split_exp(mvc *sql, sql_exp *e, sql_rel *rel)
25 : {
26 220174 : switch(e->type) {
27 : case e_column:
28 : return e;
29 6907 : case e_convert:
30 6907 : e->l = select_split_exp(sql, e->l, rel);
31 6907 : return e;
32 24436 : case e_aggr:
33 : case e_func:
34 24436 : if (!is_analytic(e) && !exp_has_sideeffect(e)) {
35 24431 : sql_subfunc *f = e->f;
36 24431 : if (e->type == e_func && !f->func->s && is_caselike_func(f) /*is_ifthenelse_func(f)*/)
37 280 : return add_exp_too_project(sql, e, rel);
38 : }
39 : return e;
40 75335 : case e_cmp:
41 75335 : if (e->flag == cmp_or || e->flag == cmp_filter) {
42 18846 : select_split_exps(sql, e->l, rel);
43 18846 : select_split_exps(sql, e->r, rel);
44 56489 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
45 3256 : e->l = select_split_exp(sql, e->l, rel);
46 3256 : select_split_exps(sql, e->r, rel);
47 : } else {
48 53233 : e->l = select_split_exp(sql, e->l, rel);
49 53233 : e->r = select_split_exp(sql, e->r, rel);
50 53233 : if (e->f)
51 3590 : e->f = select_split_exp(sql, e->f, rel);
52 : }
53 : return e;
54 : case e_atom:
55 : case e_psm:
56 : return e;
57 : }
58 : return e;
59 : }
60 :
61 : static void
62 65784 : select_split_exps(mvc *sql, list *exps, sql_rel *rel)
63 : {
64 65784 : node *n;
65 :
66 65784 : if (!exps)
67 : return;
68 165739 : for(n=exps->h; n; n = n->next){
69 99955 : sql_exp *e = n->data;
70 :
71 99955 : e = select_split_exp(sql, e, rel);
72 99955 : n->data = e;
73 : }
74 : }
75 :
76 : static sql_rel *
77 1661040 : rel_split_select_(visitor *v, sql_rel *rel)
78 : {
79 1661040 : if (!rel || !is_select(rel->op) || list_empty(rel->exps) || !rel->l || mvc_highwater(v->sql))
80 1439723 : return rel;
81 :
82 221317 : bool funcs = false;
83 :
84 : /* are there functions */
85 473189 : for (node *n = rel->exps->h; n && !funcs; n = n->next)
86 251872 : funcs = exp_has_func(n->data);
87 :
88 : /* introduce extra project */
89 221317 : if (funcs) {
90 24836 : sql_rel *nrel = rel_project(v->sql->sa, rel->l,
91 24836 : rel_projections(v->sql, rel->l, NULL, 1, 1));
92 24836 : if (!nrel || !nrel->exps)
93 : return NULL;
94 24836 : rel->l = nrel;
95 : /* recursively split all functions and add those to the projection list */
96 24836 : select_split_exps(v->sql, rel->exps, nrel);
97 24836 : return rel;
98 : }
99 : return rel;
100 : }
101 :
102 : static sql_rel *
103 55762 : rel_split_select(visitor *v, global_props *gp, sql_rel *rel)
104 : {
105 55762 : (void) gp;
106 55762 : return rel_visitor_bottomup(v, rel, &rel_split_select_);
107 : }
108 :
109 : run_optimizer
110 623385 : bind_split_select(visitor *v, global_props *gp)
111 : {
112 623385 : int flag = v->sql->sql_optimizer;
113 547758 : return gp->opt_cycle == 0 && gp->opt_level == 1 && (flag & split_select)
114 1171135 : && gp->cnt[op_select] ? rel_split_select : NULL;
115 : }
116 :
117 :
118 : /*
119 : * Remove a redundant join
120 : *
121 : * join (L, Distinct Project(join(L,P) [ p.key == l.lkey]) [p.key]) [ p.key == l.lkey]
122 : * =>
123 : * join(L, P) [p.key==l.lkey]
124 : */
125 : static sql_rel *
126 1468176 : rel_remove_redundant_join_(visitor *v, sql_rel *rel)
127 : {
128 1468176 : if ((is_join(rel->op) || is_semi(rel->op)) && !list_empty(rel->exps)) {
129 218367 : sql_rel *l = rel->l, *r = rel->r, *b, *p = NULL, *j;
130 :
131 218367 : if (is_basetable(l->op) && is_simple_project(r->op) && need_distinct(r)) {
132 9 : b = l;
133 9 : p = r;
134 9 : j = p->l;
135 218358 : } else if (is_basetable(r->op) && is_simple_project(l->op) && need_distinct(l)) {
136 2333 : b = r;
137 2333 : p = l;
138 2333 : j = p->l;
139 : }
140 218367 : if (!p || !j || j->op != rel->op)
141 : return rel;
142 : /* j must have b->l (ie table) */
143 12 : sql_rel *jl = j->l, *jr = j->r;
144 12 : if ((is_basetable(jl->op) && jl->l == b->l) ||
145 8 : (is_basetable(jr->op) && jr->l == b->l)) {
146 11 : int left = 0;
147 11 : if (is_basetable(jl->op) && jl->l == b->l)
148 11 : left = 1;
149 11 : if (!list_empty(p->exps)) {
150 19 : for (node *n=p->exps->h; n; n = n->next) { /* all exps of 'p' must be bound to the opposite side */
151 11 : sql_exp *e = n->data;
152 :
153 18 : if (!rel_rebind_exp(v->sql, left ? jr : jl, e))
154 : return rel;
155 : }
156 : }
157 8 : if (exp_match_list(j->exps, rel->exps)) {
158 0 : p->l = (left)?rel_dup(jr):rel_dup(jl);
159 0 : rel_destroy(j);
160 0 : set_nodistinct(p);
161 0 : v->changes++;
162 0 : return rel;
163 : }
164 : }
165 : }
166 : return rel;
167 : }
168 :
169 : static sql_rel *
170 48183 : rel_remove_redundant_join(visitor *v, global_props *gp, sql_rel *rel)
171 : {
172 48183 : (void) gp;
173 48183 : return rel_visitor_bottomup(v, rel, &rel_remove_redundant_join_); /* this optimizer has to run before rel_first_level_optimizations */
174 : }
175 :
176 : run_optimizer
177 623386 : bind_remove_redundant_join(visitor *v, global_props *gp)
178 : {
179 623386 : int flag = v->sql->sql_optimizer;
180 547758 : return gp->opt_cycle == 0 && gp->opt_level == 1 && (gp->cnt[op_left] || gp->cnt[op_right]
181 529262 : || gp->cnt[op_full] || gp->cnt[op_join] || gp->cnt[op_semi] || gp->cnt[op_anti]) &&
182 671569 : (flag & remove_redundant_join) ? rel_remove_redundant_join : NULL;
183 : }
184 :
185 :
186 : static list *
187 1124683 : exp_merge_range(visitor *v, sql_rel *rel, list *exps)
188 : {
189 1124683 : node *n, *m;
190 2417000 : for (n=exps->h; n; n = n->next) {
191 1294210 : sql_exp *e = n->data;
192 1294210 : sql_exp *le = e->l;
193 1294210 : sql_exp *re = e->r;
194 :
195 : /* handle the and's in the or lists */
196 1294210 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
197 45297 : e->l = exp_merge_range(v, rel, e->l);
198 45297 : e->r = exp_merge_range(v, rel, e->r);
199 : /* only look for gt, gte, lte, lt */
200 1248913 : } else if (n->next &&
201 165327 : e->type == e_cmp && e->flag < cmp_equal && !e->f &&
202 6818 : re->card == CARD_ATOM && !is_anti(e)) {
203 1268 : for (m=n->next; m; m = m->next) {
204 777 : sql_exp *f = m->data;
205 777 : sql_exp *lf = f->l;
206 777 : sql_exp *rf = f->r;
207 777 : int c_le = is_numeric_upcast(le), c_lf = is_numeric_upcast(lf);
208 :
209 777 : if (f->type == e_cmp && f->flag < cmp_equal && !f->f &&
210 645 : rf->card == CARD_ATOM && !is_anti(f) &&
211 322 : exp_match_exp(c_le?le->l:le, c_lf?lf->l:lf)) {
212 172 : sql_exp *ne;
213 172 : int swap = 0, lt = 0, gt = 0;
214 172 : sql_subtype super;
215 : /* for now only c1 <[=] x <[=] c2 */
216 :
217 172 : swap = lt = (e->flag == cmp_lt || e->flag == cmp_lte);
218 172 : gt = !lt;
219 :
220 172 : if (gt &&
221 150 : (f->flag == cmp_gt ||
222 : f->flag == cmp_gte))
223 32 : continue;
224 22 : if (lt &&
225 22 : (f->flag == cmp_lt ||
226 : f->flag == cmp_lte))
227 0 : continue;
228 :
229 140 : cmp_supertype(&super, exp_subtype(le), exp_subtype(lf));
230 140 : if (!(rf = exp_check_type(v->sql, &super, rel, rf, type_equal)) ||
231 140 : !(le = exp_check_type(v->sql, &super, rel, le, type_equal)) ||
232 140 : !(re = exp_check_type(v->sql, &super, rel, re, type_equal))) {
233 0 : v->sql->session->status = 0;
234 0 : v->sql->errstr[0] = 0;
235 0 : continue;
236 : }
237 140 : if (!swap)
238 118 : ne = exp_compare2(v->sql->sa, le, re, rf, compare2range(e->flag, f->flag), 0);
239 : else
240 22 : ne = exp_compare2(v->sql->sa, le, rf, re, compare2range(f->flag, e->flag), 0);
241 :
242 140 : list_remove_data(exps, NULL, e);
243 140 : list_remove_data(exps, NULL, f);
244 140 : list_append(exps, ne);
245 140 : v->changes++;
246 140 : return exp_merge_range(v, rel, exps);
247 : }
248 : }
249 1248282 : } else if (n->next &&
250 164696 : e->type == e_cmp && e->flag < cmp_equal && !e->f &&
251 6187 : re->card > CARD_ATOM && !is_anti(e)) {
252 11218 : for (m=n->next; m; m = m->next) {
253 6784 : sql_exp *f = m->data;
254 6784 : sql_exp *lf = f->l;
255 6784 : sql_exp *rf = f->r;
256 :
257 6784 : if (f->type == e_cmp && f->flag < cmp_equal && !f->f &&
258 4979 : rf->card > CARD_ATOM && !is_anti(f)) {
259 4978 : sql_exp *ne, *t;
260 4978 : int swap = 0, lt = 0, gt = 0;
261 4978 : comp_type ef = (comp_type) e->flag, ff = (comp_type) f->flag;
262 4978 : int c_re = is_numeric_upcast(re), c_rf = is_numeric_upcast(rf);
263 4978 : int c_le = is_numeric_upcast(le), c_lf = is_numeric_upcast(lf), c;
264 4978 : sql_subtype super;
265 :
266 : /* both swapped ? */
267 4978 : if (exp_match_exp(c_re?re->l:re, c_rf?rf->l:rf)) {
268 19 : t = re;
269 19 : re = le;
270 19 : le = t;
271 19 : c = c_re; c_re = c_le; c_le = c;
272 19 : ef = swap_compare(ef);
273 19 : t = rf;
274 19 : rf = lf;
275 19 : lf = t;
276 19 : c = c_rf; c_rf = c_lf; c_lf = c;
277 19 : ff = swap_compare(ff);
278 : }
279 :
280 : /* is left swapped ? */
281 4978 : if (exp_match_exp(c_re?re->l:re, c_lf?lf->l:lf)) {
282 104 : t = re;
283 104 : re = le;
284 104 : le = t;
285 104 : c = c_re; c_re = c_le; c_le = c;
286 104 : ef = swap_compare(ef);
287 : }
288 :
289 : /* is right swapped ? */
290 4978 : if (exp_match_exp(c_le?le->l:le, c_rf?rf->l:rf)) {
291 177 : t = rf;
292 177 : rf = lf;
293 177 : lf = t;
294 177 : c = c_rf; c_rf = c_lf; c_lf = c;
295 177 : ff = swap_compare(ff);
296 : }
297 :
298 4978 : if (!exp_match_exp(c_le?le->l:le, c_lf?lf->l:lf))
299 3225 : continue;
300 :
301 : /* for now only c1 <[=] x <[=] c2 */
302 1830 : swap = lt = (ef == cmp_lt || ef == cmp_lte);
303 1830 : gt = !lt;
304 :
305 1830 : if (gt && (ff == cmp_gt || ff == cmp_gte))
306 73 : continue;
307 1757 : if (lt && (ff == cmp_lt || ff == cmp_lte))
308 4 : continue;
309 :
310 1753 : cmp_supertype(&super, exp_subtype(le), exp_subtype(lf));
311 1753 : if (!(rf = exp_check_type(v->sql, &super, rel, rf, type_equal)) ||
312 1753 : !(le = exp_check_type(v->sql, &super, rel, le, type_equal)) ||
313 1753 : !(re = exp_check_type(v->sql, &super, rel, re, type_equal))) {
314 0 : v->sql->session->status = 0;
315 0 : v->sql->errstr[0] = 0;
316 0 : continue;
317 : }
318 1753 : if (!swap)
319 1653 : ne = exp_compare2(v->sql->sa, le, re, rf, compare2range(ef, ff), 0);
320 : else
321 100 : ne = exp_compare2(v->sql->sa, le, rf, re, compare2range(ff, ef), 0);
322 :
323 1753 : list_remove_data(exps, NULL, e);
324 1753 : list_remove_data(exps, NULL, f);
325 1753 : list_append(exps, ne);
326 1753 : v->changes++;
327 1753 : return exp_merge_range(v, rel, exps);
328 : }
329 : }
330 : }
331 : }
332 : return exps;
333 : }
334 :
335 : static int
336 41807 : exps_cse( mvc *sql, list *oexps, list *l, list *r )
337 : {
338 41807 : list *nexps;
339 41807 : node *n, *m;
340 41807 : char *lu, *ru;
341 41807 : int lc = 0, rc = 0, match = 0, res = 0;
342 :
343 41807 : if (list_length(l) == 0 || list_length(r) == 0)
344 0 : return 0;
345 :
346 : /* first recursive exps_cse */
347 41807 : nexps = new_exp_list(sql->sa);
348 88469 : for (n = l->h; n; n = n->next) {
349 46662 : sql_exp *e = n->data;
350 :
351 46662 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
352 16178 : res = exps_cse(sql, nexps, e->l, e->r);
353 : } else {
354 30484 : append(nexps, e);
355 : }
356 : }
357 41807 : l = nexps;
358 :
359 41807 : nexps = new_exp_list(sql->sa);
360 91197 : for (n = r->h; n; n = n->next) {
361 49390 : sql_exp *e = n->data;
362 :
363 49390 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
364 3320 : res = exps_cse(sql, nexps, e->l, e->r);
365 : } else {
366 46070 : append(nexps, e);
367 : }
368 : }
369 41807 : r = nexps;
370 :
371 : /* simplify true or .. and .. or true */
372 41807 : if (list_length(l) == list_length(r) && list_length(l) == 1) {
373 36707 : sql_exp *le = l->h->data, *re = r->h->data;
374 :
375 36707 : if (exp_is_true(le)) {
376 14 : append(oexps, le);
377 14 : return 1;
378 : }
379 36693 : if (exp_is_true(re)) {
380 5 : append(oexps, re);
381 5 : return 1;
382 : }
383 : }
384 :
385 41788 : lu = SA_ZNEW_ARRAY(sql->ta, char, list_length(l));
386 41788 : ru = SA_ZNEW_ARRAY(sql->ta, char, list_length(r));
387 89157 : for (n = l->h, lc = 0; n; n = n->next, lc++) {
388 47369 : sql_exp *le = n->data;
389 :
390 106542 : for ( m = r->h, rc = 0; m; m = m->next, rc++) {
391 59173 : sql_exp *re = m->data;
392 :
393 59173 : if (!ru[rc] && exp_match_exp(le,re)) {
394 796 : lu[lc] = 1;
395 796 : ru[rc] = 1;
396 796 : match = 1;
397 : }
398 : }
399 : }
400 41788 : if (match) {
401 415 : list *nl = new_exp_list(sql->sa);
402 415 : list *nr = new_exp_list(sql->sa);
403 :
404 1625 : for (n = l->h, lc = 0; n; n = n->next, lc++)
405 1210 : if (!lu[lc])
406 418 : append(nl, n->data);
407 1639 : for (n = r->h, rc = 0; n; n = n->next, rc++)
408 1224 : if (!ru[rc])
409 428 : append(nr, n->data);
410 :
411 415 : if (list_length(nl) && list_length(nr))
412 387 : append(oexps, exp_or(sql->sa, nl, nr, 0));
413 :
414 1625 : for (n = l->h, lc = 0; n; n = n->next, lc++) {
415 1210 : if (lu[lc])
416 792 : append(oexps, n->data);
417 : }
418 : res = 1;
419 : } else {
420 41373 : append(oexps, exp_or(sql->sa, list_dup(l, (fdup)NULL),
421 : list_dup(r, (fdup)NULL), 0));
422 : }
423 : return res;
424 : }
425 :
426 : static int
427 93550 : are_equality_exps( list *exps, sql_exp **L)
428 : {
429 93550 : sql_exp *l = *L;
430 :
431 93550 : if (list_length(exps) == 1) {
432 89443 : sql_exp *e = exps->h->data, *le = e->l, *re = e->r;
433 :
434 89443 : if (e->type == e_cmp && e->flag == cmp_equal && le->card != CARD_ATOM && re->card == CARD_ATOM && !is_semantics(e)) {
435 22376 : if (!l) {
436 10841 : *L = l = le;
437 10841 : if (!is_column(le->type))
438 : return 0;
439 : }
440 22376 : return (exp_match(l, le));
441 : }
442 67067 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e) && !is_semantics(e))
443 72707 : return (are_equality_exps(e->l, L) && are_equality_exps(e->r, L));
444 : }
445 : return 0;
446 : }
447 :
448 : static void
449 4283 : get_exps( list *n, list *l )
450 : {
451 5996 : sql_exp *e = l->h->data, *re = e->r;
452 :
453 5996 : if (e->type == e_cmp && e->flag == cmp_equal && re->card == CARD_ATOM)
454 4283 : list_append(n, re);
455 5996 : if (e->type == e_cmp && e->flag == cmp_or) {
456 1713 : get_exps(n, e->l);
457 1713 : get_exps(n, e->r);
458 : }
459 4283 : }
460 :
461 : static sql_exp *
462 1285 : equality_exps_2_in( mvc *sql, sql_exp *ce, list *l, list *r)
463 : {
464 1285 : list *nl = new_exp_list(sql->sa);
465 :
466 1285 : get_exps(nl, l);
467 1285 : get_exps(nl, r);
468 :
469 1285 : return exp_in( sql->sa, ce, nl, cmp_in);
470 : }
471 :
472 : static list *
473 606340 : merge_ors(mvc *sql, list *exps, int *changes)
474 : {
475 606340 : list *nexps = NULL;
476 606340 : int needed = 0;
477 :
478 1295409 : for (node *n = exps->h; n && !needed; n = n->next) {
479 689069 : sql_exp *e = n->data;
480 :
481 689069 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e))
482 689069 : needed = 1;
483 : }
484 :
485 606340 : if (needed) {
486 42212 : nexps = new_exp_list(sql->sa);
487 92812 : for (node *n = exps->h; n; n = n->next) {
488 50600 : sql_exp *e = n->data, *l = NULL;
489 :
490 50600 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e) && are_equality_exps(e->l, &l) && are_equality_exps(e->r, &l) && l) {
491 1285 : (*changes)++;
492 1285 : append(nexps, equality_exps_2_in(sql, l, e->l, e->r));
493 : } else {
494 49315 : append(nexps, e);
495 : }
496 : }
497 : } else {
498 : nexps = exps;
499 : }
500 :
501 1302742 : for (node *n = nexps->h; n ; n = n->next) {
502 696402 : sql_exp *e = n->data;
503 :
504 696402 : if (e->type == e_cmp && e->flag == cmp_or) {
505 42039 : e->l = merge_ors(sql, e->l, changes);
506 42039 : e->r = merge_ors(sql, e->r, changes);
507 : }
508 : }
509 :
510 606340 : return nexps;
511 : }
512 :
513 : #define TRIVIAL_NOT_EQUAL_CMP(e) \
514 : ((e)->type == e_cmp && (e)->flag == cmp_notequal && !is_anti((e)) && !is_semantics((e)) && ((sql_exp*)(e)->l)->card != CARD_ATOM && ((sql_exp*)(e)->r)->card == CARD_ATOM)
515 :
516 : static list *
517 606340 : merge_notequal(mvc *sql, list *exps, int *changes)
518 : {
519 606340 : list *inequality_groups = NULL, *nexps = NULL;
520 606340 : int needed = 0;
521 :
522 1302742 : for (node *n = exps->h; n; n = n->next) {
523 696402 : sql_exp *e = n->data;
524 :
525 696402 : if (TRIVIAL_NOT_EQUAL_CMP(e)) {
526 82299 : bool appended = false;
527 :
528 82299 : if (inequality_groups) {
529 9091 : for (node *m = inequality_groups->h; m && !appended; m = m->next) {
530 5210 : list *next = m->data;
531 5210 : sql_exp *first = (sql_exp*) next->h->data;
532 :
533 5210 : if (exp_match(first->l, e->l)) {
534 1164 : list_append(next, e);
535 1164 : appended = true;
536 : }
537 : }
538 : }
539 3881 : if (!appended) {
540 81135 : if (!inequality_groups)
541 78418 : inequality_groups = new_exp_list(sql->sa);
542 81135 : list_append(inequality_groups, list_append(new_exp_list(sql->sa), e));
543 : }
544 : }
545 : }
546 :
547 606340 : if (inequality_groups) { /* if one list of inequalities has more than one entry, then the re-write is needed */
548 159553 : for (node *n = inequality_groups->h; n; n = n->next) {
549 81135 : list *next = n->data;
550 :
551 81135 : if (list_length(next) > 1)
552 1079 : needed = 1;
553 : }
554 : }
555 :
556 78418 : if (needed) {
557 997 : nexps = new_exp_list(sql->sa);
558 2610 : for (node *n = inequality_groups->h; n; n = n->next) {
559 1613 : list *next = n->data;
560 1613 : sql_exp *first = (sql_exp*) next->h->data;
561 :
562 1613 : if (list_length(next) > 1) {
563 1079 : list *notin = new_exp_list(sql->sa);
564 :
565 3322 : for (node *m = next->h; m; m = m->next) {
566 2243 : sql_exp *e = m->data;
567 2243 : list_append(notin, e->r);
568 : }
569 1079 : list_append(nexps, exp_in(sql->sa, first->l, notin, cmp_notin));
570 : } else {
571 534 : list_append(nexps, first);
572 : }
573 : }
574 :
575 4132 : for (node *n = exps->h; n; n = n->next) {
576 3135 : sql_exp *e = n->data;
577 :
578 3135 : if (!TRIVIAL_NOT_EQUAL_CMP(e))
579 358 : list_append(nexps, e);
580 : }
581 997 : (*changes)++;
582 : } else {
583 : nexps = exps;
584 : }
585 :
586 1301578 : for (node *n = nexps->h; n ; n = n->next) {
587 695238 : sql_exp *e = n->data;
588 :
589 695238 : if (e->type == e_cmp && e->flag == cmp_or) {
590 42039 : e->l = merge_notequal(sql, e->l, changes);
591 42039 : e->r = merge_notequal(sql, e->r, changes);
592 : }
593 : }
594 :
595 606340 : return nexps;
596 : }
597 :
598 : int
599 175079 : is_numeric_upcast(sql_exp *e)
600 : {
601 175079 : if (is_convert(e->type)) {
602 4675 : sql_subtype *f = exp_fromtype(e);
603 4675 : sql_subtype *t = exp_totype(e);
604 :
605 4675 : if (f->type->eclass == t->type->eclass && EC_COMPUTE(f->type->eclass)) {
606 1640 : if (f->type->localtype < t->type->localtype)
607 1620 : return 1;
608 : }
609 : }
610 : return 0;
611 : }
612 :
613 : /* optimize (a = b) or (a is null and b is null) -> a = b with null semantics */
614 : static sql_exp *
615 45142 : try_rewrite_equal_or_is_null(mvc *sql, sql_rel *rel, sql_exp *or, list *l1, list *l2)
616 : {
617 45142 : if (list_length(l1) == 1) {
618 41577 : bool valid = true, first_is_null_found = false, second_is_null_found = false;
619 41577 : sql_exp *cmp = l1->h->data;
620 41577 : sql_exp *first = cmp->l, *second = cmp->r;
621 :
622 41577 : if (is_compare(cmp->type) && !is_anti(cmp) && !cmp->f && cmp->flag == cmp_equal) {
623 7272 : int fupcast = is_numeric_upcast(first), supcast = is_numeric_upcast(second);
624 14809 : for(node *n = l2->h ; n && valid; n = n->next) {
625 7537 : sql_exp *e = n->data, *l = e->l, *r = e->r;
626 :
627 7537 : if (is_compare(e->type) && e->flag == cmp_equal && !e->f &&
628 4637 : !is_anti(e) && is_semantics(e)) {
629 1852 : int lupcast = is_numeric_upcast(l);
630 1852 : int rupcast = is_numeric_upcast(r);
631 1852 : sql_exp *rr = rupcast ? r->l : r;
632 :
633 1852 : if (rr->type == e_atom && rr->l && atom_null(rr->l)) {
634 1852 : if (exp_match_exp(fupcast?first->l:first, lupcast?l->l:l))
635 : first_is_null_found = true;
636 353 : else if (exp_match_exp(supcast?second->l:second, lupcast?l->l:l))
637 : second_is_null_found = true;
638 : else
639 5773 : valid = false;
640 : } else {
641 : valid = false;
642 : }
643 : } else {
644 : valid = false;
645 : }
646 : }
647 7272 : if (valid && first_is_null_found && second_is_null_found) {
648 262 : sql_subtype super;
649 :
650 262 : cmp_supertype(&super, exp_subtype(first), exp_subtype(second)); /* first and second must have the same type */
651 262 : if (!(first = exp_check_type(sql, &super, rel, first, type_equal)) ||
652 262 : !(second = exp_check_type(sql, &super, rel, second, type_equal))) {
653 0 : sql->session->status = 0;
654 0 : sql->errstr[0] = 0;
655 0 : return or;
656 : }
657 262 : sql_exp *res = exp_compare(sql->sa, first, second, cmp->flag);
658 262 : set_semantics(res);
659 262 : if (exp_name(or))
660 0 : exp_prop_alias(sql->sa, res, or);
661 262 : return res;
662 : }
663 : }
664 : }
665 : return or;
666 : }
667 :
668 : static list *
669 1032196 : merge_cmp_or_null(mvc *sql, sql_rel *rel, list *exps, int *changes)
670 : {
671 2219460 : for (node *n = exps->h; n ; n = n->next) {
672 1187264 : sql_exp *e = n->data;
673 :
674 1187264 : if (is_compare(e->type) && e->flag == cmp_or && !is_anti(e)) {
675 22571 : sql_exp *ne = try_rewrite_equal_or_is_null(sql, rel, e, e->l, e->r);
676 22571 : if (ne != e) {
677 260 : (*changes)++;
678 260 : n->data = ne;
679 : }
680 22571 : ne = try_rewrite_equal_or_is_null(sql, rel, e, e->r, e->l);
681 22571 : if (ne != e) {
682 2 : (*changes)++;
683 2 : n->data = ne;
684 : }
685 : }
686 : }
687 1032196 : return exps;
688 : }
689 :
690 : static list *
691 1032196 : cleanup_equal_exps(mvc *sql, sql_rel *rel, list *exps, int *changes)
692 : {
693 1032196 : if (list_length(exps) <= 1)
694 : return exps;
695 141439 : if (is_join(rel->op)) {
696 213461 : for(node *n = exps->h; n; n = n->next) {
697 143541 : sql_exp *e = n->data;
698 286913 : if (e->type == e_cmp && !e->f && e->flag <= cmp_notequal &&
699 185051 : !rel_find_exp(rel->l, e->l) && !rel_find_exp(rel->r, e->r) &&
700 41505 : !find_prop(e->p, PROP_HASHCOL) && !find_prop(e->p, PROP_JOINIDX)) {
701 20672 : exp_swap(e);
702 : }
703 : }
704 : }
705 141439 : bool needed = false;
706 439002 : for(node *n = exps->h; !needed && n; n = n->next) {
707 589326 : for (node *m = n->next; !needed && m; m = m->next) {
708 291763 : if (exp_match_exp_semantics(n->data, m->data, true))
709 89 : needed = true;
710 : }
711 : }
712 141439 : if (needed) {
713 89 : list *nexps = sa_list(sql->sa);
714 :
715 293 : for(node *n = exps->h; n; n = n->next) {
716 204 : bool done = false;
717 629 : for (node *m = exps->h; m && !done; m = m->next) {
718 425 : if (n != m && exp_match_exp_semantics(n->data, m->data, false)) {
719 179 : sql_exp *e1 = n->data, *e2 = m->data;
720 179 : if ((is_any(e1) || is_semantics(e1)) || (!is_any(e2) && !is_semantics(e2))) {
721 179 : append(nexps, e1);
722 179 : if ((!is_any(e2) && !is_semantics(e2)) && is_left(rel->op) && list_length(rel->attr) == 1) {
723 : /* nil is false */
724 0 : sql_exp *m = rel->attr->h->data;
725 0 : if (exp_is_atom(m))
726 0 : set_no_nil(m);
727 : }
728 : }
729 : done = true;
730 : }
731 : }
732 204 : if (!done)
733 25 : append(nexps, n->data);
734 : }
735 : return nexps;
736 : }
737 : (void)changes;
738 : return exps;
739 : }
740 :
741 : static inline sql_rel *
742 1032196 : rel_select_cse(visitor *v, sql_rel *rel)
743 : {
744 1032196 : if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && rel->exps) /* cleanup equal expressions */
745 1032196 : rel->exps = cleanup_equal_exps(v->sql, rel, rel->exps, &v->changes); /* (a = b) and (a += b) */
746 :
747 1032196 : if (is_select(rel->op) && rel->exps)
748 522262 : rel->exps = merge_ors(v->sql, rel->exps, &v->changes); /* x = 1 or x = 2 => x in (1, 2)*/
749 :
750 1032196 : if (is_select(rel->op) && rel->exps)
751 522262 : rel->exps = merge_notequal(v->sql, rel->exps, &v->changes); /* x <> 1 and x <> 2 => x not in (1, 2)*/
752 :
753 1032196 : if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && rel->exps)
754 1032196 : rel->exps = merge_cmp_or_null(v->sql, rel, rel->exps, &v->changes); /* (a = b) or (a is null and b is null) -> a = b with null semantics */
755 :
756 1032196 : if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && rel->exps) {
757 1032196 : node *n;
758 1032196 : list *nexps;
759 1032196 : int needed = 0;
760 :
761 2212841 : for (n=rel->exps->h; n && !needed; n = n->next) {
762 1180645 : sql_exp *e = n->data;
763 :
764 1180645 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e))
765 1180645 : needed = 1;
766 : }
767 1032196 : if (!needed)
768 : return rel;
769 21500 : nexps = new_exp_list(v->sql->sa);
770 51015 : for (n=rel->exps->h; n; n = n->next) {
771 29515 : sql_exp *e = n->data;
772 :
773 29515 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
774 : /* split the common expressions */
775 22309 : v->changes += exps_cse(v->sql, nexps, e->l, e->r);
776 : } else {
777 7206 : append(nexps, e);
778 : }
779 : }
780 21500 : rel->exps = nexps;
781 : }
782 : return rel;
783 : }
784 :
785 : static list *
786 14245 : exps_merge_select_rse( mvc *sql, list *l, list *r, bool *merged)
787 : {
788 14245 : node *n, *m, *o;
789 14245 : list *nexps = NULL, *lexps, *rexps;
790 14245 : bool lmerged = true, rmerged = true;
791 :
792 14245 : lexps = new_exp_list(sql->sa);
793 30224 : for (n = l->h; n; n = n->next) {
794 15979 : sql_exp *e = n->data;
795 :
796 15979 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e) && !is_semantics(e)) {
797 5488 : lmerged = false;
798 5488 : list *nexps = exps_merge_select_rse(sql, e->l, e->r, &lmerged);
799 6004 : for (o = nexps->h; o; o = o->next)
800 516 : append(lexps, o->data);
801 : } else {
802 10491 : append(lexps, e);
803 : }
804 : }
805 14245 : if (lmerged)
806 8816 : lmerged = (list_length(lexps) == 1);
807 14245 : rexps = new_exp_list(sql->sa);
808 30837 : for (n = r->h; n; n = n->next) {
809 16592 : sql_exp *e = n->data;
810 :
811 16592 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e) && !is_semantics(e)) {
812 1104 : rmerged = false;
813 1104 : list *nexps = exps_merge_select_rse(sql, e->l, e->r, &rmerged);
814 1127 : for (o = nexps->h; o; o = o->next)
815 23 : append(rexps, o->data);
816 : } else {
817 15488 : append(rexps, e);
818 : }
819 : }
820 14245 : if (rmerged)
821 13159 : rmerged = (list_length(r) == 1);
822 :
823 14245 : nexps = new_exp_list(sql->sa);
824 :
825 : /* merge merged lists first ? */
826 25252 : for (n = lexps->h; n; n = n->next) {
827 11007 : sql_exp *le = n->data, *re, *fnd = NULL;
828 :
829 11007 : if (le->type != e_cmp || le->flag == cmp_or || is_anti(le) || is_semantics(le) || is_symmetric(le))
830 619 : continue;
831 21205 : for (m = rexps->h; !fnd && m; m = m->next) {
832 10817 : re = m->data;
833 10817 : if (exps_match_col_exps(le, re))
834 1736 : fnd = re;
835 : }
836 10388 : if (fnd && (is_anti(fnd) || is_semantics(fnd)))
837 52 : continue;
838 : /* cases
839 : * 1) 2 values (cmp_equal)
840 : * 2) 1 value (cmp_equal), and cmp_in
841 : * (also cmp_in, cmp_equal)
842 : * 3) 2 cmp_in
843 : * 4) ranges
844 : */
845 1684 : if (fnd) {
846 1684 : re = fnd;
847 1684 : fnd = NULL;
848 1684 : if (is_anti(le) || is_anti(re) || is_symmetric(re))
849 0 : continue;
850 2238 : if (le->flag == cmp_equal && re->flag == cmp_equal) {
851 554 : list *exps = new_exp_list(sql->sa);
852 :
853 554 : append(exps, le->r);
854 554 : append(exps, re->r);
855 554 : fnd = exp_in(sql->sa, le->l, exps, cmp_in);
856 1301 : } else if (le->flag == cmp_equal && re->flag == cmp_in){
857 171 : list *exps = new_exp_list(sql->sa);
858 :
859 171 : append(exps, le->r);
860 171 : list_merge(exps, re->r, NULL);
861 171 : fnd = exp_in(sql->sa, le->l, exps, cmp_in);
862 1290 : } else if (le->flag == cmp_in && re->flag == cmp_equal){
863 331 : list *exps = new_exp_list(sql->sa);
864 :
865 331 : append(exps, re->r);
866 331 : list_merge(exps, le->r, NULL);
867 331 : fnd = exp_in(sql->sa, le->l, exps, cmp_in);
868 748 : } else if (le->flag == cmp_in && re->flag == cmp_in){
869 120 : list *exps = new_exp_list(sql->sa);
870 :
871 120 : list_merge(exps, le->r, NULL);
872 120 : list_merge(exps, re->r, NULL);
873 120 : fnd = exp_in(sql->sa, le->l, exps, cmp_in);
874 508 : } else if (le->f && re->f && /* merge ranges */
875 20 : le->flag == re->flag && le->flag <= cmp_lt) {
876 20 : sql_exp *mine = NULL, *maxe = NULL;
877 :
878 20 : if (!(mine = rel_binop_(sql, NULL, exp_copy(sql, le->r), exp_copy(sql, re->r), "sys", "sql_min", card_value, true))) {
879 0 : sql->session->status = 0;
880 0 : sql->errstr[0] = '\0';
881 0 : continue;
882 : }
883 20 : if (!(maxe = rel_binop_(sql, NULL, exp_copy(sql, le->f), exp_copy(sql, re->f), "sys", "sql_max", card_value, true))) {
884 0 : sql->session->status = 0;
885 0 : sql->errstr[0] = '\0';
886 0 : continue;
887 : }
888 20 : fnd = exp_compare2(sql->sa, exp_copy(sql, le->l), mine, maxe, le->flag, 0);
889 20 : lmerged = false;
890 : }
891 1196 : if (fnd) {
892 1196 : append(nexps, fnd);
893 2229 : *merged = (fnd && lmerged && rmerged);
894 : }
895 : }
896 : }
897 14245 : return nexps;
898 : }
899 :
900 : /* merge related sub expressions
901 : *
902 : * ie (x = a and y > 1 and y < 5) or
903 : * (x = c and y > 1 and y < 10) or
904 : * (x = e and y > 1 and y < 20)
905 : * ->
906 : * ((x = a and y > 1 and y < 5) or
907 : * (x = c and y > 1 and y < 10) or
908 : * (x = e and y > 1 and y < 20)) and
909 : * x in (a,c,e) and
910 : * y > 1 and y < 20
911 : *
912 : * for single expression or's we can do better
913 : * x in (a, b, c) or x in (d, e, f)
914 : * ->
915 : * x in (a, b, c, d, e, f)
916 : * */
917 : static inline sql_rel *
918 396569 : rel_merge_select_rse(visitor *v, sql_rel *rel)
919 : {
920 : /* only execute once per select */
921 396569 : if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && rel->exps && !is_rel_merge_select_rse_used(rel->used)) {
922 140957 : node *n, *o;
923 140957 : list *nexps = new_exp_list(v->sql->sa);
924 :
925 301574 : for (n=rel->exps->h; n; n = n->next) {
926 160617 : sql_exp *e = n->data;
927 :
928 168270 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e) && !is_semantics(e)) {
929 : /* possibly merge related expressions */
930 7653 : bool merged = false;
931 :
932 7653 : list *ps = exps_merge_select_rse(v->sql, e->l, e->r, &merged);
933 8310 : for (o = ps->h; o; o = o->next)
934 657 : append(nexps, o->data);
935 7653 : if (merged)
936 86 : v->changes++;
937 : else
938 7567 : append(nexps, e);
939 : } else {
940 152964 : append(nexps, e);
941 : }
942 : }
943 140957 : rel->exps = nexps;
944 140957 : rel->used |= rel_merge_select_rse_used;
945 : }
946 396569 : return rel;
947 : }
948 :
949 : /* pack optimizers into a single function call to avoid iterations in the AST */
950 : static sql_rel *
951 3628033 : rel_optimize_select_and_joins_bottomup_(visitor *v, sql_rel *rel)
952 : {
953 3628033 : if (!rel || (!is_join(rel->op) && !is_semi(rel->op) && !is_select(rel->op)) || list_empty(rel->exps))
954 2595837 : return rel;
955 1032196 : uint8_t cycle = *(uint8_t*) v->data;
956 :
957 1032196 : rel->exps = exp_merge_range(v, rel, rel->exps);
958 1032196 : rel = rel_select_cse(v, rel);
959 1032196 : if (cycle == 1)
960 396569 : rel = rel_merge_select_rse(v, rel);
961 1032196 : rel = rewrite_simplify(v, cycle, v->value_based_opt, rel);
962 1032196 : return rel;
963 : }
964 :
965 : static sql_rel *
966 126368 : rel_optimize_select_and_joins_bottomup(visitor *v, global_props *gp, sql_rel *rel)
967 : {
968 126368 : v->data = &gp->opt_cycle;
969 126368 : rel = rel_visitor_bottomup(v, rel, &rel_optimize_select_and_joins_bottomup_);
970 126368 : v->data = gp;
971 126368 : return rel;
972 : }
973 :
974 : run_optimizer
975 623383 : bind_optimize_select_and_joins_bottomup(visitor *v, global_props *gp)
976 : {
977 623383 : int flag = v->sql->sql_optimizer;
978 623187 : return gp->opt_level == 1 && (gp->cnt[op_join] || gp->cnt[op_left] || gp->cnt[op_right] ||
979 538971 : gp->cnt[op_full] || gp->cnt[op_semi] || gp->cnt[op_anti] ||
980 1246570 : gp->cnt[op_select]) && (flag & optimize_select_and_joins_bottomup) ? rel_optimize_select_and_joins_bottomup : NULL;
981 : }
982 :
983 :
984 : static inline sql_rel *
985 3428233 : rel_push_join_exps_down(visitor *v, sql_rel *rel)
986 : {
987 : /* push select exps part of join expressions down */
988 : /* TODO CHECK WHY not semi enabled */
989 3428233 : if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) /*|| is_semi(rel->op)*/) && !list_empty(rel->exps)) {
990 482901 : int left = is_innerjoin(rel->op) || is_right(rel->op) || rel->op == op_semi;
991 482901 : int right = is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op);
992 482901 : sql_rel *jl = rel->l, *ojl = jl, *jr = rel->r, *ojr = jr;
993 :
994 482901 : set_processed(jl);
995 482901 : set_processed(jr);
996 1039581 : for (node *n = rel->exps->h; n;) {
997 556680 : node *next = n->next;
998 556680 : sql_exp *e = n->data;
999 :
1000 556680 : if (left && rel_rebind_exp(v->sql, jl, e) && !is_any(e)) { /* select expressions on left */
1001 1335 : if (!is_select(jl->op) || rel_is_ref(jl))
1002 1325 : rel->l = jl = rel_select(v->sql->sa, jl, NULL);
1003 1335 : rel_select_add_exp(v->sql->sa, jl, e);
1004 1335 : list_remove_node(rel->exps, NULL, n);
1005 1335 : v->changes++;
1006 555345 : } else if (right && rel_rebind_exp(v->sql, jr, e) && !is_any(e)) { /* select expressions on right */
1007 42 : if (!is_select(jr->op) || rel_is_ref(jr))
1008 40 : rel->r = jr = rel_select(v->sql->sa, jr, NULL);
1009 42 : rel_select_add_exp(v->sql->sa, jr, e);
1010 42 : list_remove_node(rel->exps, NULL, n);
1011 42 : v->changes++;
1012 : }
1013 : n = next;
1014 : }
1015 482901 : if (ojl != jl)
1016 1325 : set_processed(jl);
1017 482901 : if (ojr != jr)
1018 40 : set_processed(jr);
1019 : }
1020 3428233 : return rel;
1021 : }
1022 :
1023 : static inline bool
1024 3428233 : is_non_trivial_select_applied_to_outer_join(sql_rel *rel)
1025 : {
1026 3428233 : return is_select(rel->op) && rel->exps && is_outerjoin(((sql_rel*) rel->l)->op);
1027 : }
1028 :
1029 : extern list *list_append_before(list *l, node *n, void *data);
1030 :
1031 : static void replace_column_references_with_nulls_2(mvc *sql, list* crefs, sql_exp* e);
1032 :
1033 : static void
1034 5630 : replace_column_references_with_nulls_1(mvc *sql, list* crefs, list* exps) {
1035 5630 : if (list_empty(exps))
1036 : return;
1037 14310 : for(node* n = exps->h; n; n=n->next) {
1038 8680 : sql_exp* e = n->data;
1039 8680 : replace_column_references_with_nulls_2(sql, crefs, e);
1040 : }
1041 : }
1042 :
1043 : static void
1044 27934 : replace_column_references_with_nulls_2(mvc *sql, list* crefs, sql_exp* e) {
1045 35609 : if (e == NULL) {
1046 : return;
1047 : }
1048 :
1049 29502 : switch (e->type) {
1050 8825 : case e_column:
1051 : {
1052 8825 : sql_exp *c = NULL;
1053 8825 : if (e->nid)
1054 8825 : c = exps_bind_nid(crefs, e->nid);
1055 8825 : if (c) {
1056 2437 : e->type = e_atom;
1057 2437 : e->l = atom_general(sql->sa, &e->tpe, NULL, 0);
1058 2437 : e->r = e->f = NULL;
1059 : }
1060 : }
1061 : break;
1062 9229 : case e_cmp:
1063 9229 : switch (e->flag) {
1064 6533 : case cmp_gt:
1065 : case cmp_gte:
1066 : case cmp_lte:
1067 : case cmp_lt:
1068 : case cmp_equal:
1069 : case cmp_notequal:
1070 : {
1071 6533 : sql_exp* l = e->l;
1072 6533 : sql_exp* r = e->r;
1073 6533 : sql_exp* f = e->f;
1074 :
1075 6533 : replace_column_references_with_nulls_2(sql, crefs, l);
1076 6533 : replace_column_references_with_nulls_2(sql, crefs, r);
1077 6533 : replace_column_references_with_nulls_2(sql, crefs, f);
1078 6533 : break;
1079 : }
1080 1938 : case cmp_filter:
1081 : case cmp_or:
1082 : {
1083 1938 : list* l = e->l;
1084 1938 : list* r = e->r;
1085 1938 : replace_column_references_with_nulls_1(sql, crefs, l);
1086 1938 : replace_column_references_with_nulls_1(sql, crefs, r);
1087 1938 : break;
1088 : }
1089 758 : case cmp_in:
1090 : case cmp_notin:
1091 : {
1092 758 : sql_exp* l = e->l;
1093 758 : list* r = e->r;
1094 758 : replace_column_references_with_nulls_2(sql, crefs, l);
1095 758 : replace_column_references_with_nulls_1(sql, crefs, r);
1096 758 : break;
1097 : }
1098 : default:
1099 : break;
1100 : }
1101 : break;
1102 996 : case e_func:
1103 : {
1104 996 : list* l = e->l;
1105 996 : replace_column_references_with_nulls_1(sql, crefs, l);
1106 996 : break;
1107 : }
1108 1142 : case e_convert:
1109 : {
1110 1142 : sql_exp* l = e->l;
1111 1142 : replace_column_references_with_nulls_2(sql, crefs, l);
1112 1142 : break;
1113 : }
1114 : default:
1115 : break;
1116 : }
1117 : }
1118 :
1119 : static sql_rel *
1120 4948 : out2inner(visitor *v, sql_rel* sel, sql_rel* join, sql_rel* inner_join_side, operator_type new_type) {
1121 :
1122 : /* handle inner_join relations with a simple select */
1123 4948 : if (is_select(inner_join_side->op) && inner_join_side->l)
1124 4948 : inner_join_side = inner_join_side->l;
1125 4948 : if (!is_base(inner_join_side->op) && !is_simple_project(inner_join_side->op)) {
1126 : // Nothing to do here.
1127 : return sel;
1128 : }
1129 :
1130 4818 : list* inner_join_column_references = inner_join_side->exps;
1131 4818 : list* select_predicates = exps_copy(v->sql, sel->exps);
1132 :
1133 10165 : for(node* n = select_predicates->h; n; n=n->next) {
1134 5430 : sql_exp* e = n->data;
1135 5430 : replace_column_references_with_nulls_2(v->sql, inner_join_column_references, e);
1136 :
1137 5430 : if (exp_is_false(e)) {
1138 83 : join->op = new_type;
1139 83 : v->changes++;
1140 83 : break;
1141 : }
1142 : }
1143 :
1144 : return sel;
1145 : }
1146 :
1147 : static inline sql_rel *
1148 3428233 : rel_out2inner(visitor *v, sql_rel *rel) {
1149 :
1150 3428233 : if (!is_non_trivial_select_applied_to_outer_join(rel)) {
1151 : // Nothing to do here.
1152 : return rel;
1153 : }
1154 :
1155 4925 : sql_rel* join = (sql_rel*) rel->l;
1156 :
1157 4925 : if (rel_is_ref(join)) {
1158 : /* Do not alter a multi-referenced join relation.
1159 : * This is problematic (e.g. in the case of the plan of a merge statement)
1160 : * basically because there are no guarantees on the other container relations.
1161 : * In particular there is no guarantee that the other referencing relations are
1162 : * select relations with null-rejacting predicates on the inner join side.
1163 : */
1164 : return rel;
1165 : }
1166 :
1167 4925 : sql_rel* inner_join_side;
1168 4925 : if (is_left(join->op)) {
1169 4876 : inner_join_side = join->r;
1170 4876 : return out2inner(v, rel, join, inner_join_side, op_join);
1171 : }
1172 49 : else if (is_right(join->op)) {
1173 26 : inner_join_side = join->l;
1174 26 : return out2inner(v, rel, join, inner_join_side, op_join);
1175 : }
1176 : else /*full outer join*/ {
1177 : // First check if left side can degenerate from full outer join to just right outer join.
1178 23 : inner_join_side = join->r;
1179 23 : rel = out2inner(v, rel, join, inner_join_side, op_right);
1180 : /* Now test if the right side can degenerate to
1181 : * a normal inner join or a left outer join
1182 : * depending on the result of previous call to out2inner.
1183 : */
1184 :
1185 23 : inner_join_side = join->l;
1186 38 : return out2inner(v, rel, join, inner_join_side, is_right(join->op)? op_join: op_left);
1187 : }
1188 : }
1189 :
1190 : static bool
1191 4219 : exps_uses_any(list *exps, list *l)
1192 : {
1193 4219 : bool uses_any = false;
1194 :
1195 4219 : if (list_empty(exps) || list_empty(l))
1196 14 : return false;
1197 8416 : for (node *n = l->h; n && !uses_any; n = n->next) {
1198 4211 : sql_exp *e = n->data;
1199 4211 : uses_any |= list_exps_uses_exp(exps, exp_relname(e), exp_name(e)) != NULL;
1200 : }
1201 :
1202 : return uses_any;
1203 : }
1204 :
1205 : /* TODO At the moment I have to disable the new join2semi because the join order optimizer doesn't take semi-joins into account,
1206 : so plans get deteriorated if more joins are optimized into semi-joins. Later I will review the join order with semi-joins and hopefully,
1207 : I will be able to re-enable the new join2semi. */
1208 : #if 0
1209 : #define NO_EXP_FOUND 0
1210 : #define FOUND_WITH_DUPLICATES 1
1211 : #define MAY_HAVE_DUPLICATE_NULLS 2
1212 : #define ALL_VALUES_DISTINCT 3
1213 :
1214 : static int
1215 : find_projection_for_join2semi(sql_rel *rel, sql_exp *jc)
1216 : {
1217 : sql_rel *res = NULL;
1218 : sql_exp *e = NULL;
1219 : bool underjoin = false;
1220 :
1221 : if ((e = rel_find_exp_and_corresponding_rel(rel, jc, &res, &underjoin))) {
1222 : if (underjoin || e->type != e_column)
1223 : return FOUND_WITH_DUPLICATES;
1224 : /* if just one groupby column is projected or the relation needs distinct values and one column is projected or is a primary key, it will be distinct */
1225 : if (is_unique(e) ||
1226 : (is_groupby(res->op) && list_length(res->r) == 1 && exps_find_exp(res->r, e)) ||
1227 : ((is_project(res->op) || is_base(res->op)) && ((need_distinct(res) && list_length(res->exps) == 1) || res->card < CARD_AGGR)))
1228 : return has_nil(e) ? MAY_HAVE_DUPLICATE_NULLS : ALL_VALUES_DISTINCT;
1229 : return FOUND_WITH_DUPLICATES;
1230 : }
1231 : return NO_EXP_FOUND;
1232 : }
1233 :
1234 : static int
1235 : subrel_uses_exp_outside_subrel(visitor *v, sql_rel *rel, list *l, sql_rel *j)
1236 : {
1237 : if (rel == j)
1238 : return 0;
1239 : if (mvc_highwater(v->sql))
1240 : return 1;
1241 : switch(rel->op){
1242 : case op_join:
1243 : case op_left:
1244 : case op_right:
1245 : case op_full:
1246 : return exps_uses_any(rel->exps, l) ||
1247 : subrel_uses_exp_outside_subrel(v, rel->l, l, j) || subrel_uses_exp_outside_subrel(v, rel->r, l, j);
1248 : case op_semi:
1249 : case op_anti:
1250 : case op_select:
1251 : return exps_uses_any(rel->exps, l) ||
1252 : subrel_uses_exp_outside_subrel(v, rel->l, l, j);
1253 : case op_project:
1254 : case op_groupby:
1255 : return exps_uses_any(rel->exps, l) || exps_uses_any(rel->r, l);
1256 : case op_basetable:
1257 : case op_table:
1258 : case op_union:
1259 : case op_except:
1260 : case op_inter:
1261 : return exps_uses_any(rel->exps, l);
1262 : case op_topn:
1263 : case op_sample:
1264 : return subrel_uses_exp_outside_subrel(v, rel->l, l, j);
1265 : default:
1266 : return 1;
1267 : }
1268 : }
1269 :
1270 : static int
1271 : projrel_uses_exp_outside_subrel(visitor *v, sql_rel *rel, list *l, sql_rel *j)
1272 : {
1273 : /* test if projecting relation uses any of the join expressions */
1274 : assert((is_simple_project(rel->op) || is_groupby(rel->op)) && rel->l);
1275 : return exps_uses_any(rel->exps, l) || exps_uses_any(rel->r, l) || subrel_uses_exp_outside_subrel(v, rel->l, l, j);
1276 : }
1277 :
1278 : static sql_rel *
1279 : rewrite_joins2semi(visitor *v, sql_rel *proj, sql_rel *rel)
1280 : {
1281 : /* generalize possibility : we need the visitor 'step' here */
1282 : if (rel_is_ref(rel) || mvc_highwater(v->sql)) /* if the join has multiple references, it's dangerous to convert it into a semijoin */
1283 : return rel;
1284 : if (is_innerjoin(rel->op) && !list_empty(rel->exps)) {
1285 : sql_rel *l = rel->l, *r = rel->r;
1286 : bool left_unique = true, right_unique = true;
1287 :
1288 : /* these relations don't project anything, so skip them */
1289 : while (is_topn(l->op) || is_sample(l->op) || is_select(l->op) || is_semi(l->op))
1290 : l = l->l;
1291 : /* joins will expand values, so don't search on those */
1292 : if (!is_base(l->op) && !is_project(l->op))
1293 : left_unique = false;
1294 : while (is_topn(r->op) || is_sample(r->op) || is_select(r->op) || is_semi(r->op))
1295 : r = r->l;
1296 : if (!is_base(r->op) && !is_project(r->op))
1297 : right_unique = false;
1298 : /* if all columns used in equi-joins from one of the sides are unique, the join can be rewritten into a semijoin */
1299 : for (node *n=rel->exps->h; n && (left_unique || right_unique); n = n->next) {
1300 : sql_exp *e = n->data, *el = e->l, *er = e->r;
1301 :
1302 : if (!is_compare(e->type) || e->flag != cmp_equal || exp_has_func(el) || exp_has_func(er)) {
1303 : left_unique = right_unique = false;
1304 : } else {
1305 : int found = 0;
1306 :
1307 : if (left_unique && (found = find_projection_for_join2semi(l, el)) > NO_EXP_FOUND)
1308 : left_unique &= (found == ALL_VALUES_DISTINCT || (found == MAY_HAVE_DUPLICATE_NULLS && (!is_semantics(e) || !has_nil(er))));
1309 : if (left_unique && (found = find_projection_for_join2semi(l, er)) > NO_EXP_FOUND)
1310 : left_unique &= (found == ALL_VALUES_DISTINCT || (found == MAY_HAVE_DUPLICATE_NULLS && (!is_semantics(e) || !has_nil(el))));
1311 : if (right_unique && (found = find_projection_for_join2semi(r, el)) > NO_EXP_FOUND)
1312 : right_unique &= (found == ALL_VALUES_DISTINCT || (found == MAY_HAVE_DUPLICATE_NULLS && (!is_semantics(e) || !has_nil(er))));
1313 : if (right_unique && (found = find_projection_for_join2semi(r, er)) > NO_EXP_FOUND)
1314 : right_unique &= (found == ALL_VALUES_DISTINCT || (found == MAY_HAVE_DUPLICATE_NULLS && (!is_semantics(e) || !has_nil(el))));
1315 : }
1316 : }
1317 :
1318 : /* now we need to check relation's expressions are not used */
1319 : if (left_unique && !projrel_uses_exp_outside_subrel(v, proj, l->exps, rel)) {
1320 : sql_rel *tmp = rel->r;
1321 : rel->r = rel->l;
1322 : rel->l = tmp;
1323 : rel->op = op_semi;
1324 : v->changes++;
1325 : } else if (right_unique && !projrel_uses_exp_outside_subrel(v, proj, r->exps, rel)) {
1326 : rel->op = op_semi;
1327 : v->changes++;
1328 : }
1329 : }
1330 : if (is_join(rel->op)) {
1331 : rel->l = rewrite_joins2semi(v, proj, rel->l);
1332 : rel->r = rewrite_joins2semi(v, proj, rel->r);
1333 : } else if (is_topn(rel->op) || is_sample(rel->op) || is_select(rel->op) || is_semi(rel->op)) {
1334 : rel->l = rewrite_joins2semi(v, proj, rel->l);
1335 : }
1336 : return rel;
1337 : }
1338 :
1339 : static inline sql_rel *
1340 : rel_join2semijoin(visitor *v, sql_rel *rel)
1341 : {
1342 : if ((is_simple_project(rel->op) || is_groupby(rel->op)) && rel->l)
1343 : rel->l = rewrite_joins2semi(v, rel, rel->l);
1344 : return rel;
1345 : }
1346 : #endif
1347 :
1348 : #define NO_PROJECTION_FOUND 0
1349 : #define MAY_HAVE_DUPLICATE_NULLS 1
1350 : #define ALL_VALUES_DISTINCT 2
1351 :
1352 : static int
1353 704309 : find_projection_for_join2semi(sql_rel *rel)
1354 : {
1355 704309 : if (is_simple_project(rel->op) || is_groupby(rel->op) || is_inter(rel->op) || is_except(rel->op) || is_base(rel->op) || (is_union(rel->op) && need_distinct(rel))) {
1356 373267 : if (rel->card < CARD_AGGR) /* const or groupby without group by exps */
1357 : return ALL_VALUES_DISTINCT;
1358 372120 : if (list_length(rel->exps) == 1) {
1359 6627 : sql_exp *e = rel->exps->h->data;
1360 : /* a single group by column in the projection list from a group by relation is guaranteed to be unique, but not an aggregate */
1361 6627 : if (e->type == e_column) {
1362 6597 : sql_rel *res = NULL;
1363 6597 : sql_exp *found = NULL;
1364 6597 : bool underjoin = false;
1365 :
1366 : /* if just one groupby column is projected or the relation needs distinct values and one column is projected or is a primary key, it will be distinct */
1367 6597 : if ((is_groupby(rel->op) && list_length(rel->r) == 1 && exps_find_exp(rel->r, e)) || (need_distinct(rel) && list_length(rel->exps) == 1))
1368 9528 : return ALL_VALUES_DISTINCT;
1369 2958 : if (is_unique(e))
1370 2249 : return has_nil(e) ? MAY_HAVE_DUPLICATE_NULLS : ALL_VALUES_DISTINCT;
1371 :
1372 709 : if ((is_simple_project(rel->op) || is_groupby(rel->op) || is_inter(rel->op) || is_except(rel->op)) &&
1373 129 : (found = rel_find_exp_and_corresponding_rel(rel->l, e, false, &res, &underjoin)) && !underjoin) { /* grouping column on inner relation */
1374 113 : if (need_distinct(res) && list_length(res->exps) == 1)
1375 : return ALL_VALUES_DISTINCT;
1376 113 : if (is_unique(found))
1377 0 : return has_nil(e) ? MAY_HAVE_DUPLICATE_NULLS : ALL_VALUES_DISTINCT;
1378 113 : if (found->type == e_column && found->card <= CARD_AGGR) {
1379 1 : if (!is_groupby(res->op) && list_length(res->exps) != 1)
1380 : return NO_PROJECTION_FOUND;
1381 0 : for (node *n = res->exps->h ; n ; n = n->next) { /* must be the single column in the group by expression list */
1382 0 : sql_exp *e = n->data;
1383 0 : if (e != found && e->type == e_column)
1384 : return NO_PROJECTION_FOUND;
1385 : }
1386 : return ALL_VALUES_DISTINCT;
1387 : }
1388 : }
1389 : }
1390 : }
1391 : }
1392 : return NO_PROJECTION_FOUND;
1393 : }
1394 :
1395 : static sql_rel *
1396 2244326 : find_candidate_join2semi(visitor *v, sql_rel *rel, bool *swap)
1397 : {
1398 : /* generalize possibility : we need the visitor 'step' here */
1399 2244846 : if (rel_is_ref(rel)) /* if the join has multiple references, it's dangerous to convert it into a semijoin */
1400 : return NULL;
1401 2127548 : if (rel->op == op_join && !list_empty(rel->exps) && list_empty(rel->attr)) {
1402 354321 : sql_rel *l = rel->l, *r = rel->r;
1403 354321 : int foundr = NO_PROJECTION_FOUND, foundl = NO_PROJECTION_FOUND, found = NO_PROJECTION_FOUND;
1404 354321 : bool ok = false;
1405 :
1406 354321 : foundr = find_projection_for_join2semi(r);
1407 354321 : if (foundr < ALL_VALUES_DISTINCT)
1408 349988 : foundl = find_projection_for_join2semi(l);
1409 354321 : if (foundr && foundr > foundl) {
1410 4339 : *swap = false;
1411 4339 : found = foundr;
1412 349982 : } else if (foundl) {
1413 2689 : *swap = true;
1414 2689 : found = foundl;
1415 : }
1416 :
1417 7028 : if (found > NO_PROJECTION_FOUND) {
1418 : /* if all join expressions can be pushed down or have function calls, then it cannot be rewritten into a semijoin */
1419 14060 : for (node *n=rel->exps->h; n && !ok; n = n->next) {
1420 7032 : sql_exp *e = n->data;
1421 :
1422 10796 : ok |= e->type == e_cmp && e->flag == cmp_equal && !exp_has_func(e) && !rel_rebind_exp(v->sql, l, e) && !rel_rebind_exp(v->sql, r, e) &&
1423 7 : (found == ALL_VALUES_DISTINCT || !is_semantics(e) || !has_nil((sql_exp *)e->l) || !has_nil((sql_exp *)e->r));
1424 : }
1425 : }
1426 :
1427 354321 : if (ok)
1428 : return rel;
1429 : }
1430 2124273 : if (is_join(rel->op) || is_semi(rel->op)) {
1431 533871 : sql_rel *c;
1432 :
1433 533871 : if ((c=find_candidate_join2semi(v, rel->l, swap)) != NULL ||
1434 533481 : (c=find_candidate_join2semi(v, rel->r, swap)) != NULL)
1435 412 : if (list_empty(c->attr))
1436 : return c;
1437 : }
1438 2123861 : if (is_topn(rel->op) || is_sample(rel->op))
1439 520 : return find_candidate_join2semi(v, rel->l, swap);
1440 : return NULL;
1441 : }
1442 :
1443 : static int
1444 2507 : subrel_uses_exp_outside_subrel(sql_rel *rel, list *l, sql_rel *c)
1445 : {
1446 2508 : if (rel == c)
1447 : return 0;
1448 : /* for subrel only expect joins (later possibly selects) */
1449 825 : if (is_join(rel->op) || is_semi(rel->op)) {
1450 418 : if (exps_uses_any(rel->exps, l))
1451 : return 1;
1452 818 : if (subrel_uses_exp_outside_subrel(rel->l, l, c) ||
1453 408 : subrel_uses_exp_outside_subrel(rel->r, l, c))
1454 5 : return 1;
1455 : }
1456 812 : if (is_topn(rel->op) || is_sample(rel->op))
1457 1 : return subrel_uses_exp_outside_subrel(rel->l, l, c);
1458 : return 0;
1459 : }
1460 :
1461 : static int
1462 3275 : rel_uses_exp_outside_subrel(sql_rel *rel, list *l, sql_rel *c)
1463 : {
1464 : /* for now we only expect sub relations of type project, selects (rel) or join/semi */
1465 3275 : if (is_simple_project(rel->op) || is_groupby(rel->op) || is_select(rel->op)) {
1466 3275 : if (!list_empty(rel->exps) && exps_uses_any(rel->exps, l))
1467 : return 1;
1468 1689 : if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r) && exps_uses_any(rel->r, l))
1469 : return 1;
1470 1689 : if (rel->l)
1471 1689 : return subrel_uses_exp_outside_subrel(rel->l, l, c);
1472 : }
1473 0 : if (is_topn(rel->op) || is_sample(rel->op))
1474 0 : return subrel_uses_exp_outside_subrel(rel->l, l, c);
1475 : return 1;
1476 : }
1477 :
1478 : static inline sql_rel *
1479 3428233 : rel_join2semijoin(visitor *v, sql_rel *rel)
1480 : {
1481 3428233 : if ((is_simple_project(rel->op) || is_groupby(rel->op)) && rel->l) {
1482 1176974 : bool swap = false;
1483 1176974 : sql_rel *l = rel->l;
1484 1176974 : sql_rel *c = find_candidate_join2semi(v, l, &swap);
1485 :
1486 1176974 : if (c) {
1487 : /* 'p' is a project */
1488 3275 : sql_rel *p = swap ? c->l : c->r;
1489 :
1490 : /* now we need to check if ce is only used at the level of c */
1491 3275 : if (!rel_uses_exp_outside_subrel(rel, p->exps, c)) {
1492 1681 : c->op = op_semi;
1493 1681 : if (swap) {
1494 518 : sql_rel *tmp = c->r;
1495 518 : c->r = c->l;
1496 518 : c->l = tmp;
1497 : }
1498 1681 : v->changes++;
1499 : }
1500 : }
1501 : }
1502 3428233 : return rel;
1503 : }
1504 :
1505 : static inline sql_rel *
1506 3428233 : rel_push_join_down_outer(visitor *v, sql_rel *rel)
1507 : {
1508 3428233 : if (is_join(rel->op) && !is_outerjoin(rel->op) && !is_single(rel) && !list_empty(rel->exps) && !rel_is_ref(rel)) {
1509 364921 : sql_rel *l = rel->l, *r = rel->r;
1510 :
1511 364921 : if (is_left(r->op) && (is_select(l->op) || (is_join(l->op) && !is_outerjoin(l->op))) && !rel_is_ref(l) &&
1512 887 : !rel_is_ref(r)) {
1513 887 : sql_rel *rl = r->l;
1514 887 : sql_rel *rr = r->r;
1515 887 : if (rel_is_ref(rl) || rel_is_ref(rr))
1516 : return rel;
1517 : /* join exps should only include l and r.l */
1518 887 : list *njexps = sa_list(v->sql->sa);
1519 2023 : for(node *n = rel->exps->h; n; n = n->next) {
1520 1136 : sql_exp *je = n->data;
1521 :
1522 1136 : assert(je->type == e_cmp);
1523 1136 : if (je->f)
1524 : return rel;
1525 1136 : if ((rel_find_exp(l, je->l) && rel_find_exp(rl, je->r)) || (rel_find_exp(l, je->r) && rel_find_exp(rl, je->l))) {
1526 1136 : list_append(njexps, je);
1527 : } else {
1528 0 : return rel;
1529 : }
1530 : }
1531 887 : sql_rel *nl = rel_crossproduct(v->sql->sa, rel_dup(l), rl, rel->op);
1532 887 : r->l = nl;
1533 887 : nl->exps = njexps;
1534 887 : nl->attr = rel->attr;
1535 887 : rel->attr = NULL;
1536 887 : set_processed(nl);
1537 887 : rel_dup(r);
1538 887 : rel_destroy(rel);
1539 887 : rel = r;
1540 887 : v->changes++;
1541 : }
1542 : }
1543 : return rel;
1544 : }
1545 :
1546 : static sql_rel *
1547 3428233 : rel_optimize_joins_(visitor *v, sql_rel *rel)
1548 : {
1549 3428233 : rel = rel_push_join_exps_down(v, rel);
1550 3428233 : rel = rel_out2inner(v, rel);
1551 3428233 : rel = rel_join2semijoin(v, rel);
1552 3428233 : rel = rel_push_join_down_outer(v, rel);
1553 3428233 : return rel;
1554 : }
1555 :
1556 : static sql_rel *
1557 89414 : rel_optimize_joins(visitor *v, global_props *gp, sql_rel *rel)
1558 : {
1559 89414 : (void) gp;
1560 89414 : return rel_visitor_topdown(v, rel, &rel_optimize_joins_);
1561 : }
1562 :
1563 : run_optimizer
1564 623386 : bind_optimize_joins(visitor *v, global_props *gp)
1565 : {
1566 623386 : int flag = v->sql->sql_optimizer;
1567 623190 : return gp->opt_level == 1 && (gp->cnt[op_join] || gp->cnt[op_left] || gp->cnt[op_right]
1568 1246576 : || gp->cnt[op_full] || gp->cnt[op_semi] || gp->cnt[op_anti]) && (flag & optimize_joins) ? rel_optimize_joins : NULL;
1569 : }
1570 :
1571 :
1572 : static sql_rel *rel_join_order_(visitor *v, sql_rel *rel);
1573 :
1574 : static void
1575 700468 : get_relations(visitor *v, sql_rel *rel, list *rels)
1576 : {
1577 974169 : if (list_empty(rel->attr) && !rel_is_ref(rel) && rel->op == op_join && rel->exps == NULL) {
1578 273701 : sql_rel *l = rel->l;
1579 273701 : sql_rel *r = rel->r;
1580 :
1581 273701 : get_relations(v, l, rels);
1582 273701 : get_relations(v, r, rels);
1583 273701 : rel->l = NULL;
1584 273701 : rel->r = NULL;
1585 273701 : rel_destroy(rel);
1586 : } else {
1587 426767 : rel = rel_join_order_(v, rel);
1588 426767 : append(rels, rel);
1589 : }
1590 700468 : }
1591 :
1592 : static void
1593 188894 : get_inner_relations(mvc *sql, sql_rel *rel, list *rels)
1594 : {
1595 299833 : if (!rel_is_ref(rel) && is_join(rel->op)) {
1596 110939 : sql_rel *l = rel->l;
1597 110939 : sql_rel *r = rel->r;
1598 :
1599 110939 : get_inner_relations(sql, l, rels);
1600 110939 : get_inner_relations(sql, r, rels);
1601 : } else {
1602 188894 : append(rels, rel);
1603 : }
1604 188894 : }
1605 :
1606 : static int
1607 1053280 : exp_count(int *cnt, sql_exp *e)
1608 : {
1609 1053280 : int flag;
1610 1053280 : if (!e)
1611 : return 0;
1612 1053280 : if (find_prop(e->p, PROP_JOINIDX))
1613 3541 : *cnt += 100;
1614 1053280 : if (find_prop(e->p, PROP_HASHCOL))
1615 34177 : *cnt += 100;
1616 1053280 : if (find_prop(e->p, PROP_HASHIDX))
1617 0 : *cnt += 100;
1618 1053280 : switch(e->type) {
1619 389004 : case e_cmp:
1620 389004 : if (!is_complex_exp(e->flag)) {
1621 330609 : exp_count(cnt, e->l);
1622 330609 : exp_count(cnt, e->r);
1623 330609 : if (e->f)
1624 3052 : exp_count(cnt, e->f);
1625 : }
1626 389004 : flag = e->flag;
1627 389004 : switch (flag) {
1628 306786 : case cmp_equal:
1629 306786 : *cnt += 90;
1630 306786 : return 90;
1631 17323 : case cmp_notequal:
1632 17323 : *cnt += 7;
1633 17323 : return 7;
1634 6500 : case cmp_gt:
1635 : case cmp_gte:
1636 : case cmp_lt:
1637 : case cmp_lte:
1638 6500 : *cnt += 6;
1639 6500 : if (e->l) {
1640 6500 : sql_exp *l = e->l;
1641 6500 : sql_subtype *t = exp_subtype(l);
1642 6500 : if (EC_TEMP(t->type->eclass)) /* give preference to temporal ranges */
1643 172 : *cnt += 90;
1644 : }
1645 6500 : if (e->f){ /* range */
1646 3052 : *cnt += 6;
1647 3052 : return 12;
1648 : }
1649 : return 6;
1650 693 : case cmp_filter:
1651 693 : if (exps_card(e->r) > CARD_AGGR) {
1652 : /* filters for joins are special */
1653 5 : *cnt += 1000;
1654 5 : return 1000;
1655 : }
1656 688 : *cnt += 2;
1657 688 : return 2;
1658 52914 : case cmp_in:
1659 : case cmp_notin: {
1660 52914 : list *l = e->r;
1661 52914 : int c = 9 - 10*list_length(l);
1662 52914 : *cnt += c;
1663 52914 : return c;
1664 : }
1665 4788 : case cmp_or: /* prefer or over functions */
1666 4788 : *cnt += 3;
1667 4788 : return 3;
1668 : default:
1669 : return 0;
1670 : }
1671 524477 : case e_column:
1672 524477 : *cnt += 20;
1673 524477 : return 20;
1674 121374 : case e_atom:
1675 121374 : *cnt += 10;
1676 121374 : return 10;
1677 2856 : case e_func:
1678 : /* functions are more expensive, depending on the number of columns involved. */
1679 2856 : if (e->card == CARD_ATOM)
1680 : return 0;
1681 2565 : *cnt -= 5*list_length(e->l);
1682 2565 : return 5*list_length(e->l);
1683 15569 : case e_convert:
1684 : /* functions are more expensive, depending on the number of columns involved. */
1685 15569 : if (e->card == CARD_ATOM)
1686 : return 0;
1687 : /* fall through */
1688 : default:
1689 14909 : *cnt -= 5;
1690 14909 : return -5;
1691 : }
1692 : }
1693 :
1694 : int
1695 264444 : exp_keyvalue(sql_exp *e)
1696 : {
1697 264444 : int cnt = 0;
1698 58417 : exp_count(&cnt, e);
1699 264444 : return cnt;
1700 : }
1701 :
1702 : static sql_exp *
1703 739604 : joinexp_col(sql_exp *e, sql_rel *r)
1704 : {
1705 739604 : if (e->type == e_cmp) {
1706 739604 : if (rel_has_exp(r, e->l, false) >= 0)
1707 395558 : return e->l;
1708 344046 : return e->r;
1709 : }
1710 0 : assert(0);
1711 : return NULL;
1712 : }
1713 :
1714 : static sql_column *
1715 489410 : table_colexp(sql_exp *e, sql_rel *r)
1716 : {
1717 489410 : sql_table *t = r->l;
1718 :
1719 489410 : if (e->type == e_column) {
1720 475493 : const char *name = exp_name(e);
1721 475493 : node *cn;
1722 :
1723 475493 : if (r->exps) { /* use alias */
1724 889096 : for (cn = r->exps->h; cn; cn = cn->next) {
1725 884823 : sql_exp *ce = cn->data;
1726 884823 : if (strcmp(exp_name(ce), name) == 0) {
1727 471220 : name = ce->r;
1728 471220 : break;
1729 : }
1730 : }
1731 : }
1732 1042298 : for (cn = ol_first_node(t->columns); cn; cn = cn->next) {
1733 1038025 : sql_column *c = cn->data;
1734 1038025 : if (strcmp(c->base.name, name) == 0)
1735 471220 : return c;
1736 : }
1737 : }
1738 : return NULL;
1739 : }
1740 :
1741 : static list *
1742 355655 : matching_joins(allocator *sa, list *rels, list *exps, sql_exp *je)
1743 : {
1744 355655 : sql_rel *l, *r;
1745 :
1746 355655 : assert (je->type == e_cmp);
1747 :
1748 355655 : l = find_rel(rels, je->l);
1749 355655 : r = find_rel(rels, je->r);
1750 355655 : if (l && r) {
1751 355511 : list *res;
1752 355511 : list *n_rels = sa_list(sa);
1753 :
1754 355511 : append(n_rels, l);
1755 355511 : append(n_rels, r);
1756 355511 : res = list_select(exps, n_rels, (fcmp) &exp_joins_rels, (fdup)NULL);
1757 355511 : return res;
1758 : }
1759 144 : return sa_list(sa);
1760 : }
1761 :
1762 : static int
1763 6373 : sql_column_kc_cmp(sql_column *c, sql_kc *kc)
1764 : {
1765 : /* return on equality */
1766 6373 : return (c->colnr - kc->c->colnr);
1767 : }
1768 :
1769 : static sql_idx *
1770 460189 : find_fk_index(mvc *sql, sql_table *l, list *lcols, sql_table *r, list *rcols)
1771 : {
1772 460189 : sql_trans *tr = sql->session->tr;
1773 :
1774 460189 : if (l->idxs) {
1775 460189 : node *in;
1776 547225 : for (in = ol_first_node(l->idxs); in; in = in->next){
1777 87876 : sql_idx *li = in->data;
1778 87876 : if (li->type == join_idx) {
1779 8717 : sql_key *rk = (sql_key*)os_find_id(tr->cat->objects, tr, ((sql_fkey*)li->key)->rkey);
1780 8717 : fcmp cmp = (fcmp)&sql_column_kc_cmp;
1781 :
1782 9666 : if (rk->t == r &&
1783 1789 : list_match(lcols, li->columns, cmp) == 0 &&
1784 840 : list_match(rcols, rk->columns, cmp) == 0) {
1785 840 : return li;
1786 : }
1787 : }
1788 : }
1789 : }
1790 : return NULL;
1791 : }
1792 :
1793 : static sql_rel *
1794 711310 : find_basetable( sql_rel *r)
1795 : {
1796 1064739 : if (!r)
1797 : return NULL;
1798 1057113 : switch(r->op) {
1799 572260 : case op_basetable:
1800 572260 : if (!r->l)
1801 0 : return NULL;
1802 : return r;
1803 353429 : case op_semi:
1804 : case op_anti:
1805 : case op_project:
1806 : case op_select:
1807 : case op_topn:
1808 : case op_sample:
1809 353429 : return find_basetable(r->l);
1810 : default:
1811 : return NULL;
1812 : }
1813 : }
1814 :
1815 : static int
1816 98785 : exps_count(list *exps)
1817 : {
1818 98785 : node *n;
1819 98785 : int cnt = 0;
1820 :
1821 98785 : if (!exps)
1822 : return 0;
1823 223351 : for (n = exps->h; n; n=n->next)
1824 124566 : exp_count(&cnt, n->data);
1825 98785 : return cnt;
1826 : }
1827 :
1828 : static list *
1829 253705 : order_join_expressions(mvc *sql, list *dje, list *rels)
1830 : {
1831 253705 : node *n;
1832 253705 : int cnt = list_length(dje);
1833 :
1834 253705 : if (cnt <= 1)
1835 : return dje;
1836 :
1837 65408 : list *res = sa_list(sql->sa);
1838 65408 : int i, *keys = SA_NEW_ARRAY(sql->ta, int, cnt);
1839 65408 : void **data = SA_NEW_ARRAY(sql->ta, void*, cnt);
1840 :
1841 271435 : for (n = dje->h, i = 0; n; n = n->next, i++) {
1842 206027 : sql_exp *e = n->data;
1843 :
1844 206027 : keys[i] = exp_keyvalue(e);
1845 : /* add some weight for the selections */
1846 206027 : if (e->type == e_cmp && !is_complex_exp(e->flag)) {
1847 206001 : sql_rel *l = find_rel(rels, e->l);
1848 206001 : sql_rel *r = find_rel(rels, e->r);
1849 :
1850 206001 : if (l && is_select(l->op) && l->exps)
1851 52905 : keys[i] += list_length(l->exps)*10 + exps_count(l->exps);
1852 206001 : if (r && is_select(r->op) && r->exps)
1853 45880 : keys[i] += list_length(r->exps)*10 + exps_count(r->exps);
1854 : }
1855 206027 : data[i] = n->data;
1856 : }
1857 : /* sort descending */
1858 65408 : GDKqsort(keys, data, NULL, cnt, sizeof(int), sizeof(void *), TYPE_int, true, true);
1859 336843 : for(i=0; i<cnt; i++) {
1860 206027 : list_append(res, data[i]);
1861 : }
1862 : return res;
1863 : }
1864 :
1865 : static int
1866 237270 : find_join_rels(list **L, list **R, list *exps, list *rels)
1867 : {
1868 237270 : node *n;
1869 :
1870 237270 : *L = sa_list(exps->sa);
1871 237270 : *R = sa_list(exps->sa);
1872 237270 : if (!exps || list_length(exps) <= 1)
1873 : return -1;
1874 289498 : for(n = exps->h; n; n = n->next) {
1875 218473 : sql_exp *e = n->data;
1876 218473 : sql_rel *l = NULL, *r = NULL;
1877 :
1878 218473 : if (!is_complex_exp(e->flag)){
1879 218466 : l = find_rel(rels, e->l);
1880 218466 : r = find_rel(rels, e->r);
1881 : }
1882 218466 : if (l<r) {
1883 141009 : list_append(*L, l);
1884 141009 : list_append(*R, r);
1885 : } else {
1886 77464 : list_append(*L, r);
1887 77464 : list_append(*R, l);
1888 : }
1889 : }
1890 : return 0;
1891 : }
1892 :
1893 : static list *
1894 71025 : distinct_join_exps(list *aje, list *lrels, list *rrels)
1895 : {
1896 71025 : node *n, *m, *o, *p;
1897 71025 : int len = list_length(aje), i, j;
1898 71025 : char *used = SA_ZNEW_ARRAY(aje->sa, char, len);
1899 71025 : list *res = sa_list(aje->sa);
1900 :
1901 71025 : assert(len == list_length(lrels));
1902 289498 : for(n = lrels->h, m = rrels->h, j = 0; n && m;
1903 218473 : n = n->next, m = m->next, j++) {
1904 218473 : if (n->data && m->data)
1905 948192 : for(o = n->next, p = m->next, i = j+1; o && p;
1906 729771 : o = o->next, p = p->next, i++) {
1907 729771 : if (o->data == n->data && p->data == m->data)
1908 30281 : used[i] = 1;
1909 : }
1910 : }
1911 289498 : for (i = 0, n = aje->h; i < len; n = n->next, i++) {
1912 218473 : if (!used[i])
1913 193284 : list_append(res, n->data);
1914 : }
1915 71025 : return res;
1916 : }
1917 :
1918 : static list *
1919 237270 : find_fk( mvc *sql, list *rels, list *exps)
1920 : {
1921 237270 : node *djn;
1922 237270 : list *sdje, *aje, *dje;
1923 237270 : list *lrels, *rrels;
1924 :
1925 : /* first find the distinct join expressions */
1926 237270 : aje = list_select(exps, rels, (fcmp) &exp_is_join, (fdup)NULL);
1927 : /* add left/right relation */
1928 237270 : if (find_join_rels(&lrels, &rrels, aje, rels) < 0)
1929 : dje = aje;
1930 : else
1931 71025 : dje = distinct_join_exps(aje, lrels, rrels);
1932 596425 : for(djn=dje->h; djn; djn = djn->next) {
1933 : /* equal join expressions */
1934 359248 : sql_idx *idx = NULL;
1935 359248 : sql_exp *je = djn->data, *le = je->l, *re = je->r;
1936 :
1937 359248 : if (is_complex_exp(je->flag))
1938 : break;
1939 359155 : if (!find_prop(je->p, PROP_JOINIDX)) {
1940 355655 : int swapped = 0;
1941 355655 : list *aaje = matching_joins(sql->sa, rels, aje, je);
1942 355655 : list *eje = list_select(aaje, (void*)1, (fcmp) &exp_is_eqjoin, (fdup)NULL);
1943 355655 : sql_rel *lr = find_rel(rels, le), *olr = lr;
1944 355655 : sql_rel *rr = find_rel(rels, re), *orr = rr;
1945 355655 : sql_rel *bt = NULL;
1946 355655 : char *iname;
1947 :
1948 355655 : sql_table *l, *r;
1949 355655 : list *lexps = list_map(eje, lr, (fmap) &joinexp_col);
1950 355655 : list *rexps = list_map(eje, rr, (fmap) &joinexp_col);
1951 355655 : list *lcols, *rcols;
1952 :
1953 355655 : lr = find_basetable(lr);
1954 355655 : rr = find_basetable(rr);
1955 355655 : if (!lr || !rr)
1956 232521 : continue;
1957 248362 : l = lr->l;
1958 248362 : r = rr->l;
1959 248362 : lcols = list_map(lexps, lr, (fmap) &table_colexp);
1960 248362 : rcols = list_map(rexps, rr, (fmap) &table_colexp);
1961 248362 : lcols->destroy = NULL;
1962 248362 : rcols->destroy = NULL;
1963 248362 : if (list_length(lcols) != list_length(rcols))
1964 17934 : continue;
1965 :
1966 230428 : idx = find_fk_index(sql, l, lcols, r, rcols);
1967 230428 : if (!idx) {
1968 229761 : idx = find_fk_index(sql, r, rcols, l, lcols);
1969 229761 : swapped = 1;
1970 : }
1971 :
1972 230428 : if (idx && (iname = sa_strconcat( sql->sa, "%", idx->base.name)) != NULL &&
1973 667 : ((!swapped && name_find_column(olr, NULL, iname, -2, &bt) == NULL) ||
1974 173 : ( swapped && name_find_column(orr, NULL, iname, -2, &bt) == NULL)))
1975 : idx = NULL;
1976 :
1977 : if (idx) {
1978 751 : prop *p;
1979 751 : node *n;
1980 751 : sql_exp *t = NULL, *i = NULL;
1981 :
1982 751 : if (list_length(lcols) > 1 || !mvc_debug_on(sql, 512)) {
1983 :
1984 : /* Add join between idx and TID */
1985 751 : if (swapped) {
1986 155 : sql_exp *s = je->l, *l = je->r;
1987 :
1988 155 : t = rel_find_column(sql, olr, s->l, TID);
1989 155 : i = rel_find_column(sql, orr, l->l, iname);
1990 155 : if (!t || !i)
1991 1 : continue;
1992 154 : t->p = NULL;
1993 154 : i->p = NULL;
1994 154 : je = exp_compare(sql->sa, i, t, cmp_equal);
1995 : } else {
1996 596 : sql_exp *s = je->r, *l = je->l;
1997 :
1998 596 : t = rel_find_column(sql, orr, s->l, TID);
1999 596 : i = rel_find_column(sql, olr, l->l, iname);
2000 596 : if (!t || !i)
2001 0 : continue;
2002 596 : t->p = NULL;
2003 596 : i->p = NULL;
2004 596 : je = exp_compare(sql->sa, i, t, cmp_equal);
2005 : }
2006 :
2007 : /* Remove all join expressions */
2008 1502 : for (n = eje->h; n; n = n->next)
2009 752 : list_remove_data(exps, NULL, n->data);
2010 750 : append(exps, je);
2011 750 : djn->data = je;
2012 0 : } else if (swapped) { /* else keep je for single column expressions */
2013 0 : je = exp_compare(sql->sa, je->r, je->l, cmp_equal);
2014 : /* Remove all join expressions */
2015 0 : for (n = eje->h; n; n = n->next)
2016 0 : list_remove_data(exps, NULL, n->data);
2017 0 : append(exps, je);
2018 0 : djn->data = je;
2019 : }
2020 750 : je->p = p = prop_create(sql->sa, PROP_JOINIDX, je->p);
2021 750 : p->value.pval = idx;
2022 : }
2023 : }
2024 : }
2025 :
2026 : /* sort expressions on weighted number of reducing operators */
2027 237270 : sdje = order_join_expressions(sql, dje, rels);
2028 237270 : return sdje;
2029 : }
2030 :
2031 : static int
2032 549148 : rels_find_one_rel( sql_rel **rels, int nr, sql_exp *e)
2033 : {
2034 549148 : int fnd = 0;
2035 :
2036 4409446 : for(int i = 1; i<=nr; i++) {
2037 3860364 : if (rel_has_exp(rels[i], e, false) == 0) {
2038 548907 : if (fnd)
2039 : return 0;
2040 : fnd = i;
2041 : }
2042 : }
2043 : return fnd;
2044 : }
2045 :
2046 : /* TODO move popcount and popcount64 into gdk_*.h, used in gdk_cand, strimps and here */
2047 : static inline int
2048 : popcount64(uint64_t x)
2049 : {
2050 : #ifdef __has_builtin
2051 : #if __has_builtin(__builtin_popcountll)
2052 : return (uint32_t) __builtin_popcountll(x);
2053 : #define BUILTIN_USED
2054 : #endif
2055 : #endif
2056 : #ifndef BUILTIN_USED
2057 : #if defined(_MSC_VER)
2058 : #if SIZEOF_OID == 4
2059 : /* no __popcnt64 on 32 bit Windows */
2060 : return (int) (__popcnt((uint32_t) x) + __popcnt((uint32_t) (x >> 32)));
2061 : #else
2062 : return (uint32_t) __popcnt64(x);
2063 : #endif
2064 : #else
2065 : x = (x & UINT64_C(0x5555555555555555)) + ((x >> 1) & UINT64_C(0x5555555555555555));
2066 : x = (x & UINT64_C(0x3333333333333333)) + ((x >> 2) & UINT64_C(0x3333333333333333));
2067 : x = (x & UINT64_C(0x0F0F0F0F0F0F0F0F)) + ((x >> 4) & UINT64_C(0x0F0F0F0F0F0F0F0F));
2068 : return (x * UINT64_C(0x0101010101010101)) >> 56;
2069 : #endif
2070 : #endif
2071 : #undef BUILTIN_USED
2072 : }
2073 :
2074 : static sql_rel *
2075 153066 : order_joins(visitor *v, list *rels, list *exps)
2076 : {
2077 153066 : sql_rel *top = NULL, *l = NULL, *r = NULL;
2078 153066 : sql_exp *cje;
2079 153066 : node *djn;
2080 153066 : list *sdje, *n_rels = NULL;
2081 153066 : int fnd = 0;
2082 153066 : unsigned int rsingle;
2083 153066 : int direct = 1;
2084 :
2085 : /* find foreign keys and reorder the expressions on reducing quality */
2086 153066 : sdje = find_fk(v->sql, rels, exps);
2087 :
2088 427535 : for(djn = sdje->h; djn; djn = djn->next ) {
2089 274469 : sql_exp *e = djn->data;
2090 274469 : list_remove_data(exps, NULL, e);
2091 : }
2092 153066 : if (list_length(rels) > 2 && mvc_debug_on(v->sql, 256)) {
2093 0 : top = rel_planner(v->sql, rels, sdje, exps);
2094 0 : return top;
2095 : }
2096 :
2097 153066 : int nr_exps = list_length(sdje), nr_rels = list_length(rels), ci = 1;
2098 153066 : if (nr_rels > 64) {
2099 1 : direct = 0;
2100 1 : n_rels = sa_list(v->sql->ta);
2101 : }
2102 153066 : sql_rel **rels_a = SA_NEW_ARRAY(v->sql->ta, sql_rel*, nr_rels+1); /* don't use slot 0 */
2103 153066 : rels_a[0] = NULL;
2104 579833 : for (node *n = rels->h; n; n = n->next, ci++) {
2105 426767 : rels_a[ci] = n->data;
2106 : }
2107 153066 : ulng *h = SA_NEW_ARRAY(v->sql->ta, ulng, nr_exps), rel_mask = 0; /* bit field (for > 64 its an imprint) */
2108 153066 : uint16_t *r1 = SA_NEW_ARRAY(v->sql->ta, uint16_t, nr_exps);
2109 153066 : uint16_t *r2 = SA_NEW_ARRAY(v->sql->ta, uint16_t, nr_exps);
2110 : /* change r3 into rest list's */
2111 153066 : int *r3 = SA_NEW_ARRAY(v->sql->ta, int, nr_exps);
2112 :
2113 153066 : ci = 0;
2114 427535 : for (node *n = sdje->h; n; n = n->next, ci++) {
2115 274469 : sql_exp *cje = n->data;
2116 :
2117 274469 : h[ci] = r1[ci] = r2[ci] = 0;
2118 274469 : r3[ci] = 0;
2119 : /* h[ci] = exp_find_rels(cje, rels) */
2120 274469 : if (cje->type != e_cmp || !is_complex_exp(cje->flag) || !find_prop(cje->p, PROP_HASHCOL) ||
2121 0 : (cje->type == e_cmp && cje->f == NULL)) {
2122 274469 : cje->tmp = ci;
2123 274469 : r1[ci] = rels_find_one_rel(rels_a, nr_rels, cje->l);
2124 274469 : r2[ci] = rels_find_one_rel(rels_a, nr_rels, cje->r);
2125 274469 : if (r1[ci])
2126 274335 : h[ci] |= ((ulng)1)<<((r1[ci]-1)%64);
2127 274469 : if (r2[ci])
2128 274316 : h[ci] |= ((ulng)1)<<((r2[ci]-1)%64);
2129 274469 : if (cje->f) {
2130 210 : r3[ci] = rels_find_one_rel(rels_a, nr_rels, cje->f);
2131 210 : if (r3[ci] == r2[ci])
2132 190 : r3[ci] = 0;
2133 210 : if (r3[ci])
2134 10 : h[ci] |= ((ulng)1)<<((r3[ci]-1)%64);
2135 : }
2136 : }
2137 : }
2138 : /* open problem, some expressions use more than 2 relations */
2139 : /* For example a.x = b.y * c.z; */
2140 153066 : if (list_length(rels) >= 2 && sdje->h) {
2141 306098 : for (node *n = sdje->h; n && !l && !r; n = n->next, ci++) {
2142 153054 : cje = n->data;
2143 :
2144 : /* find the involved relations */
2145 :
2146 : /* complex expressions may touch multiple base tables
2147 : * Should be pushed up to extra selection.
2148 : * */
2149 153054 : if (0 && popcount64(h[cje->tmp]) > 2)
2150 : assert(0);
2151 153054 : if (cje->type != e_cmp || !is_complex_exp(cje->flag) || !find_prop(cje->p, PROP_HASHCOL) ||
2152 0 : (cje->type == e_cmp && cje->f == NULL)) {
2153 153054 : l = rels_a[r1[cje->tmp]];
2154 153054 : r = rels_a[r2[cje->tmp]];
2155 153054 : if (l && r)
2156 152919 : rel_mask |= h[cje->tmp];
2157 : }
2158 : }
2159 153044 : cje->tmp = 0;
2160 :
2161 153044 : if (l && r && l != r)
2162 152919 : list_remove_data(sdje, NULL, cje);
2163 : }
2164 :
2165 153066 : if (l && r && l != r) {
2166 152919 : list_remove_data(rels, NULL, l);
2167 152919 : list_remove_data(rels, NULL, r);
2168 152919 : if (!direct) {
2169 1 : list_append(n_rels, l);
2170 1 : list_append(n_rels, r);
2171 : }
2172 :
2173 : /* Create a relation between l and r. Since the calling
2174 : functions rewrote the join tree, into a list of expressions
2175 : and a list of (simple) relations, there are no outer joins
2176 : involved, we can simply do a crossproduct here.
2177 : */
2178 152919 : rsingle = is_single(r);
2179 152919 : reset_single(r);
2180 152919 : top = rel_crossproduct(v->sql->sa, l, r, op_join);
2181 152919 : if (rsingle)
2182 5088 : set_single(r);
2183 152919 : rel_join_add_exp(v->sql->sa, top, cje);
2184 :
2185 : /* all other join expressions on these 2 relations */
2186 157238 : for (node *en = exps->h; en; ) {
2187 4319 : node *next = en->next;
2188 4319 : sql_exp *e = en->data;
2189 4319 : if (rel_rebind_exp(v->sql, top, e)) {
2190 4141 : rel_join_add_exp(v->sql->sa, top, e);
2191 4141 : list_remove_data(exps, NULL, e);
2192 : }
2193 : en = next;
2194 : }
2195 : fnd = 1;
2196 : }
2197 : /* build join tree using the ordered list */
2198 271175 : while(list_length(sdje) && fnd) {
2199 118109 : fnd = 0;
2200 : /* find the first expression which could be added */
2201 873579 : for(djn = sdje->h; djn && !fnd && rels->h; djn = (!fnd)?djn->next:NULL) {
2202 377735 : node *en;
2203 377735 : l = r = NULL;
2204 :
2205 377735 : cje = djn->data;
2206 377735 : if ((h[cje->tmp] & rel_mask) > 0) {
2207 118025 : if (rel_mask & (((ulng)1)<<((r1[cje->tmp]-1)%64)))
2208 66381 : l = rels_a[r1[cje->tmp]];
2209 118025 : if (rel_mask & (((ulng)1)<<((r2[cje->tmp]-1)%64)))
2210 51645 : r = rels_a[r2[cje->tmp]];
2211 : }
2212 377735 : if (!direct) { /* check if at least one side in n_rels */
2213 1040 : if (l && !list_find(n_rels, l, NULL))
2214 1007 : l = NULL;
2215 1040 : if (r && !list_find(n_rels, r, NULL))
2216 1 : r = NULL;
2217 : }
2218 :
2219 377735 : if (l && r) {
2220 0 : assert(0);
2221 : /* create a selection on the current */
2222 : rel_join_add_exp(v->sql->sa, top, cje);
2223 : fnd = 1;
2224 377735 : } else if (l || r) {
2225 : /* TODO: handle case for joins which need > 2 relations, ie where the current 'top' of the
2226 : * join tree needs to add more then one relation */
2227 118025 : rel_mask |= h[cje->tmp];
2228 118025 : if (l) {
2229 66381 : r = rels_a[r2[cje->tmp]];
2230 : } else {
2231 51644 : l = r;
2232 51644 : r = rels_a[r1[cje->tmp]];
2233 : }
2234 118025 : if (!r) {
2235 0 : fnd = 1; /* not really, but this bails out */
2236 0 : list_remove_data(sdje, NULL, cje); /* handle later as select */
2237 0 : continue;
2238 : }
2239 :
2240 : /* remove the expression from the lists */
2241 118025 : list_remove_data(sdje, NULL, cje);
2242 :
2243 118025 : list_remove_data(rels, NULL, r);
2244 118025 : if (!direct)
2245 63 : append(n_rels, r);
2246 :
2247 : /* create a join using the current expression */
2248 118025 : rsingle = is_single(r);
2249 118025 : reset_single(r);
2250 118025 : top = rel_crossproduct(v->sql->sa, top, r, op_join);
2251 118025 : if (rsingle)
2252 0 : set_single(r);
2253 118025 : rel_join_add_exp(v->sql->sa, top, cje);
2254 :
2255 : /* all join expressions on these tables */
2256 118432 : for (en = exps->h; en; ) {
2257 407 : node *next = en->next;
2258 407 : sql_exp *e = en->data;
2259 407 : if (rel_rebind_exp(v->sql, top, e)) {
2260 176 : rel_join_add_exp(v->sql->sa, top, e);
2261 176 : list_remove_data(exps, NULL, e);
2262 : }
2263 : en = next;
2264 : }
2265 : /* Remove other joins on the current 'n_rels'
2266 : set in the distinct list too */
2267 692017 : for (en = sdje->h; en; ) {
2268 573992 : node *next = en->next;
2269 573992 : sql_exp *e = en->data;
2270 573992 : if ((direct && ((e->flag <= cmp_notequal && (h[e->tmp] & rel_mask) == h[e->tmp] && h[e->tmp]) || (e->flag > cmp_notequal && rel_rebind_exp(v->sql, top, e)))) ||
2271 1953 : (!direct && rel_rebind_exp(v->sql, top, e))) {
2272 3311 : rel_join_add_exp(v->sql->sa, top, e);
2273 3311 : list_remove_data(sdje, NULL, en->data);
2274 : }
2275 : en = next;
2276 : }
2277 : fnd = 1;
2278 : }
2279 : }
2280 : }
2281 153066 : if (list_length(rels)) { /* more relations */
2282 1188 : node *n;
2283 4092 : for(n=rels->h; n; n = n->next) {
2284 2904 : sql_rel *nr = n->data;
2285 :
2286 2904 : if (top) {
2287 2757 : rsingle = is_single(nr);
2288 2757 : reset_single(nr);
2289 2757 : top = rel_crossproduct(v->sql->sa, top, nr, op_join);
2290 2757 : if (rsingle)
2291 0 : set_single(nr);
2292 : } else
2293 : top = nr;
2294 : }
2295 : }
2296 153066 : if (list_length(sdje)) {
2297 209 : if (list_empty(exps))
2298 : exps = sdje;
2299 : else
2300 0 : exps = list_merge(exps, sdje, (fdup)NULL);
2301 : }
2302 153066 : if (list_length(exps)) { /* more expressions (add selects) */
2303 233 : top = rel_select(v->sql->sa, top, NULL);
2304 471 : for(node *n=exps->h; n; n = n->next) {
2305 238 : sql_exp *e = n->data;
2306 :
2307 238 : if (exp_is_join_exp(e) == 0) {
2308 226 : sql_rel *nr = NULL;
2309 226 : if (is_theta_exp(e->flag)) {
2310 138 : nr = rel_push_join(v->sql, top->l, e->l, e->r, e->f, e, 0);
2311 88 : } else if (e->flag == cmp_filter || e->flag == cmp_or) {
2312 88 : sql_exp *l = exps_find_one_multi_exp(e->l), *r = exps_find_one_multi_exp(e->r);
2313 88 : if (l && r)
2314 82 : nr = rel_push_join(v->sql, top->l, l, r, NULL, e, 0);
2315 : }
2316 220 : if (!nr)
2317 6 : rel_join_add_exp(v->sql->sa, top->l, e);
2318 : } else
2319 12 : rel_select_add_exp(v->sql->sa, top, e);
2320 : }
2321 233 : if (list_empty(top->exps)) { /* empty select */
2322 221 : sql_rel *l = top->l;
2323 221 : top->l = NULL;
2324 221 : rel_destroy(top);
2325 221 : top = l;
2326 : }
2327 : }
2328 : return top;
2329 : }
2330 :
2331 : static int
2332 426767 : rel_neg_in_size(sql_rel *r)
2333 : {
2334 426767 : if ((is_union(r->op) /*|| is_munion(r->op)*/) && r->nrcols == 0)
2335 0 : return -1 + rel_neg_in_size(r->l);
2336 426767 : if (is_project(r->op) && r->nrcols == 0)
2337 0 : return -1;
2338 : return 0;
2339 : }
2340 :
2341 426767 : static void _rel_destroy(void *dummy, sql_rel *rel)
2342 : {
2343 426767 : (void)dummy;
2344 426767 : rel_destroy(rel);
2345 426767 : }
2346 :
2347 : static list *
2348 153066 : push_in_join_down(mvc *sql, list *rels, list *exps)
2349 : {
2350 153066 : node *n;
2351 153066 : int restart = 1;
2352 153066 : list *nrels;
2353 :
2354 : /* we should sort these first, ie small in's before large one's */
2355 153066 : nrels = list_sort(rels, (fkeyvalue)&rel_neg_in_size, (fdup)&rel_dup);
2356 :
2357 : /* we need to cleanup, the new refs ! */
2358 153066 : rels->destroy = (fdestroy)_rel_destroy;
2359 153066 : list_destroy(rels);
2360 153066 : rels = nrels;
2361 :
2362 : /* one of the rels should be a op_union with nrcols == 0 */
2363 459198 : while (restart) {
2364 579833 : for (n = rels->h; n; n = n->next) {
2365 426767 : sql_rel *r = n->data;
2366 :
2367 426767 : restart = 0;
2368 426767 : if (is_project(r->op) && r->nrcols == 0) {
2369 : /* next step find expression on this relation */
2370 0 : node *m;
2371 0 : sql_rel *l = NULL;
2372 0 : sql_exp *je = NULL;
2373 :
2374 0 : for(m = exps->h; !je && m; m = m->next) {
2375 0 : sql_exp *e = m->data;
2376 :
2377 0 : if (e->type == e_cmp && e->flag == cmp_equal) {
2378 : /* in values are on
2379 : the right of the join */
2380 0 : if (rel_has_exp(r, e->r, false) >= 0)
2381 0 : je = e;
2382 : }
2383 : }
2384 : /* with this expression find other relation */
2385 0 : if (je && (l = find_rel(rels, je->l)) != NULL) {
2386 0 : unsigned int rsingle = is_single(r);
2387 0 : reset_single(r);
2388 0 : sql_rel *nr = rel_crossproduct(sql->sa, l, r, op_join);
2389 0 : if (rsingle)
2390 0 : set_single(r);
2391 0 : rel_join_add_exp(sql->sa, nr, je);
2392 0 : list_append(rels, nr);
2393 0 : list_remove_data(rels, NULL, l);
2394 0 : list_remove_data(rels, NULL, r);
2395 0 : list_remove_data(exps, NULL, je);
2396 0 : restart = 1;
2397 0 : break;
2398 : }
2399 :
2400 : }
2401 : }
2402 : }
2403 153066 : return rels;
2404 : }
2405 :
2406 : static list *
2407 729024 : push_up_join_exps( mvc *sql, sql_rel *rel)
2408 : {
2409 729024 : if (rel_is_ref(rel))
2410 : return NULL;
2411 :
2412 688666 : switch(rel->op) {
2413 285972 : case op_join: {
2414 285972 : sql_rel *rl = rel->l;
2415 285972 : sql_rel *rr = rel->r;
2416 285972 : list *l, *r;
2417 :
2418 285972 : if (rel_is_ref(rl) && rel_is_ref(rr)) {
2419 16 : l = rel->exps;
2420 16 : rel->exps = NULL;
2421 16 : return l;
2422 : }
2423 285956 : l = push_up_join_exps(sql, rl);
2424 285956 : r = push_up_join_exps(sql, rr);
2425 285956 : if (l && r) {
2426 34 : l = list_merge(l, r, (fdup)NULL);
2427 34 : r = NULL;
2428 285922 : } else if (!l) {
2429 173938 : l = r;
2430 173938 : r = NULL;
2431 : }
2432 285956 : if (rel->exps) {
2433 254538 : if (l && !r)
2434 101416 : r = l;
2435 254538 : l = list_merge(rel->exps, r, (fdup)NULL);
2436 : }
2437 285956 : rel->exps = NULL;
2438 285956 : return l;
2439 : }
2440 : default:
2441 : return NULL;
2442 : }
2443 : }
2444 :
2445 : static sql_rel *
2446 284157 : reorder_join(visitor *v, sql_rel *rel)
2447 : {
2448 284157 : list *exps, *rels;
2449 :
2450 284157 : if (is_innerjoin(rel->op) && !is_single(rel) && !rel_is_ref(rel) && list_empty(rel->attr)) {
2451 174370 : if (list_empty(rel->exps)) {
2452 22113 : sql_rel *l = rel->l, *r = rel->r;
2453 22113 : if (!is_innerjoin(l->op) && !is_innerjoin(r->op))
2454 : return rel;
2455 : }
2456 157112 : rel->exps = push_up_join_exps(v->sql, rel);
2457 : }
2458 :
2459 266899 : if (!is_innerjoin(rel->op) || is_single(rel) || rel_is_ref(rel) || list_empty(rel->exps) || !list_empty(rel->attr)) {
2460 113833 : if (!list_empty(rel->exps)) { /* cannot add join idxs to cross products */
2461 77955 : exps = rel->exps;
2462 77955 : rel->exps = NULL; /* should be all crosstables by now */
2463 77955 : rels = sa_list(v->sql->ta);
2464 : /* try to use an join index also for outer joins */
2465 77955 : get_inner_relations(v->sql, rel, rels);
2466 77955 : int cnt = list_length(exps);
2467 77955 : rel->exps = find_fk(v->sql, rels, exps);
2468 77955 : if (list_length(rel->exps) != cnt)
2469 16435 : rel->exps = order_join_expressions(v->sql, exps, rels);
2470 : }
2471 113833 : rel->l = rel_join_order_(v, rel->l);
2472 113833 : rel->r = rel_join_order_(v, rel->r);
2473 : } else {
2474 153066 : exps = rel->exps;
2475 153066 : rel->exps = NULL; /* should be all crosstables by now */
2476 153066 : rels = sa_list(v->sql->ta);
2477 153066 : get_relations(v, rel, rels);
2478 153066 : if (list_length(rels) > 1) {
2479 153066 : rels = push_in_join_down(v->sql, rels, exps);
2480 153066 : rel = order_joins(v, rels, exps);
2481 : } else {
2482 0 : rel->exps = exps;
2483 : }
2484 : }
2485 : return rel;
2486 : }
2487 :
2488 : static sql_rel *
2489 2398506 : rel_join_order_(visitor *v, sql_rel *rel)
2490 : {
2491 2398506 : if (!rel)
2492 : return rel;
2493 :
2494 2382741 : switch (rel->op) {
2495 : case op_basetable:
2496 : break;
2497 3851 : case op_table:
2498 3851 : if (IS_TABLE_PROD_FUNC(rel->flag) || rel->flag == TABLE_FROM_RELATION)
2499 3851 : rel->l = rel_join_order_(v, rel->l);
2500 : break;
2501 : case op_join:
2502 : case op_left:
2503 : case op_right:
2504 : case op_full:
2505 : break;
2506 :
2507 11415 : case op_semi:
2508 : case op_anti:
2509 :
2510 : case op_union:
2511 : case op_inter:
2512 : case op_except:
2513 : case op_merge:
2514 11415 : rel->l = rel_join_order_(v, rel->l);
2515 11415 : rel->r = rel_join_order_(v, rel->r);
2516 11415 : break;
2517 115446 : case op_munion:
2518 115446 : assert(rel->l);
2519 466172 : for (node *n = ((list*)rel->l)->h; n; n = n->next)
2520 350726 : n->data = rel_join_order_(v, n->data);
2521 : break;
2522 1272923 : case op_project:
2523 : case op_select:
2524 : case op_groupby:
2525 : case op_topn:
2526 : case op_sample:
2527 1272923 : rel->l = rel_join_order_(v, rel->l);
2528 1272923 : break;
2529 52 : case op_ddl:
2530 52 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
2531 0 : rel->l = rel_join_order_(v, rel->l);
2532 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
2533 52 : rel->l = rel_join_order_(v, rel->l);
2534 52 : rel->r = rel_join_order_(v, rel->r);
2535 : }
2536 : break;
2537 11196 : case op_insert:
2538 : case op_update:
2539 : case op_delete:
2540 11196 : rel->r = rel_join_order_(v, rel->r);
2541 11196 : break;
2542 : case op_truncate:
2543 : break;
2544 : }
2545 2382741 : if (is_join(rel->op))
2546 284157 : rel = reorder_join(v, rel);
2547 : return rel;
2548 : }
2549 :
2550 : static sql_rel *
2551 82443 : rel_join_order(visitor *v, global_props *gp, sql_rel *rel)
2552 : {
2553 82443 : (void) gp;
2554 82443 : sql_rel *r = rel_join_order_(v, rel);
2555 82443 : sa_reset(v->sql->ta);
2556 82443 : return r;
2557 : }
2558 :
2559 : run_optimizer
2560 623386 : bind_join_order(visitor *v, global_props *gp)
2561 : {
2562 623386 : int flag = v->sql->sql_optimizer;
2563 623190 : return gp->opt_level == 1 && gp->opt_cycle < 10 && !gp->cnt[op_update] && (gp->cnt[op_join] || gp->cnt[op_left] ||
2564 1240529 : gp->cnt[op_right] || gp->cnt[op_full]) && (flag & join_order) ? rel_join_order : NULL;
2565 : }
2566 :
2567 : /* this join order is to be done once after statistics are gathered */
2568 : run_optimizer
2569 705680 : bind_join_order2(visitor *v, global_props *gp)
2570 : {
2571 : /*int flag = v->sql->sql_optimizer;
2572 : return gp->opt_level == 1 && !gp->has_special_modify && !gp->cnt[op_update] && (gp->cnt[op_join] || gp->cnt[op_left] ||
2573 : gp->cnt[op_right] || gp->cnt[op_full]) && (flag & join_order) ? rel_join_order : NULL;*/
2574 : /* TODO we have to propagate count statistics here */
2575 705680 : (void) v;
2576 705680 : (void) gp;
2577 705680 : return NULL;
2578 : }
2579 :
2580 :
2581 : static int
2582 13681 : is_identity_of(sql_exp *e, sql_rel *l)
2583 : {
2584 13681 : if (e->type != e_cmp)
2585 : return 0;
2586 13654 : if (!is_identity(e->l, l) || !is_identity(e->r, l) || (e->f && !is_identity(e->f, l)))
2587 13654 : return 0;
2588 : return 1;
2589 : }
2590 :
2591 : static inline sql_rel *
2592 17861 : rel_rewrite_semijoin(visitor *v, sql_rel *rel)
2593 : {
2594 17861 : assert(is_semi(rel->op));
2595 : {
2596 17861 : sql_rel *l = rel->l;
2597 17861 : sql_rel *r = rel->r;
2598 17861 : sql_rel *rl = (r->l)?r->l:NULL;
2599 17861 : int on_identity = 1;
2600 :
2601 17861 : if (!rel->exps || list_length(rel->exps) != 1 || !is_identity_of(rel->exps->h->data, l))
2602 : on_identity = 0;
2603 :
2604 : /* rewrite {semi,anti}join (A, join(A,B)) into {semi,anti}join (A,B)
2605 : * and {semi,anti}join (A, join(B,A)) into {semi,anti}join (A,B)
2606 : * Where the semi/anti join is done using the identity */
2607 0 : if (on_identity && l->ref.refcnt == 2 && ((is_join(r->op) && (l == r->l || l == r->r)) ||
2608 0 : (is_project(r->op) && rl && is_join(rl->op) && (l == rl->l || l == rl->r)))){
2609 0 : sql_rel *or = r;
2610 :
2611 0 : if (is_project(r->op))
2612 0 : r = rl;
2613 :
2614 0 : if (l == r->r)
2615 0 : rel->r = rel_dup(r->l);
2616 : else
2617 0 : rel->r = rel_dup(r->r);
2618 :
2619 0 : rel->exps = r->exps;
2620 0 : r->exps = NULL;
2621 0 : rel->attr = r->attr;
2622 0 : r->attr = NULL;
2623 0 : rel_destroy(or);
2624 0 : v->changes++;
2625 : }
2626 : }
2627 : {
2628 17861 : sql_rel *l = rel->l, *rl = NULL;
2629 17861 : sql_rel *r = rel->r, *or = r;
2630 :
2631 17861 : if (r)
2632 17861 : rl = r->l;
2633 17861 : if (r && is_project(r->op)) {
2634 14757 : r = rl;
2635 14757 : if (r)
2636 14540 : rl = r->l;
2637 : }
2638 :
2639 : /* More general case is (join reduction)
2640 : {semi,anti}join (A, join(A,B) [A.c1 == B.c1]) [ A.c1 == B.c1 ]
2641 : into {semi,anti}join (A,B) [ A.c1 == B.c1 ]
2642 :
2643 : for semijoin also A.c1 == B.k1 ] [ A.c1 == B.k2 ] could be rewritten
2644 : */
2645 17861 : if (l && r && rl &&
2646 16538 : is_basetable(l->op) && is_basetable(rl->op) &&
2647 3986 : is_join(r->op) && l->l == rl->l)
2648 : {
2649 26 : node *n, *m;
2650 26 : list *exps;
2651 :
2652 51 : if (!rel->exps || !r->exps ||
2653 25 : list_length(rel->exps) != list_length(r->exps))
2654 1 : return rel;
2655 25 : exps = new_exp_list(v->sql->sa);
2656 :
2657 : /* are the join conditions equal */
2658 25 : for (n = rel->exps->h, m = r->exps->h;
2659 25 : n && m; n = n->next, m = m->next)
2660 : {
2661 25 : sql_exp *le = NULL, *oe = n->data;
2662 25 : sql_exp *re = NULL, *ne = m->data;
2663 25 : sql_column *cl;
2664 25 : int equal = 0;
2665 :
2666 25 : if (oe->type != e_cmp || ne->type != e_cmp ||
2667 25 : oe->flag != cmp_equal ||
2668 25 : ne->flag != cmp_equal || is_anti(oe) || is_anti(ne))
2669 : return rel;
2670 :
2671 25 : if ((cl = exp_find_column(rel->l, oe->l, -2)) != NULL) {
2672 25 : le = oe->l;
2673 25 : re = oe->r;
2674 0 : } else if ((cl = exp_find_column(rel->l, oe->r, -2)) != NULL) {
2675 0 : le = oe->r;
2676 0 : re = oe->l;
2677 : } else
2678 : return rel;
2679 :
2680 25 : if (exp_find_column(rl, ne->l, -2) == cl) {
2681 25 : sql_exp *e = (or != r)?rel_find_exp(or, re):re;
2682 :
2683 25 : if (e)
2684 24 : equal = exp_match_exp(ne->r, e);
2685 25 : if (!e || !equal)
2686 : return rel;
2687 0 : re = ne->r;
2688 0 : } else if (exp_find_column(rl, ne->r, -2) == cl) {
2689 0 : sql_exp *e = (or != r)?rel_find_exp(or, re):re;
2690 :
2691 0 : if (e)
2692 0 : equal = exp_match_exp(ne->l, e);
2693 0 : if (!e || !equal)
2694 : return rel;
2695 0 : re = ne->l;
2696 : } else
2697 : return rel;
2698 :
2699 0 : ne = exp_compare(v->sql->sa, le, re, cmp_equal);
2700 0 : append(exps, ne);
2701 : }
2702 :
2703 0 : rel->r = rel_dup(r->r);
2704 0 : rel->exps = exps;
2705 0 : rel_destroy(or);
2706 0 : v->changes++;
2707 : }
2708 : }
2709 : return rel;
2710 : }
2711 :
2712 : /*
2713 : * Push semijoins down, pushes the semijoin through a join.
2714 : *
2715 : * semijoin( join(A, B) [ A.x == B.y ], C ) [ A.z == C.c ]
2716 : * ->
2717 : * join( semijoin(A, C) [ A.z == C.c ], B ) [ A.x == B.y ]
2718 : *
2719 : * also push simple expressions of a semijoin down if they only
2720 : * involve the left sided of the semijoin.
2721 : *
2722 : * in some cases the other way is useful, ie push join down
2723 : * semijoin. When the join reduces (ie when there are selects on it).
2724 : *
2725 : * At the moment, we only flag changes by this optimizer on the first level of optimization
2726 : */
2727 : static inline sql_rel *
2728 461463 : rel_push_semijoin_down_or_up(visitor *v, sql_rel *rel)
2729 : {
2730 461463 : uint8_t cycle = *(uint8_t*) v->data;
2731 :
2732 461463 : if (rel->op == op_join && rel->exps && rel->l) {
2733 425614 : sql_rel *l = rel->l, *r = rel->r;
2734 :
2735 425614 : if (is_semi(l->op) && !rel_is_ref(l) && is_select(r->op) && !rel_is_ref(r)) {
2736 22 : rel->l = l->l;
2737 22 : l->l = rel;
2738 22 : if (cycle <= 0)
2739 0 : v->changes++;
2740 22 : return l;
2741 : }
2742 : }
2743 : /* also case with 2 joins */
2744 : /* join ( join ( semijoin(), table), select (table)); */
2745 461441 : if (rel->op == op_join && rel->exps && rel->l) {
2746 425592 : sql_rel *l = rel->l, *r = rel->r;
2747 425592 : sql_rel *ll;
2748 :
2749 425592 : if (is_join(l->op) && !rel_is_ref(l) && is_select(r->op) && !rel_is_ref(r)) {
2750 8074 : ll = l->l;
2751 8074 : if (is_semi(ll->op) && !rel_is_ref(ll)) {
2752 51 : l->l = ll->l;
2753 51 : ll->l = rel;
2754 51 : if (cycle <= 0)
2755 12 : v->changes++;
2756 51 : return ll;
2757 : }
2758 : }
2759 : }
2760 : /* first push down the expressions involving only A */
2761 461390 : if (rel->op == op_semi && rel->exps && rel->l) {
2762 12119 : sql_rel *jl = rel->l, *ojl = jl;
2763 :
2764 12119 : set_processed(jl);
2765 27731 : for (node *n = rel->exps->h; n;) {
2766 15612 : node *next = n->next;
2767 15612 : sql_exp *e = n->data;
2768 :
2769 15612 : if (n != rel->exps->h && e->type == e_cmp && rel_rebind_exp(v->sql, jl, e)) {
2770 16 : if (!is_select(jl->op) || rel_is_ref(jl))
2771 14 : rel->l = jl = rel_select(v->sql->sa, jl, NULL);
2772 16 : rel_select_add_exp(v->sql->sa, jl, e);
2773 16 : list_remove_node(rel->exps, NULL, n);
2774 16 : if (cycle <= 0)
2775 16 : v->changes++;
2776 : }
2777 : n = next;
2778 : }
2779 12119 : if (ojl != jl)
2780 14 : set_processed(jl);
2781 : }
2782 461390 : if (rel->op == op_semi && rel->exps && rel->l) {
2783 12119 : operator_type op = rel->op, lop;
2784 12119 : node *n;
2785 12119 : sql_rel *l = rel->l, *ll = NULL, *lr = NULL;
2786 12119 : sql_rel *r = rel->r;
2787 12119 : list *exps = rel->exps, *nsexps, *njexps, *nsattr, *njattr;
2788 12119 : int left = 1, right = 1;
2789 :
2790 : /* handle project
2791 : if (l->op == op_project && !need_distinct(l))
2792 : l = l->l;
2793 : */
2794 :
2795 12119 : if (!is_join(l->op) || is_full(l->op) || rel_is_ref(l) || is_single(l))
2796 : return rel;
2797 :
2798 1170 : lop = l->op;
2799 1170 : ll = l->l;
2800 1170 : lr = l->r;
2801 :
2802 : /* check which side is used and other exps are atoms or from right of semijoin */
2803 2250 : for(n = exps->h; n; n = n->next) {
2804 1274 : sql_exp *sje = n->data;
2805 :
2806 1274 : if (sje->type != e_cmp || is_complex_exp(sje->flag))
2807 : return rel;
2808 : /* sje->l from ll and sje->r/f from semijoin r ||
2809 : * sje->l from semijoin r and sje->r/f from ll ||
2810 : * sje->l from lr and sje->r/f from semijoin r ||
2811 : * sje->l from semijoin r and sje->r/f from lr */
2812 2484 : if (left &&
2813 2484 : ((rel_rebind_exp(v->sql, ll, sje->l) && rel_rebind_exp(v->sql, rel->r, sje->r) && (!sje->f || rel_rebind_exp(v->sql, rel->r, sje->f))) ||
2814 1233 : (rel_rebind_exp(v->sql, rel->r, sje->l) && rel_rebind_exp(v->sql, ll, sje->r) && (!sje->f || rel_rebind_exp(v->sql, ll, sje->f)))))
2815 : right = 0;
2816 : else
2817 906 : left = 0;
2818 1710 : if (right &&
2819 1608 : ((rel_rebind_exp(v->sql, lr, sje->l) && rel_rebind_exp(v->sql, rel->r, sje->r) && (!sje->f || rel_rebind_exp(v->sql, rel->r, sje->f))) ||
2820 566 : (rel_rebind_exp(v->sql, rel->r, sje->l) && rel_rebind_exp(v->sql, lr, sje->r) && (!sje->f || rel_rebind_exp(v->sql, lr, sje->f)))))
2821 : left = 0;
2822 : else
2823 : right = 0;
2824 1242 : if (!right && !left)
2825 : return rel;
2826 : }
2827 976 : if (left && is_right(lop))
2828 : return rel;
2829 975 : if (right && is_left(lop))
2830 : return rel;
2831 973 : nsexps = exps_copy(v->sql, rel->exps);
2832 973 : nsattr = exps_copy(v->sql, rel->attr);
2833 973 : njexps = exps_copy(v->sql, l->exps);
2834 973 : njattr = exps_copy(v->sql, l->attr);
2835 973 : if (left)
2836 231 : l = rel_crossproduct(v->sql->sa, rel_dup(ll), rel_dup(r), op);
2837 : else
2838 742 : l = rel_crossproduct(v->sql->sa, rel_dup(lr), rel_dup(r), op);
2839 973 : l->exps = nsexps;
2840 973 : l->attr = nsattr;
2841 973 : set_processed(l);
2842 973 : if (left)
2843 231 : l = rel_crossproduct(v->sql->sa, l, rel_dup(lr), lop);
2844 : else
2845 742 : l = rel_crossproduct(v->sql->sa, rel_dup(ll), l, lop);
2846 973 : l->exps = njexps;
2847 973 : l->attr = njattr;
2848 973 : set_processed(l);
2849 973 : rel_destroy(rel);
2850 973 : rel = l;
2851 973 : if (cycle <= 0)
2852 570 : v->changes++;
2853 : }
2854 : return rel;
2855 : }
2856 :
2857 : /* antijoin(a, union(b,c)) -> antijoin(antijoin(a,b), c) */
2858 : static inline sql_rel *
2859 5688 : rel_rewrite_antijoin(visitor *v, sql_rel *rel)
2860 : {
2861 5688 : sql_rel *l = rel->l;
2862 5688 : sql_rel *r = rel->r;
2863 :
2864 5688 : assert(rel->op == op_anti);
2865 5688 : if (l && !rel_is_ref(l) && r && !rel_is_ref(r) && is_union(r->op) && !is_single(r)) {
2866 0 : sql_rel *rl = rel_dup(r->l), *nl;
2867 0 : sql_rel *rr = rel_dup(r->r);
2868 :
2869 0 : if (!is_project(rl->op))
2870 0 : rl = rel_project(v->sql->sa, rl,
2871 : rel_projections(v->sql, rl, NULL, 1, 1));
2872 0 : if (!is_project(rr->op))
2873 0 : rr = rel_project(v->sql->sa, rr,
2874 : rel_projections(v->sql, rr, NULL, 1, 1));
2875 0 : rel_rename_exps(v->sql, r->exps, rl->exps);
2876 0 : rel_rename_exps(v->sql, r->exps, rr->exps);
2877 :
2878 0 : nl = rel_crossproduct(v->sql->sa, rel->l, rl, op_anti);
2879 0 : nl->exps = exps_copy(v->sql, rel->exps);
2880 0 : nl->attr = exps_copy(v->sql, rel->attr);
2881 0 : set_processed(nl);
2882 0 : rel->l = nl;
2883 0 : rel->r = rr;
2884 0 : rel_destroy(r);
2885 0 : v->changes++;
2886 0 : return rel;
2887 : }
2888 : return rel;
2889 : }
2890 :
2891 : static sql_rel *
2892 3428248 : rel_optimize_semi_and_anti_(visitor *v, sql_rel *rel)
2893 : {
2894 : /* rewrite semijoin (A, join(A,B)) into semijoin (A,B) */
2895 3428248 : if (rel && is_semi(rel->op))
2896 17861 : rel = rel_rewrite_semijoin(v, rel);
2897 : /* push semijoin through join */
2898 3428248 : if (rel && (is_semi(rel->op) || is_innerjoin(rel->op)))
2899 461463 : rel = rel_push_semijoin_down_or_up(v, rel);
2900 : /* antijoin(a, union(b,c)) -> antijoin(antijoin(a,b), c) */
2901 3428248 : if (rel && rel->op == op_anti)
2902 5688 : rel = rel_rewrite_antijoin(v, rel);
2903 3428248 : return rel;
2904 : }
2905 :
2906 : static sql_rel *
2907 89414 : rel_optimize_semi_and_anti(visitor *v, global_props *gp, sql_rel *rel)
2908 : {
2909 89414 : v->data = &gp->opt_cycle;
2910 89414 : rel = rel_visitor_bottomup(v, rel, &rel_optimize_semi_and_anti_);
2911 89414 : v->data = gp;
2912 89414 : return rel;
2913 : }
2914 :
2915 : run_optimizer
2916 623384 : bind_optimize_semi_and_anti(visitor *v, global_props *gp)
2917 : {
2918 : /* Important -> Re-write semijoins after rel_join_order */
2919 623384 : int flag = v->sql->sql_optimizer;
2920 623188 : return gp->opt_level == 1 && (gp->cnt[op_join] || gp->cnt[op_left] || gp->cnt[op_right]
2921 1246572 : || gp->cnt[op_full] || gp->cnt[op_semi] || gp->cnt[op_anti]) && (flag & optimize_semi_and_anti) ? rel_optimize_semi_and_anti : NULL;
2922 : }
2923 :
2924 :
2925 : static sql_rel *
2926 1586177 : rel_semijoin_use_fk(visitor *v, sql_rel *rel)
2927 : {
2928 1586177 : if (is_semi(rel->op) && rel->exps) {
2929 6249 : list *exps = rel->exps;
2930 6249 : list *rels = sa_list(v->sql->sa);
2931 :
2932 6249 : rel->exps = NULL;
2933 6249 : append(rels, rel->l);
2934 6249 : append(rels, rel->r);
2935 6249 : (void) find_fk( v->sql, rels, exps);
2936 :
2937 6249 : rel->exps = exps;
2938 : }
2939 1586177 : return rel;
2940 : }
2941 :
2942 : /*
2943 : * Push {semi}joins down, pushes the joins through group by expressions.
2944 : * When the join is on the group by columns, we can push the joins left
2945 : * under the group by. This should only be done, iff the new semijoin would
2946 : * reduce the input table to the groupby. So there should be a reduction
2947 : * (selection) on the table A and this should be propagated to the groupby via
2948 : * for example a primary key.
2949 : *
2950 : * {semi}join( A, groupby( B ) [gbe][aggrs] ) [ gbe == A.x ]
2951 : * ->
2952 : * {semi}join( A, groupby( semijoin(B,A) [gbe == A.x] ) [gbe][aggrs] ) [ gbe == A.x ]
2953 : */
2954 : static inline sql_rel *
2955 1586177 : rel_push_join_down(visitor *v, sql_rel *rel)
2956 : {
2957 1586177 : if (!rel_is_ref(rel) && ((is_left(rel->op) || rel->op == op_join || is_semi(rel->op)) && rel->l && rel->exps)) {
2958 231847 : sql_rel *gb = rel->r, *ogb = gb, *l = NULL, *rell = rel->l;
2959 :
2960 231847 : if (is_simple_project(gb->op) && !rel_is_ref(gb))
2961 45551 : gb = gb->l;
2962 :
2963 231847 : if (rel_is_ref(rell) || !gb || rel_is_ref(gb))
2964 : return rel;
2965 :
2966 222058 : if (is_groupby(gb->op) && gb->r && list_length(gb->r)) {
2967 190 : list *exps = rel->exps, *jes = new_exp_list(v->sql->sa), *gbes = gb->r;
2968 190 : node *n, *m;
2969 : /* find out if all group by expressions are used in the join */
2970 195 : for(n = gbes->h; n; n = n->next) {
2971 190 : sql_exp *gbe = n->data;
2972 190 : int fnd = 0;
2973 190 : const char *rname = NULL, *name = NULL;
2974 :
2975 : /* project in between, ie find alias */
2976 : /* first find expression in expression list */
2977 190 : gbe = exps_uses_exp( gb->exps, gbe);
2978 190 : if (!gbe)
2979 0 : continue;
2980 190 : if (ogb != gb)
2981 167 : gbe = exps_uses_exp( ogb->exps, gbe);
2982 190 : if (gbe) {
2983 149 : rname = exp_find_rel_name(gbe);
2984 149 : name = exp_name(gbe);
2985 : }
2986 :
2987 149 : if (!name)
2988 41 : return rel;
2989 :
2990 370 : for (m = exps->h; m && !fnd; m = m->next) {
2991 221 : sql_exp *je = m->data;
2992 :
2993 221 : if (je->card >= CARD_ATOM && je->type == e_cmp &&
2994 221 : !is_complex_exp(je->flag)) {
2995 : /* expect right expression to match */
2996 221 : sql_exp *r = je->r;
2997 :
2998 221 : if (r == 0 || r->type != e_column)
2999 10 : continue;
3000 211 : if (r->l && rname && strcmp(r->l, rname) == 0 && strcmp(r->r, name)==0) {
3001 : fnd = 1;
3002 83 : } else if (!r->l && !rname && strcmp(r->r, name)==0) {
3003 : fnd = 1;
3004 : }
3005 : if (fnd) {
3006 128 : sql_exp *le = je->l;
3007 128 : sql_exp *re = exp_push_down_prj(v->sql, r, gb, gb->l);
3008 128 : if (!re || (list_length(jes) == 0 && !find_prop(le->p, PROP_HASHCOL))) {
3009 : fnd = 0;
3010 : } else {
3011 5 : int anti = is_anti(je), semantics = is_semantics(je);
3012 :
3013 5 : je = exp_compare(v->sql->sa, le, re, je->flag);
3014 5 : if (anti) set_anti(je);
3015 5 : if (semantics) set_semantics(je);
3016 5 : list_append(jes, je);
3017 : }
3018 : }
3019 : }
3020 : }
3021 149 : if (!fnd)
3022 : return rel;
3023 : }
3024 5 : l = rel_dup(rel->l);
3025 :
3026 : /* push join's left side (as semijoin) down group by */
3027 5 : l = gb->l = rel_crossproduct(v->sql->sa, gb->l, l, op_semi);
3028 5 : l->exps = jes;
3029 5 : set_processed(l);
3030 5 : v->changes++;
3031 5 : return rel;
3032 : }
3033 : }
3034 : return rel;
3035 : }
3036 :
3037 : static bool
3038 728 : check_projection_on_foreignside(sql_rel *r, list *pexps, int fk_left)
3039 : {
3040 : /* projection columns from the foreign side */
3041 728 : if (list_empty(pexps))
3042 : return true;
3043 3156 : for (node *n = pexps->h; n; n = n->next) {
3044 3091 : sql_exp *pe = n->data;
3045 :
3046 3091 : if (pe && is_atom(pe->type))
3047 23 : continue;
3048 3068 : if (pe && !is_alias(pe->type))
3049 : return false;
3050 : /* check for columns from the pk side, then keep the join with the pk */
3051 3047 : if ((fk_left && rel_find_exp(r->r, pe)) || (!fk_left && rel_find_exp(r->l, pe)))
3052 594 : return false;
3053 : }
3054 : return true;
3055 : }
3056 :
3057 : static sql_rel *
3058 237562 : rel_simplify_project_fk_join(mvc *sql, sql_rel *r, list *pexps, list *orderexps, int *changes)
3059 : {
3060 237562 : sql_rel *rl = r->l, *rr = r->r, *nr = NULL;
3061 237562 : sql_exp *je, *le, *nje, *re;
3062 237562 : int fk_left = 1;
3063 :
3064 : /* check for foreign key join */
3065 237562 : if (list_length(r->exps) != 1 || !list_empty(r->attr))
3066 7430 : return r;
3067 230132 : if (!(je = exps_find_prop(r->exps, PROP_JOINIDX)) || je->flag != cmp_equal)
3068 : return r;
3069 : /* je->l == foreign expression, je->r == primary expression */
3070 1204 : if (rel_find_exp(r->l, je->l)) {
3071 : fk_left = 1;
3072 34 : } else if (rel_find_exp(r->r, je->l)) {
3073 : fk_left = 0;
3074 : } else { /* not found */
3075 : return r;
3076 : }
3077 :
3078 : /* primary side must be a full table */
3079 1170 : if ((fk_left && (!is_left(r->op) && !is_full(r->op)) && !is_basetable(rr->op)) ||
3080 34 : (!fk_left && (!is_right(r->op) && !is_full(r->op)) && !is_basetable(rl->op)))
3081 554 : return r;
3082 :
3083 650 : if (!check_projection_on_foreignside(r, pexps, fk_left) || !check_projection_on_foreignside(r, orderexps, fk_left))
3084 613 : return r;
3085 :
3086 : /* rewrite, ie remove pkey side if possible */
3087 37 : le = (sql_exp*)je->l, re = (sql_exp*)je->l;
3088 :
3089 : /* both have NULL and there are semantics, the join cannot be removed */
3090 37 : if (is_semantics(je) && has_nil(le) && has_nil(re))
3091 : return r;
3092 :
3093 37 : (*changes)++;
3094 : /* if the foreign key column doesn't have NULL values, then return it */
3095 37 : if (!has_nil(le) || is_full(r->op) || (fk_left && is_left(r->op)) || (!fk_left && is_right(r->op))) {
3096 : /* if ->attr, introduce group by on index */
3097 31 : if (fk_left) {
3098 23 : nr = rel_dup(r->l);
3099 : } else {
3100 8 : nr = rel_dup(r->r);
3101 : }
3102 31 : if (!list_empty(r->attr)) {
3103 0 : nr = rel_groupby(sql, nr, NULL);
3104 0 : if (nr) {
3105 : // printf("# introduced groupby \n");
3106 0 : nr->r = append(sa_list(sql->sa), le);
3107 0 : nr->exps = r->attr;
3108 : }
3109 : }
3110 31 : return nr;
3111 : }
3112 :
3113 : /* remove NULL values, ie generate a select not null */
3114 6 : nje = exp_compare(sql->sa, exp_ref(sql, le), exp_atom(sql->sa, atom_general(sql->sa, exp_subtype(le), NULL, 0)), cmp_equal);
3115 6 : set_anti(nje);
3116 6 : set_has_no_nil(nje);
3117 6 : set_semantics(nje);
3118 6 : if (fk_left) {
3119 6 : nr = rel_dup(r->l);
3120 : } else {
3121 0 : nr = rel_dup(r->r);
3122 : }
3123 6 : nr = rel_select(sql->sa, nr, nje);
3124 6 : set_processed(nr);
3125 6 : return nr;
3126 : }
3127 :
3128 : static sql_rel *
3129 1853 : rel_simplify_count_fk_join(mvc *sql, sql_rel *r, list *gexps, list *gcols, int *changes)
3130 : {
3131 1853 : sql_rel *rl = r->l, *rr = r->r, *nr = NULL;
3132 1853 : sql_exp *je, *le, *nje, *re, *oce;
3133 1853 : int fk_left = 1;
3134 :
3135 : /* check for foreign key join */
3136 1853 : if (list_length(r->exps) != 1)
3137 : return r;
3138 1852 : if (!(je = exps_find_prop(r->exps, PROP_JOINIDX)) || je->flag != cmp_equal)
3139 : return r;
3140 : /* je->l == foreign expression, je->r == primary expression */
3141 63 : if (rel_find_exp(r->l, je->l)) {
3142 : fk_left = 1;
3143 7 : } else if (rel_find_exp(r->r, je->l)) {
3144 : fk_left = 0;
3145 : } else { /* not found */
3146 : return r;
3147 : }
3148 :
3149 63 : oce = gexps->h->data;
3150 63 : if (oce->l) /* we only handle COUNT(*) */
3151 : return r;
3152 :
3153 : /* primary side must be a full table */
3154 62 : if ((fk_left && (!is_left(r->op) && !is_full(r->op)) && !is_basetable(rr->op)) ||
3155 6 : (!fk_left && (!is_right(r->op) && !is_full(r->op)) && !is_basetable(rl->op)))
3156 : return r;
3157 :
3158 33 : if (fk_left && is_join(rl->op) && !rel_is_ref(rl)) {
3159 12 : r->l = rel_simplify_count_fk_join(sql, rl, gexps, gcols, changes);
3160 12 : if (rl != r->l)
3161 9 : rel_destroy(rl);
3162 : }
3163 39 : if (!fk_left && is_join(rr->op) && !rel_is_ref(rr)) {
3164 2 : r->r = rel_simplify_count_fk_join(sql, rr, gexps, gcols, changes);
3165 2 : if (rr != r->r)
3166 2 : rel_destroy(rr);
3167 : }
3168 :
3169 39 : if (!check_projection_on_foreignside(r, gcols, fk_left))
3170 : return r;
3171 :
3172 : /* rewrite, ie remove pkey side if possible */
3173 37 : le = (sql_exp*)je->l, re = (sql_exp*)je->l;
3174 :
3175 : /* both have NULL and there are semantics, the join cannot be removed */
3176 37 : if (is_semantics(je) && has_nil(le) && has_nil(re))
3177 : return r;
3178 :
3179 37 : (*changes)++;
3180 : /* if the foreign key column doesn't have NULL values, then return it */
3181 37 : if (!has_nil(le) || is_full(r->op) || (fk_left && is_left(r->op)) || (!fk_left && is_right(r->op))) {
3182 31 : if (fk_left) {
3183 25 : nr = rel_dup(r->l);
3184 : } else {
3185 6 : nr = rel_dup(r->r);
3186 : }
3187 31 : return nr;
3188 : }
3189 :
3190 : /* remove NULL values, ie generate a select not null */
3191 6 : nje = exp_compare(sql->sa, exp_ref(sql, le), exp_atom(sql->sa, atom_general(sql->sa, exp_subtype(le), NULL, 0)), cmp_equal);
3192 6 : set_anti(nje);
3193 6 : set_has_no_nil(nje);
3194 6 : set_semantics(nje);
3195 6 : if (fk_left) {
3196 6 : nr = rel_dup(r->l);
3197 : } else {
3198 0 : nr = rel_dup(r->r);
3199 : }
3200 6 : nr = rel_select(sql->sa, nr, nje);
3201 6 : set_processed(nr);
3202 6 : return nr;
3203 : }
3204 :
3205 : /*
3206 : * Handle (left/right/outer/natural) join fk-pk rewrites
3207 : * 1 group by ( fk-pk-join () ) [ count(*) ] -> group by ( fk )
3208 : * 2 project ( fk-pk-join () ) [ fk-column ] -> project (fk table)[ fk-column ]
3209 : * 3 project ( fk1-pk1-join( fk2-pk2-join()) [ fk-column, pk1 column ] -> project (fk1-pk1-join)[ fk-column, pk1 column ]
3210 : */
3211 : static inline sql_rel *
3212 3640731 : rel_simplify_fk_joins(visitor *v, sql_rel *rel)
3213 : {
3214 3640731 : sql_rel *r = NULL;
3215 :
3216 3640731 : if (is_simple_project(rel->op))
3217 1194241 : r = rel->l;
3218 :
3219 3640768 : while (is_simple_project(rel->op) && r && list_length(r->exps) == 1 && (is_join(r->op) || r->op == op_semi) && !(rel_is_ref(r))) {
3220 237562 : sql_rel *or = r;
3221 :
3222 237562 : r = rel_simplify_project_fk_join(v->sql, r, rel->exps, rel->r, &v->changes);
3223 237562 : if (r == or)
3224 : return rel;
3225 37 : rel_destroy(rel->l);
3226 37 : rel->l = r;
3227 : }
3228 :
3229 3403206 : if (!is_groupby(rel->op))
3230 : return rel;
3231 :
3232 86681 : r = rel->l;
3233 130797 : while(r && is_simple_project(r->op))
3234 44116 : r = r->l;
3235 :
3236 101085 : while (is_groupby(rel->op) && !rel_is_ref(rel) && r && (is_join(r->op) || r->op == op_semi) && list_length(r->exps) == 1 && !(rel_is_ref(r)) &&
3237 : /* currently only single count aggregation is handled, no other projects or aggregation */
3238 105117 : list_length(rel->exps) == 1 && exp_aggr_is_count(rel->exps->h->data)) {
3239 1839 : sql_rel *or = r;
3240 :
3241 1839 : r = rel_simplify_count_fk_join(v->sql, r, rel->exps, rel->r, &v->changes);
3242 1839 : if (r == or)
3243 : return rel;
3244 26 : rel_destroy(rel->l);
3245 26 : rel->l = r;
3246 : }
3247 : return rel;
3248 : }
3249 :
3250 : /*
3251 : * Gets the column expressions of a diff function and adds them to "columns".
3252 : * The diff function has two possible argument types: either a sql_exp representing a column
3253 : * or a sql_exp representing another diff function, therefore this function is recursive.
3254 : */
3255 : static void
3256 168 : get_diff_function_columns(sql_exp *diffExp, list *columns) {
3257 168 : list *args = diffExp->l;
3258 :
3259 370 : for (node *arg = args->h; arg; arg = arg->next) {
3260 202 : sql_exp *exp = arg->data;
3261 :
3262 : // diff function
3263 202 : if (exp->type == e_func) {
3264 34 : get_diff_function_columns(exp, columns);
3265 : }
3266 : // column
3267 : else {
3268 168 : list_append(columns, exp);
3269 : }
3270 : }
3271 168 : }
3272 :
3273 : /*
3274 : * Builds a list of aggregation key columns to be used by the select push down algorithm, namely for
3275 : * window functions. Returns NULL if the window function does not partition by any column
3276 : */
3277 : static list *
3278 3623 : get_aggregation_key_columns(allocator *sa, sql_rel *r) {
3279 34495 : for (node* n = r->exps->h; n; n = n->next) {
3280 31096 : sql_exp *e = n->data;
3281 :
3282 31096 : if (e->type == e_func) {
3283 7923 : sql_subfunc *f = e->f;
3284 :
3285 : // aggregation function
3286 7923 : if (!strcmp(f->func->base.name, "rank")) {
3287 224 : list* rankArguments = e->l;
3288 : // the partition key is the second argument
3289 224 : sql_exp *partitionExp = rankArguments->h->next->data;
3290 :
3291 : // check if the key contains any columns, i.e., is a diff function
3292 224 : if (partitionExp->type == e_func) {
3293 : // get columns to list
3294 134 : list *aggColumns = sa_list(sa);
3295 134 : get_diff_function_columns(partitionExp, aggColumns);
3296 134 : return aggColumns;
3297 : }
3298 : // the function has no aggregation columns (e_atom of boolean)
3299 : else {
3300 : return NULL;
3301 : }
3302 :
3303 : }
3304 : }
3305 : }
3306 : return NULL;
3307 : }
3308 :
3309 : /*
3310 : * Checks if a filter column is also used as an aggregation key, so it can be later safely pushed down.
3311 : */
3312 : static int
3313 187 : filter_column_in_aggregation_columns(sql_exp *column, list *aggColumns) {
3314 : /* check if it is a column or an e_convert, and get the actual column if it is the latter */
3315 187 : if (column->type == e_convert) {
3316 1 : column = column->l;
3317 : }
3318 :
3319 187 : char *tableName = column->l;
3320 187 : char *columnName = column->r;
3321 :
3322 404 : for (node *n = aggColumns->h; n; n = n->next) {
3323 226 : sql_exp *aggCol = n->data;
3324 226 : char *aggColTableName = aggCol->l;
3325 226 : char *aggColColumnName = aggCol->r;
3326 :
3327 226 : if (!strcmp(tableName, aggColTableName) && !strcmp(columnName, aggColColumnName)) {
3328 : /* match */
3329 : return 1;
3330 : }
3331 : }
3332 :
3333 : /* no matches found */
3334 : return 0;
3335 : }
3336 :
3337 :
3338 : /*
3339 : * Push select down, pushes the selects through (simple) projections. Also
3340 : * it cleans up the projections which become useless.
3341 : *
3342 : * WARNING - Make sure to call try_remove_empty_select macro before returning so we ensure
3343 : * possible generated empty selects won't never be generated
3344 : */
3345 : static sql_rel *
3346 7207926 : rel_push_select_down(visitor *v, sql_rel *rel)
3347 : {
3348 7207926 : list *exps = NULL;
3349 7207926 : sql_rel *r = NULL;
3350 7207926 : node *n;
3351 :
3352 7207926 : if (rel_is_ref(rel)) {
3353 374106 : if (is_select(rel->op) && !list_empty(rel->exps)) {
3354 : /* add inplace empty select */
3355 1732 : sql_rel *l = rel_select(v->sql->sa, rel->l, NULL);
3356 :
3357 1732 : l->exps = rel->exps;
3358 1732 : set_processed(l);
3359 1732 : rel->exps = NULL;
3360 1732 : rel->l = l;
3361 1732 : v->changes++;
3362 : }
3363 374106 : return rel;
3364 : }
3365 :
3366 : /* don't make changes for empty selects */
3367 6833820 : if (is_select(rel->op) && list_empty(rel->exps))
3368 10 : return try_remove_empty_select(v, rel);
3369 :
3370 : /* merge 2 selects */
3371 6833810 : r = rel->l;
3372 6833810 : if (is_select(rel->op) && r && r->exps && is_select(r->op) && !(rel_is_ref(r)) && !exps_have_func(rel->exps)) {
3373 27 : (void)list_merge(r->exps, rel->exps, (fdup)NULL);
3374 27 : rel->l = NULL;
3375 27 : rel_destroy(rel);
3376 27 : v->changes++;
3377 27 : return try_remove_empty_select(v, r);
3378 : }
3379 : /*
3380 : * Push select through semi/anti join
3381 : * select (semi(A,B)) == semi(select(A), B)
3382 : */
3383 6833783 : if (is_select(rel->op) && r && is_semi(r->op) && !(rel_is_ref(r))) {
3384 305 : rel->l = r->l;
3385 305 : r->l = rel;
3386 305 : v->changes++;
3387 : /*
3388 : * if A has 2 references (ie used on both sides of
3389 : * the semi join), we also push the select into A.
3390 : */
3391 305 : if (rel_is_ref(rel->l) && rel->l == rel_find_ref(r->r)){
3392 0 : sql_rel *lx = rel->l;
3393 0 : sql_rel *rx = r->r;
3394 0 : if (lx->ref.refcnt == 2 && !rel_is_ref(rx)) {
3395 0 : while (rx->l && !rel_is_ref(rx->l) &&
3396 0 : (is_project(rx->op) ||
3397 : is_select(rx->op) ||
3398 : is_join(rx->op)))
3399 : rx = rx->l;
3400 : /* probably we need to introduce a project */
3401 0 : rel_destroy(rel->l);
3402 0 : lx = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
3403 0 : r->l = lx;
3404 0 : rx->l = rel_dup(lx);
3405 : }
3406 : }
3407 305 : return r;
3408 : }
3409 6833478 : exps = rel->exps;
3410 :
3411 : /* push select through join */
3412 6833478 : if (is_select(rel->op) && r && is_join(r->op) && !rel_is_ref(r) && !is_single(r)){
3413 22158 : sql_rel *jl = r->l, *ojl = jl, *jr = r->r, *ojr = jr;
3414 22158 : int left = r->op == op_join || r->op == op_left;
3415 22158 : int right = r->op == op_join || r->op == op_right;
3416 :
3417 22158 : if (r->op == op_full)
3418 : return rel;
3419 :
3420 : /* introduce selects under the join (if needed) */
3421 22132 : set_processed(jl);
3422 22132 : set_processed(jr);
3423 48630 : for (n = exps->h; n;) {
3424 26498 : node *next = n->next;
3425 26498 : sql_exp *e = n->data;
3426 :
3427 26498 : if (!exp_unsafe(e, false, true)) {
3428 26292 : if (left && rel_rebind_exp(v->sql, jl, e)) {
3429 13315 : if (!is_select(jl->op) || rel_is_ref(jl))
3430 11128 : r->l = jl = rel_select(v->sql->sa, jl, NULL);
3431 13315 : rel_select_add_exp(v->sql->sa, jl, e);
3432 13315 : list_remove_node(exps, NULL, n);
3433 13315 : v->changes++;
3434 12977 : } else if (right && rel_rebind_exp(v->sql, jr, e)) {
3435 5155 : if (!is_select(jr->op) || rel_is_ref(jr))
3436 4207 : r->r = jr = rel_select(v->sql->sa, jr, NULL);
3437 5155 : rel_select_add_exp(v->sql->sa, jr, e);
3438 5155 : list_remove_node(exps, NULL, n);
3439 5155 : v->changes++;
3440 : }
3441 : }
3442 : n = next;
3443 : }
3444 22132 : if (ojl != jl)
3445 11128 : set_processed(jl);
3446 22132 : if (ojr != jr)
3447 4207 : set_processed(jr);
3448 : }
3449 :
3450 : /* merge select and cross product ? */
3451 6833452 : if (is_select(rel->op) && r && r->op == op_join && !rel_is_ref(r) && !is_single(r) && !exps_have_unsafe(exps, false, true)) {
3452 14522 : for (n = exps->h; n;) {
3453 1913 : node *next = n->next;
3454 1913 : sql_exp *e = n->data;
3455 :
3456 1913 : if (exp_is_join(e, NULL) == 0) {
3457 693 : if (!r->exps)
3458 109 : r->exps = sa_list(v->sql->sa);
3459 693 : append(r->exps, e);
3460 693 : list_remove_node(exps, NULL, n);
3461 693 : v->changes++;
3462 : }
3463 : n = next;
3464 : }
3465 : }
3466 :
3467 6833452 : if (is_select(rel->op) && r && (is_simple_project(r->op) || (is_groupby(r->op) && !list_empty(r->r))) && !rel_is_ref(r) && !is_single(r)){
3468 70225 : sql_rel *pl = r->l, *opl = pl;
3469 : /* we cannot push through window functions (for safety I disabled projects over DDL too) */
3470 70225 : if (pl && pl->op != op_ddl && !exps_have_unsafe(r->exps, false, false)) {
3471 : /* introduce selects under the project (if needed) */
3472 48689 : set_processed(pl);
3473 48689 : if (!pl->exps)
3474 411 : pl->exps = sa_list(v->sql->sa);
3475 103688 : for (n = exps->h; n;) {
3476 54999 : node *next = n->next;
3477 54999 : sql_exp *e = n->data, *ne = NULL;
3478 :
3479 : /* can we move it down */
3480 54999 : if (e->type == e_cmp && (ne = exp_push_down_prj(v->sql, e, r, pl)) && ne != e) {
3481 25393 : if (!(is_select(pl->op) && is_join(pl->op) && is_semi(pl->op)) || rel_is_ref(pl))
3482 25393 : r->l = pl = rel_select(v->sql->sa, pl, NULL);
3483 25393 : rel_select_add_exp(v->sql->sa, pl, ne);
3484 25393 : list_remove_node(exps, NULL, n);
3485 25393 : v->changes++;
3486 : }
3487 : n = next;
3488 : }
3489 48689 : if (opl != pl)
3490 18317 : set_processed(pl);
3491 : }
3492 :
3493 : /* push filters if they match the aggregation key on a window function */
3494 3623 : else if (pl && pl->op != op_ddl && exps_have_unsafe(r->exps, false, false)) {
3495 3623 : set_processed(pl);
3496 : /* list of aggregation key columns */
3497 3623 : list *aggColumns = get_aggregation_key_columns(v->sql->sa, r);
3498 :
3499 : /* aggregation keys found, check if any filter matches them */
3500 3623 : if (aggColumns) {
3501 325 : for (n = exps->h; n;) {
3502 191 : node *next = n->next;
3503 191 : sql_exp *e = n->data, *ne = NULL;
3504 :
3505 191 : if (e->type == e_cmp) {
3506 : /* simple comparison filter */
3507 191 : if (e->flag == cmp_gt || e->flag == cmp_gte || e->flag == cmp_lte || e->flag == cmp_lt
3508 191 : || e->flag == cmp_equal || e->flag == cmp_notequal || e->flag == cmp_in || e->flag == cmp_notin
3509 5 : || (e->flag == cmp_filter && ((list*)e->l)->cnt == 1)) {
3510 187 : sql_exp* column;
3511 : /* the column in 'like' filters is stored inside a list */
3512 187 : if (e->flag == cmp_filter) {
3513 1 : column = ((list*)e->l)->h->data;
3514 : } else {
3515 186 : column = e->l;
3516 : }
3517 :
3518 : /* check if the expression matches any aggregation key, meaning we can
3519 : try to safely push it down */
3520 187 : if (filter_column_in_aggregation_columns(column, aggColumns)) {
3521 9 : ne = exp_push_down_prj(v->sql, e, r, pl);
3522 :
3523 : /* can we move it down */
3524 9 : if (ne && ne != e && pl->exps) {
3525 9 : if (!is_select(pl->op) || rel_is_ref(pl))
3526 8 : r->l = pl = rel_select(v->sql->sa, pl, NULL);
3527 9 : rel_select_add_exp(v->sql->sa, pl, ne);
3528 9 : list_remove_node(exps, NULL, n);
3529 9 : v->changes++;
3530 : }
3531 : }
3532 : }
3533 : }
3534 : n = next;
3535 : }
3536 :
3537 : /* cleanup list */
3538 134 : list_destroy(aggColumns);
3539 : }
3540 : }
3541 : }
3542 :
3543 : /* try push select under set relation */
3544 6833452 : if (is_select(rel->op) && r && is_set(r->op) && !list_empty(r->exps) && !rel_is_ref(r) && !is_single(r) && !list_empty(exps)) {
3545 19 : sql_rel *u = r, *ul = u->l, *ur = u->r;
3546 :
3547 19 : ul = rel_dup(ul);
3548 19 : ur = rel_dup(ur);
3549 19 : if (!is_project(ul->op))
3550 0 : ul = rel_project(v->sql->sa, ul,
3551 : rel_projections(v->sql, ul, NULL, 1, 1));
3552 19 : if (!is_project(ur->op))
3553 0 : ur = rel_project(v->sql->sa, ur,
3554 : rel_projections(v->sql, ur, NULL, 1, 1));
3555 19 : rel_rename_exps(v->sql, u->exps, ul->exps);
3556 19 : rel_rename_exps(v->sql, u->exps, ur->exps);
3557 :
3558 : /* introduce selects under the set */
3559 19 : ul = rel_select(v->sql->sa, ul, NULL);
3560 19 : ul->exps = exps_copy(v->sql, exps);
3561 19 : set_processed(ul);
3562 19 : ur = rel_select(v->sql->sa, ur, NULL);
3563 19 : ur->exps = exps_copy(v->sql, exps);
3564 19 : set_processed(ur);
3565 :
3566 19 : rel = rel_inplace_setop(v->sql, rel, ul, ur, u->op, rel_projections(v->sql, rel, NULL, 1, 1));
3567 19 : if (need_distinct(u))
3568 0 : set_distinct(rel);
3569 19 : v->changes++;
3570 : }
3571 6833452 : if (is_select(rel->op) && r && is_munion(r->op) && !list_empty(r->exps) && !rel_is_ref(r) && !is_single(r) && !list_empty(exps)) {
3572 4865 : sql_rel *u = r;
3573 4865 : list *rels = u->l, *nrels = sa_list(v->sql->sa);
3574 14621 : for(node *n = rels->h; n; n = n->next) {
3575 9756 : sql_rel *ul = n->data;
3576 9756 : ul = rel_dup(ul);
3577 9756 : if (!is_project(ul->op))
3578 8 : ul = rel_project(v->sql->sa, ul,
3579 : rel_projections(v->sql, ul, NULL, 1, 1));
3580 9756 : rel_rename_exps(v->sql, u->exps, ul->exps);
3581 :
3582 : /* introduce selects under the set */
3583 9756 : ul = rel_select(v->sql->sa, ul, NULL);
3584 9756 : ul->exps = exps_copy(v->sql, exps);
3585 9756 : set_processed(ul);
3586 9756 : nrels = append(nrels, ul);
3587 : }
3588 :
3589 4865 : rel = rel_inplace_setop_n_ary(v->sql, rel, nrels, u->op, rel_projections(v->sql, rel, NULL, 1, 1));
3590 4865 : if (need_distinct(u))
3591 6 : set_distinct(rel);
3592 4865 : v->changes++;
3593 : }
3594 :
3595 6833452 : return try_remove_empty_select(v, rel);
3596 : }
3597 :
3598 : static int
3599 12033 : index_exp(sql_exp *e, sql_idx *i)
3600 : {
3601 12033 : if (e->type == e_cmp && !is_complex_exp(e->flag)) {
3602 6776 : switch(i->type) {
3603 6776 : case hash_idx:
3604 : case oph_idx:
3605 6776 : if (e->flag == cmp_equal)
3606 : return 0;
3607 : /* fall through */
3608 : case join_idx:
3609 : default:
3610 1252 : return -1;
3611 : }
3612 : }
3613 : return -1;
3614 : }
3615 :
3616 : /* find column for the select/join expression */
3617 : static sql_column *
3618 5524 : sjexp_col(sql_exp *e, sql_rel *r)
3619 : {
3620 5524 : sql_column *res = NULL;
3621 :
3622 5524 : if (e->type == e_cmp && !is_complex_exp(e->flag)) {
3623 5524 : res = exp_find_column(r, e->l, -2);
3624 5524 : if (!res)
3625 466 : res = exp_find_column(r, e->r, -2);
3626 : }
3627 5524 : return res;
3628 : }
3629 :
3630 : static sql_idx *
3631 1771496 : find_index(allocator *sa, sql_rel *rel, sql_rel *sub, list **EXPS)
3632 : {
3633 1771496 : node *n;
3634 :
3635 : /* any (partial) match of the expressions with the index columns */
3636 : /* Depending on the index type we may need full matches and only
3637 : limited number of cmp types (hash only equality etc) */
3638 : /* Depending on the index type we should (in the rel_bin) generate
3639 : more code, ie for spatial index add post filter etc, for hash
3640 : compute hash value and use index */
3641 :
3642 1771496 : if (sub->exps && rel->exps)
3643 7110919 : for(n = sub->exps->h; n; n = n->next) {
3644 5522169 : prop *p;
3645 5522169 : sql_exp *e = n->data;
3646 :
3647 5522169 : if ((p = find_prop(e->p, PROP_HASHIDX)) != NULL) {
3648 9408 : list *exps, *cols;
3649 9408 : sql_idx *i = p->value.pval;
3650 9408 : fcmp cmp = (fcmp)&sql_column_kc_cmp;
3651 :
3652 : /* join indices are only interesting for joins */
3653 9408 : if (i->type == join_idx || list_length(i->columns) <= 1)
3654 0 : continue;
3655 : /* based on the index type, find qualifying exps */
3656 9408 : exps = list_select(rel->exps, i, (fcmp) &index_exp, (fdup)NULL);
3657 9408 : if (list_empty(exps))
3658 4848 : continue;
3659 : /* now we obtain the columns, move into sql_column_kc_cmp! */
3660 4560 : cols = list_map(exps, sub, (fmap) &sjexp_col);
3661 :
3662 : /* TODO check that at most 2 relations are involved */
3663 :
3664 : /* Match the index columns with the expression columns.
3665 : TODO, Allow partial matches ! */
3666 4560 : if (list_match(cols, i->columns, cmp) == 0) {
3667 : /* re-order exps in index order */
3668 286 : node *n, *m;
3669 286 : list *es = sa_list(sa);
3670 :
3671 958 : for(n = i->columns->h; n; n = n->next) {
3672 672 : int i = 0;
3673 2152 : for(m = cols->h; m; m = m->next, i++) {
3674 2152 : if (cmp(m->data, n->data) == 0){
3675 672 : sql_exp *e = list_fetch(exps, i);
3676 672 : list_append(es, e);
3677 672 : break;
3678 : }
3679 : }
3680 : }
3681 : /* fix the destroy function */
3682 286 : cols->destroy = NULL;
3683 286 : *EXPS = es;
3684 286 : e->used = 1;
3685 286 : return i;
3686 : }
3687 4274 : cols->destroy = NULL;
3688 : }
3689 : }
3690 : return NULL;
3691 : }
3692 :
3693 : static inline sql_rel *
3694 1157535 : rel_use_index(visitor *v, sql_rel *rel)
3695 : {
3696 1157535 : list *exps = NULL;
3697 1157535 : sql_idx *i = find_index(v->sql->sa, rel, rel->l, &exps);
3698 1157535 : int left = 1;
3699 :
3700 1157535 : assert(is_select(rel->op) || is_join(rel->op));
3701 1157535 : if (!i && is_join(rel->op)) {
3702 613961 : left = 0;
3703 613961 : i = find_index(v->sql->sa, rel, rel->r, &exps);
3704 : }
3705 :
3706 1157355 : if (i) {
3707 286 : prop *p;
3708 286 : node *n;
3709 286 : int single_table = 1;
3710 286 : sql_exp *re = NULL;
3711 :
3712 956 : for( n = exps->h; n && single_table; n = n->next) {
3713 670 : sql_exp *e = n->data, *nre = e->l;
3714 :
3715 670 : if (!is_compare(e->type) || is_anti(e) || e->flag != cmp_equal)
3716 : return rel;
3717 670 : if (is_join(rel->op) && ((left && !rel_find_exp(rel->l, nre)) || (!left && rel_find_exp(rel->r, nre))))
3718 111 : nre = e->r;
3719 670 : single_table = (!re || (exp_relname(nre) && exp_relname(re) && strcmp(exp_relname(nre), exp_relname(re)) == 0));
3720 670 : re = nre;
3721 : }
3722 286 : if (single_table) { /* add PROP_HASHCOL to all column exps */
3723 932 : for( n = exps->h; n; n = n->next) {
3724 654 : sql_exp *e = n->data;
3725 :
3726 : /* swapped ? */
3727 654 : if (is_join(rel->op) && ((left && !rel_find_exp(rel->l, e->l)) || (!left && !rel_find_exp(rel->r, e->l)))) {
3728 165 : exp_swap(e);
3729 : }
3730 654 : p = find_prop(e->p, PROP_HASHCOL);
3731 654 : if (!p)
3732 285 : e->p = p = prop_create(v->sql->sa, PROP_HASHCOL, e->p);
3733 654 : p->value.pval = i;
3734 : }
3735 : }
3736 : /* add the remaining exps to the new exp list */
3737 286 : if (list_length(rel->exps) > list_length(exps)) {
3738 108 : for( n = rel->exps->h; n; n = n->next) {
3739 81 : sql_exp *e = n->data;
3740 81 : if (!list_find(exps, e, (fcmp)&exp_cmp))
3741 27 : list_append(exps, e);
3742 : }
3743 : }
3744 286 : rel->exps = exps;
3745 : }
3746 : return rel;
3747 : }
3748 :
3749 : static sql_rel *
3750 3640731 : rel_select_leftgroup_2_semi(visitor *v, sql_rel *rel)
3751 : {
3752 3640731 : if (rel_is_ref(rel) || !is_select(rel->op) || list_empty(rel->exps))
3753 3107437 : return rel;
3754 533294 : sql_rel *l = rel->l;
3755 :
3756 533294 : if (!l || rel_is_ref(l) || !is_left(l->op) || list_empty(l->attr))
3757 532308 : return rel;
3758 :
3759 1964 : for(node *n = rel->exps->h; n; n = n->next) {
3760 986 : sql_exp *e = n->data;
3761 :
3762 986 : if (e->type == e_cmp && !is_semantics(e) && !e->f) {
3763 973 : list *attrs = l->attr;
3764 973 : sql_exp *a = attrs->h->data;
3765 :
3766 973 : if (exps_find_exp(l->attr, e->l) && exp_is_true(e->r) && e->flag == cmp_equal /*&& exp_is_true(a)*/) {
3767 : // printf("# optimize select leftgroup -> semi\n");
3768 8 : if (!list_empty(l->exps)) {
3769 13 : for(node *m = l->exps->h; m; m = m->next) {
3770 7 : sql_exp *j = m->data;
3771 7 : reset_any(j);
3772 : }
3773 : }
3774 8 : l->attr = NULL;
3775 8 : l->op = exp_is_true(a)?op_semi:op_anti;
3776 8 : list_remove_node(rel->exps, NULL, n);
3777 8 : rel = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
3778 8 : list_append(rel->exps, attrs->h->data);
3779 8 : v->changes++;
3780 8 : return rel;
3781 : }
3782 : }
3783 : }
3784 : return rel;
3785 : }
3786 :
3787 : static sql_rel *
3788 3640731 : rel_optimize_select_and_joins_topdown_(visitor *v, sql_rel *rel)
3789 : {
3790 : /* push_join_down introduces semijoins */
3791 3640731 : uint8_t cycle = *(uint8_t*) v->data;
3792 3640731 : if (cycle <= 0) {
3793 1586177 : rel = rel_semijoin_use_fk(v, rel);
3794 1586177 : rel = rel_push_join_down(v, rel);
3795 : }
3796 :
3797 3640731 : rel = rel_simplify_fk_joins(v, rel);
3798 3640731 : rel = rel_push_select_down(v, rel);
3799 3640731 : rel = rel_select_leftgroup_2_semi(v, rel);
3800 3640731 : if (rel && rel->l && (is_select(rel->op) || is_join(rel->op)))
3801 1157535 : rel = rel_use_index(v, rel);
3802 3640731 : return rel;
3803 : }
3804 :
3805 : static sql_rel *
3806 126368 : rel_optimize_select_and_joins_topdown(visitor *v, global_props *gp, sql_rel *rel)
3807 : {
3808 126368 : v->data = &gp->opt_cycle;
3809 126368 : rel = rel_visitor_topdown(v, rel, &rel_optimize_select_and_joins_topdown_);
3810 126368 : v->data = gp;
3811 126368 : return rel;
3812 : }
3813 :
3814 : run_optimizer
3815 623385 : bind_optimize_select_and_joins_topdown(visitor *v, global_props *gp)
3816 : {
3817 623385 : int flag = v->sql->sql_optimizer;
3818 623189 : return gp->opt_level == 1 && (gp->cnt[op_join] || gp->cnt[op_left] || gp->cnt[op_right]
3819 538973 : || gp->cnt[op_full] || gp->cnt[op_semi] || gp->cnt[op_anti] ||
3820 1246574 : gp->cnt[op_select]) && (flag & optimize_select_and_joins_topdown) ? rel_optimize_select_and_joins_topdown : NULL;
3821 : }
3822 :
3823 :
3824 : static int
3825 1242386 : can_push_func(sql_exp *e, sql_rel *rel, int *must, int depth)
3826 : {
3827 1286554 : switch(e->type) {
3828 1088751 : case e_cmp: {
3829 1088751 : sql_exp *l = e->l, *r = e->r, *f = e->f;
3830 :
3831 : /* don't push down functions inside attribute joins */
3832 1088751 : if (e->flag == cmp_or || e->flag == cmp_in || e->flag == cmp_notin || e->flag == cmp_filter || (is_join(rel->op) && is_any(e)))
3833 : return 0;
3834 1033153 : if (depth > 0) { /* for comparisons under the top ones, they become functions */
3835 58 : int lmust = 0;
3836 58 : int res = can_push_func(l, rel, &lmust, depth + 1) && can_push_func(r, rel, &lmust, depth + 1) &&
3837 20 : (!f || can_push_func(f, rel, &lmust, depth + 1));
3838 12 : if (res && !lmust)
3839 : return 1;
3840 54 : (*must) |= lmust;
3841 54 : return res;
3842 : } else {
3843 1033095 : int mustl = 0, mustr = 0, mustf = 0;
3844 1033095 : return ((l->type == e_column || can_push_func(l, rel, &mustl, depth + 1)) && (*must = mustl)) ||
3845 2059149 : ((r->type == e_column || can_push_func(r, rel, &mustr, depth + 1)) && (*must = mustr)) ||
3846 3644 : ((f && (f->type == e_column || can_push_func(f, rel, &mustf, depth + 1)) && (*must = mustf)));
3847 : }
3848 : }
3849 44168 : case e_convert:
3850 44168 : return can_push_func(e->l, rel, must, depth + 1);
3851 9457 : case e_aggr:
3852 : case e_func: {
3853 9457 : list *l = e->l;
3854 9457 : int res = 1, lmust = 0;
3855 :
3856 9457 : if (exp_unsafe(e, false, false))
3857 : return 0;
3858 26949 : if (l) for (node *n = l->h; n && res; n = n->next)
3859 17492 : res &= can_push_func(n->data, rel, &lmust, depth + 1);
3860 9457 : if (res && !lmust)
3861 : return 1;
3862 8610 : (*must) |= lmust;
3863 8610 : return res;
3864 : }
3865 50370 : case e_column:
3866 50370 : if (rel && !rel_find_exp(rel, e))
3867 : return 0;
3868 29671 : (*must) = 1;
3869 : /* fall through */
3870 : default:
3871 : return 1;
3872 : }
3873 : }
3874 :
3875 : static int
3876 902553 : exps_can_push_func(list *exps, sql_rel *rel)
3877 : {
3878 3923682 : for(node *n = exps->h; n; n = n->next) {
3879 3044937 : sql_exp *e = n->data;
3880 3044937 : int mustl = 0, mustr = 0;
3881 :
3882 3044937 : if ((is_joinop(rel->op) || is_select(rel->op)) && ((can_push_func(e, rel->l, &mustl, 0) && mustl)))
3883 23808 : return 1;
3884 3040673 : if (is_joinop(rel->op) && can_push_func(e, rel->r, &mustr, 0) && mustr)
3885 : return 1;
3886 : }
3887 : return 0;
3888 : }
3889 :
3890 : static int
3891 82158 : exp_needs_push_down(sql_rel *rel, sql_exp *e)
3892 : {
3893 106618 : switch(e->type) {
3894 27578 : case e_cmp:
3895 : /* don't push down functions inside attribute joins */
3896 27578 : if (e->flag == cmp_or || e->flag == cmp_in || e->flag == cmp_notin || e->flag == cmp_filter || (is_join(rel->op) && is_any(e)))
3897 : return 0;
3898 26642 : return exp_needs_push_down(rel, e->l) || exp_needs_push_down(rel, e->r) || (e->f && exp_needs_push_down(rel, e->f));
3899 24460 : case e_convert:
3900 24460 : return exp_needs_push_down(rel, e->l);
3901 1922 : case e_aggr:
3902 : case e_func:
3903 1922 : if (!e->l || exps_are_atoms(e->l))
3904 20 : return 0;
3905 : return 1;
3906 1459 : case e_atom:
3907 1459 : if (!e->f || exps_are_atoms(e->f))
3908 1459 : return 0;
3909 : return 1;
3910 : case e_column:
3911 : default:
3912 : return 0;
3913 : }
3914 : }
3915 :
3916 : static int
3917 23808 : exps_need_push_down(sql_rel *rel, list *exps )
3918 : {
3919 49476 : for(node *n = exps->h; n; n = n->next)
3920 27570 : if (exp_needs_push_down(rel, n->data))
3921 : return 1;
3922 : return 0;
3923 : }
3924 :
3925 : static sql_exp *exp_push_single_func_down(visitor *v, sql_rel *rel, sql_rel *ol, sql_rel *or, sql_exp *e, int depth);
3926 :
3927 : static list *
3928 2414 : exps_push_single_func_down(visitor *v, sql_rel *rel, sql_rel *ol, sql_rel *or, list *exps, int depth)
3929 : {
3930 4828 : if (mvc_highwater(v->sql))
3931 : return exps;
3932 :
3933 6724 : for (node *n = exps->h; n; n = n->next)
3934 4310 : if ((n->data = exp_push_single_func_down(v, rel, ol, or, n->data, depth)) == NULL)
3935 : return NULL;
3936 : return exps;
3937 : }
3938 :
3939 : static sql_exp *
3940 15731 : exp_push_single_func_down(visitor *v, sql_rel *rel, sql_rel *ol, sql_rel *or, sql_exp *e, int depth)
3941 : {
3942 27503 : if (mvc_highwater(v->sql))
3943 : return e;
3944 :
3945 15731 : switch(e->type) {
3946 4247 : case e_cmp: {
3947 4247 : if (e->flag == cmp_or || e->flag == cmp_filter) {
3948 247 : if ((e->l = exps_push_single_func_down(v, rel, ol, or, e->l, depth + 1)) == NULL)
3949 : return NULL;
3950 247 : if ((e->r = exps_push_single_func_down(v, rel, ol, or, e->r, depth + 1)) == NULL)
3951 : return NULL;
3952 4000 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
3953 18 : if ((e->l = exp_push_single_func_down(v, rel, ol, or, e->l, depth + 1)) == NULL)
3954 : return NULL;
3955 18 : if ((e->r = exps_push_single_func_down(v, rel, ol, or, e->r, depth + 1)) == NULL)
3956 : return NULL;
3957 : } else {
3958 3982 : if ((e->l = exp_push_single_func_down(v, rel, ol, or, e->l, depth + 1)) == NULL)
3959 : return NULL;
3960 3982 : if ((e->r = exp_push_single_func_down(v, rel, ol, or, e->r, depth + 1)) == NULL)
3961 : return NULL;
3962 3982 : if (e->f && (e->f = exp_push_single_func_down(v, rel, ol, or, e->f, depth + 1)) == NULL)
3963 : return NULL;
3964 : }
3965 : } break;
3966 2004 : case e_convert:
3967 2004 : if ((e->l = exp_push_single_func_down(v, rel, ol, or, e->l, depth + 1)) == NULL)
3968 : return NULL;
3969 : break;
3970 3982 : case e_aggr:
3971 : case e_func: {
3972 3982 : sql_rel *l = rel->l, *r = rel->r;
3973 3982 : int must = 0, mustl = 0, mustr = 0;
3974 :
3975 3982 : if (exp_unsafe(e, false, false))
3976 23 : return e;
3977 3982 : if (!e->l || exps_are_atoms(e->l))
3978 23 : return e;
3979 3959 : if ((is_joinop(rel->op) && ((can_push_func(e, l, &mustl, depth + 1) && mustl) || (can_push_func(e, r, &mustr, depth + 1) && mustr))) ||
3980 2977 : (is_select(rel->op) && can_push_func(e, l, &must, depth + 1) && must)) {
3981 3946 : exp_label(v->sql->sa, e, ++v->sql->label);
3982 : /* we need a full projection, group by's and unions cannot be extended with more expressions */
3983 3946 : if (mustr) {
3984 352 : if (r == or) /* don't project twice */
3985 329 : rel->r = r = rel_project(v->sql->sa, r, rel_projections(v->sql, r, NULL, 1, 1));
3986 352 : list_append(r->exps, e);
3987 : } else {
3988 3594 : if (l == ol) /* don't project twice */
3989 1838 : rel->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
3990 3594 : list_append(l->exps, e);
3991 : }
3992 3946 : e = exp_ref(v->sql, e);
3993 3946 : v->changes++;
3994 : }
3995 3959 : } break;
3996 1156 : case e_atom: {
3997 1156 : if (e->f && (e->f = exps_push_single_func_down(v, rel, ol, or, e->f, depth + 1)) == NULL)
3998 : return NULL;
3999 : } break;
4000 : case e_column:
4001 : case e_psm:
4002 : break;
4003 : }
4004 : return e;
4005 : }
4006 :
4007 : static inline sql_rel *
4008 3567196 : rel_push_func_down(visitor *v, sql_rel *rel)
4009 : {
4010 3567196 : if ((is_select(rel->op) || is_joinop(rel->op)) && rel->l && rel->exps && !(rel_is_ref(rel))) {
4011 1082866 : int changes = v->changes;
4012 1082866 : sql_rel *l = rel->l, *r = rel->r;
4013 :
4014 : /* only push down when is useful */
4015 1082866 : if ((is_select(rel->op) && list_length(rel->exps) <= 1) || rel_is_ref(l) || (is_joinop(rel->op) && rel_is_ref(r)))
4016 532697 : return rel;
4017 550169 : if (exps_can_push_func(rel->exps, rel) && exps_need_push_down(rel, rel->exps) && !exps_push_single_func_down(v, rel, l, r, rel->exps, 0))
4018 : return NULL;
4019 550169 : if (v->changes > changes) /* once we get a better join order, we can try to remove this projection */
4020 1902 : return rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
4021 : }
4022 3032597 : if (is_simple_project(rel->op) && rel->l && rel->exps) {
4023 1126562 : sql_rel *pl = rel->l;
4024 :
4025 1126562 : if (is_joinop(pl->op) && exps_can_push_func(rel->exps, rel)) {
4026 0 : sql_rel *l = pl->l, *r = pl->r, *ol = l, *or = r;
4027 :
4028 0 : for (node *n = rel->exps->h; n; ) {
4029 0 : node *next = n->next;
4030 0 : sql_exp *e = n->data;
4031 0 : int mustl = 0, mustr = 0;
4032 :
4033 0 : if ((can_push_func(e, l, &mustl, 0) && mustl) || (can_push_func(e, r, &mustr, 0) && mustr)) {
4034 0 : if (mustl) {
4035 0 : if (l == ol) /* don't project twice */
4036 0 : pl->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
4037 0 : list_append(l->exps, e);
4038 0 : list_remove_node(rel->exps, NULL, n);
4039 0 : v->changes++;
4040 : } else {
4041 0 : if (r == or) /* don't project twice */
4042 0 : pl->r = r = rel_project(v->sql->sa, r, rel_projections(v->sql, r, NULL, 1, 1));
4043 0 : list_append(r->exps, e);
4044 0 : list_remove_node(rel->exps, NULL, n);
4045 0 : v->changes++;
4046 : }
4047 : }
4048 0 : n = next;
4049 : }
4050 : }
4051 : }
4052 : return rel;
4053 : }
4054 :
4055 : static sql_rel *
4056 3567195 : rel_push_func_and_select_down_(visitor *v, sql_rel *rel)
4057 : {
4058 3567195 : if (rel)
4059 3567195 : rel = rel_push_func_down(v, rel);
4060 3567195 : if (rel)
4061 3567195 : rel = rel_push_select_down(v, rel);
4062 3567195 : return rel;
4063 : }
4064 :
4065 : static sql_rel *
4066 126367 : rel_push_func_and_select_down(visitor *v, global_props *gp, sql_rel *rel)
4067 : {
4068 126367 : (void) gp;
4069 126367 : return rel_visitor_topdown(v, rel, &rel_push_func_and_select_down_);
4070 : }
4071 :
4072 : run_optimizer
4073 623384 : bind_push_func_and_select_down(visitor *v, global_props *gp)
4074 : {
4075 623384 : int flag = v->sql->sql_optimizer;
4076 623188 : return gp->opt_level == 1 && (gp->cnt[op_join] || gp->cnt[op_left] || gp->cnt[op_right]
4077 538972 : || gp->cnt[op_full] || gp->cnt[op_semi] || gp->cnt[op_anti] || gp->cnt[op_select])
4078 749752 : && (flag & push_func_and_select_down) ? rel_push_func_and_select_down : NULL;
4079 : }
|