Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024, 2025 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer_private.h"
15 : #include "rel_planner.h"
16 : #include "rel_exp.h"
17 : #include "rel_select.h"
18 : #include "rel_rewriter.h"
19 :
20 : /* Split_select optimizer splits case statements in select expressions. This is a step needed for cse */
21 : static void select_split_exps(mvc *sql, list *exps, sql_rel *rel);
22 :
23 : static sql_exp *
24 200982 : select_split_exp(mvc *sql, sql_exp *e, sql_rel *rel)
25 : {
26 200982 : switch(e->type) {
27 : case e_column:
28 : return e;
29 6905 : case e_convert:
30 6905 : e->l = select_split_exp(sql, e->l, rel);
31 6905 : return e;
32 26673 : case e_aggr:
33 : case e_func:
34 26673 : if (!is_analytic(e) && !exp_has_sideeffect(e)) {
35 26668 : sql_subfunc *f = e->f;
36 26668 : if (e->type == e_func && !f->func->s && is_caselike_func(f) /*is_ifthenelse_func(f)*/)
37 283 : return add_exp_too_project(sql, e, rel);
38 : }
39 : return e;
40 67910 : case e_cmp:
41 67910 : if (e->flag == cmp_or || e->flag == cmp_filter) {
42 15545 : select_split_exps(sql, e->l, rel);
43 15545 : select_split_exps(sql, e->r, rel);
44 52365 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
45 2809 : e->l = select_split_exp(sql, e->l, rel);
46 2809 : select_split_exps(sql, e->r, rel);
47 : } else {
48 49556 : e->l = select_split_exp(sql, e->l, rel);
49 49556 : e->r = select_split_exp(sql, e->r, rel);
50 49556 : if (e->f)
51 3591 : e->f = select_split_exp(sql, e->f, rel);
52 : }
53 : return e;
54 : case e_atom:
55 : case e_psm:
56 : return e;
57 : }
58 : return e;
59 : }
60 :
61 : static void
62 59770 : select_split_exps(mvc *sql, list *exps, sql_rel *rel)
63 : {
64 59770 : node *n;
65 :
66 59770 : if (!exps)
67 : return;
68 148335 : for(n=exps->h; n; n = n->next){
69 88565 : sql_exp *e = n->data;
70 :
71 88565 : e = select_split_exp(sql, e, rel);
72 88565 : n->data = e;
73 : }
74 : }
75 :
76 : static sql_rel *
77 1677352 : rel_split_select_(visitor *v, sql_rel *rel)
78 : {
79 1677352 : if (!rel || !is_select(rel->op) || list_empty(rel->exps) || !rel->l || mvc_highwater(v->sql))
80 1453597 : return rel;
81 :
82 223755 : bool funcs = false;
83 :
84 : /* are there functions */
85 477769 : for (node *n = rel->exps->h; n && !funcs; n = n->next)
86 254014 : funcs = exp_has_func(n->data);
87 :
88 : /* introduce extra project */
89 223755 : if (funcs) {
90 25871 : sql_rel *nrel = rel_project(v->sql->sa, rel->l,
91 25871 : rel_projections(v->sql, rel->l, NULL, 1, 1));
92 25871 : if (!nrel || !nrel->exps)
93 : return NULL;
94 25871 : rel->l = nrel;
95 : /* recursively split all functions and add those to the projection list */
96 25871 : select_split_exps(v->sql, rel->exps, nrel);
97 25871 : return rel;
98 : }
99 : return rel;
100 : }
101 :
102 : static sql_rel *
103 57299 : rel_split_select(visitor *v, global_props *gp, sql_rel *rel)
104 : {
105 57299 : (void) gp;
106 57299 : return rel_visitor_bottomup(v, rel, &rel_split_select_);
107 : }
108 :
109 : run_optimizer
110 638406 : bind_split_select(visitor *v, global_props *gp)
111 : {
112 638406 : int flag = v->sql->sql_optimizer;
113 561147 : return gp->opt_cycle == 0 && gp->opt_level == 1 && (flag & split_select)
114 1199545 : && gp->cnt[op_select] ? rel_split_select : NULL;
115 : }
116 :
117 :
118 : /*
119 : * Remove a redundant join
120 : *
121 : * join (L, Distinct Project(join(L,P) [ p.key == l.lkey]) [p.key]) [ p.key == l.lkey]
122 : * =>
123 : * join(L, P) [p.key==l.lkey]
124 : */
125 : static sql_rel *
126 1489652 : rel_remove_redundant_join_(visitor *v, sql_rel *rel)
127 : {
128 1489652 : if ((is_join(rel->op) || is_semi(rel->op)) && !list_empty(rel->exps)) {
129 222052 : sql_rel *l = rel->l, *r = rel->r, *b, *p = NULL, *j;
130 :
131 222052 : if (is_basetable(l->op) && is_simple_project(r->op) && need_distinct(r)) {
132 8 : b = l;
133 8 : p = r;
134 8 : j = p->l;
135 222044 : } else if (is_basetable(r->op) && is_simple_project(l->op) && need_distinct(l)) {
136 2333 : b = r;
137 2333 : p = l;
138 2333 : j = p->l;
139 : }
140 222052 : if (!p || !j || j->op != rel->op)
141 : return rel;
142 : /* j must have b->l (ie table) */
143 12 : sql_rel *jl = j->l, *jr = j->r;
144 12 : if ((is_basetable(jl->op) && jl->l == b->l) ||
145 8 : (is_basetable(jr->op) && jr->l == b->l)) {
146 11 : int left = 0;
147 11 : if (is_basetable(jl->op) && jl->l == b->l)
148 11 : left = 1;
149 11 : if (!list_empty(p->exps)) {
150 19 : for (node *n=p->exps->h; n; n = n->next) { /* all exps of 'p' must be bound to the opposite side */
151 11 : sql_exp *e = n->data;
152 :
153 18 : if (!rel_rebind_exp(v->sql, left ? jr : jl, e))
154 : return rel;
155 : }
156 : }
157 8 : if (exp_match_list(j->exps, rel->exps)) {
158 0 : p->l = (left)?rel_dup(jr):rel_dup(jl);
159 0 : rel_destroy(j);
160 0 : set_nodistinct(p);
161 0 : v->changes++;
162 0 : return rel;
163 : }
164 : }
165 : }
166 : return rel;
167 : }
168 :
169 : static sql_rel *
170 49379 : rel_remove_redundant_join(visitor *v, global_props *gp, sql_rel *rel)
171 : {
172 49379 : (void) gp;
173 49379 : return rel_visitor_bottomup(v, rel, &rel_remove_redundant_join_); /* this optimizer has to run before rel_first_level_optimizations */
174 : }
175 :
176 : run_optimizer
177 638407 : bind_remove_redundant_join(visitor *v, global_props *gp)
178 : {
179 638407 : int flag = v->sql->sql_optimizer;
180 561147 : return gp->opt_cycle == 0 && gp->opt_level == 1 && (gp->cnt[op_left] || gp->cnt[op_right]
181 542265 : || gp->cnt[op_full] || gp->cnt[op_join] || gp->cnt[op_semi] || gp->cnt[op_anti]) &&
182 687786 : (flag & remove_redundant_join) ? rel_remove_redundant_join : NULL;
183 : }
184 :
185 :
186 : static list *
187 1115185 : exp_merge_range(visitor *v, sql_rel *rel, list *exps)
188 : {
189 1115185 : node *n, *m;
190 2391116 : for (n=exps->h; n; n = n->next) {
191 1277827 : sql_exp *e = n->data;
192 1277827 : sql_exp *le = e->l;
193 1277827 : sql_exp *re = e->r;
194 :
195 : /* handle the and's in the or lists */
196 1277827 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
197 34414 : e->l = exp_merge_range(v, rel, e->l);
198 34414 : e->r = exp_merge_range(v, rel, e->r);
199 : /* only look for gt, gte, lte, lt */
200 1243413 : } else if (n->next &&
201 160841 : e->type == e_cmp && e->flag < cmp_equal && !e->f &&
202 6821 : re->card == CARD_ATOM && !is_anti(e)) {
203 1275 : for (m=n->next; m; m = m->next) {
204 781 : sql_exp *f = m->data;
205 781 : sql_exp *lf = f->l;
206 781 : sql_exp *rf = f->r;
207 781 : int c_le = is_numeric_upcast(le), c_lf = is_numeric_upcast(lf);
208 :
209 781 : if (f->type == e_cmp && f->flag < cmp_equal && !f->f &&
210 651 : rf->card == CARD_ATOM && !is_anti(f) &&
211 325 : exp_match_exp(c_le?le->l:le, c_lf?lf->l:lf)) {
212 175 : sql_exp *ne;
213 175 : int swap = 0, lt = 0, gt = 0;
214 175 : sql_subtype super;
215 : /* for now only c1 <[=] x <[=] c2 */
216 :
217 175 : swap = lt = (e->flag == cmp_lt || e->flag == cmp_lte);
218 175 : gt = !lt;
219 :
220 175 : if (gt &&
221 151 : (f->flag == cmp_gt ||
222 : f->flag == cmp_gte))
223 32 : continue;
224 24 : if (lt &&
225 24 : (f->flag == cmp_lt ||
226 : f->flag == cmp_lte))
227 0 : continue;
228 :
229 143 : cmp_supertype(&super, exp_subtype(le), exp_subtype(lf));
230 143 : if (!(rf = exp_check_type(v->sql, &super, rel, rf, type_equal)) ||
231 143 : !(le = exp_check_type(v->sql, &super, rel, le, type_equal)) ||
232 143 : !(re = exp_check_type(v->sql, &super, rel, re, type_equal))) {
233 0 : v->sql->session->status = 0;
234 0 : v->sql->errstr[0] = 0;
235 0 : continue;
236 : }
237 143 : if (!swap)
238 119 : ne = exp_compare2(v->sql->sa, le, re, rf, compare2range(e->flag, f->flag), 0);
239 : else
240 24 : ne = exp_compare2(v->sql->sa, le, rf, re, compare2range(f->flag, e->flag), 0);
241 :
242 143 : list_remove_data(exps, NULL, e);
243 143 : list_remove_data(exps, NULL, f);
244 143 : list_append(exps, ne);
245 143 : v->changes++;
246 143 : return exp_merge_range(v, rel, exps);
247 : }
248 : }
249 1242776 : } else if (n->next &&
250 160204 : e->type == e_cmp && e->flag < cmp_equal && !e->f &&
251 6184 : re->card > CARD_ATOM && !is_anti(e)) {
252 11212 : for (m=n->next; m; m = m->next) {
253 6781 : sql_exp *f = m->data;
254 6781 : sql_exp *lf = f->l;
255 6781 : sql_exp *rf = f->r;
256 :
257 6781 : if (f->type == e_cmp && f->flag < cmp_equal && !f->f &&
258 4979 : rf->card > CARD_ATOM && !is_anti(f)) {
259 4978 : sql_exp *ne, *t;
260 4978 : int swap = 0, lt = 0, gt = 0;
261 4978 : comp_type ef = (comp_type) e->flag, ff = (comp_type) f->flag;
262 4978 : int c_re = is_numeric_upcast(re), c_rf = is_numeric_upcast(rf);
263 4978 : int c_le = is_numeric_upcast(le), c_lf = is_numeric_upcast(lf), c;
264 4978 : sql_subtype super;
265 :
266 : /* both swapped ? */
267 4978 : if (exp_match_exp(c_re?re->l:re, c_rf?rf->l:rf)) {
268 19 : t = re;
269 19 : re = le;
270 19 : le = t;
271 19 : c = c_re; c_re = c_le; c_le = c;
272 19 : ef = swap_compare(ef);
273 19 : t = rf;
274 19 : rf = lf;
275 19 : lf = t;
276 19 : c = c_rf; c_rf = c_lf; c_lf = c;
277 19 : ff = swap_compare(ff);
278 : }
279 :
280 : /* is left swapped ? */
281 4978 : if (exp_match_exp(c_re?re->l:re, c_lf?lf->l:lf)) {
282 104 : t = re;
283 104 : re = le;
284 104 : le = t;
285 104 : c = c_re; c_re = c_le; c_le = c;
286 104 : ef = swap_compare(ef);
287 : }
288 :
289 : /* is right swapped ? */
290 4978 : if (exp_match_exp(c_le?le->l:le, c_rf?rf->l:rf)) {
291 177 : t = rf;
292 177 : rf = lf;
293 177 : lf = t;
294 177 : c = c_rf; c_rf = c_lf; c_lf = c;
295 177 : ff = swap_compare(ff);
296 : }
297 :
298 4978 : if (!exp_match_exp(c_le?le->l:le, c_lf?lf->l:lf))
299 3225 : continue;
300 :
301 : /* for now only c1 <[=] x <[=] c2 */
302 1830 : swap = lt = (ef == cmp_lt || ef == cmp_lte);
303 1830 : gt = !lt;
304 :
305 1830 : if (gt && (ff == cmp_gt || ff == cmp_gte))
306 73 : continue;
307 1757 : if (lt && (ff == cmp_lt || ff == cmp_lte))
308 4 : continue;
309 :
310 1753 : cmp_supertype(&super, exp_subtype(le), exp_subtype(lf));
311 1753 : if (!(rf = exp_check_type(v->sql, &super, rel, rf, type_equal)) ||
312 1753 : !(le = exp_check_type(v->sql, &super, rel, le, type_equal)) ||
313 1753 : !(re = exp_check_type(v->sql, &super, rel, re, type_equal))) {
314 0 : v->sql->session->status = 0;
315 0 : v->sql->errstr[0] = 0;
316 0 : continue;
317 : }
318 1753 : if (!swap)
319 1653 : ne = exp_compare2(v->sql->sa, le, re, rf, compare2range(ef, ff), 0);
320 : else
321 100 : ne = exp_compare2(v->sql->sa, le, rf, re, compare2range(ff, ef), 0);
322 :
323 1753 : list_remove_data(exps, NULL, e);
324 1753 : list_remove_data(exps, NULL, f);
325 1753 : list_append(exps, ne);
326 1753 : v->changes++;
327 1753 : return exp_merge_range(v, rel, exps);
328 : }
329 : }
330 : }
331 : }
332 : return exps;
333 : }
334 :
335 : static int
336 30832 : exps_cse( mvc *sql, list *oexps, list *l, list *r )
337 : {
338 30832 : list *nexps;
339 30832 : node *n, *m;
340 30832 : char *lu, *ru;
341 30832 : int lc = 0, rc = 0, match = 0, res = 0;
342 :
343 30832 : if (list_length(l) == 0 || list_length(r) == 0)
344 0 : return 0;
345 :
346 : /* first recursive exps_cse */
347 30832 : nexps = new_exp_list(sql->sa);
348 65468 : for (n = l->h; n; n = n->next) {
349 34636 : sql_exp *e = n->data;
350 :
351 34636 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
352 9736 : res = exps_cse(sql, nexps, e->l, e->r);
353 : } else {
354 24900 : append(nexps, e);
355 : }
356 : }
357 30832 : l = nexps;
358 :
359 30832 : nexps = new_exp_list(sql->sa);
360 68649 : for (n = r->h; n; n = n->next) {
361 37817 : sql_exp *e = n->data;
362 :
363 37817 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
364 2142 : res = exps_cse(sql, nexps, e->l, e->r);
365 : } else {
366 35675 : append(nexps, e);
367 : }
368 : }
369 30832 : r = nexps;
370 :
371 : /* simplify true or .. and .. or true */
372 30832 : if (list_length(l) == list_length(r) && list_length(l) == 1) {
373 26294 : sql_exp *le = l->h->data, *re = r->h->data;
374 :
375 26294 : if (exp_is_true(le)) {
376 14 : append(oexps, le);
377 14 : return 1;
378 : }
379 26280 : if (exp_is_true(re)) {
380 5 : append(oexps, re);
381 5 : return 1;
382 : }
383 : }
384 :
385 30813 : lu = SA_ZNEW_ARRAY(sql->ta, char, list_length(l));
386 30813 : ru = SA_ZNEW_ARRAY(sql->ta, char, list_length(r));
387 65447 : for (n = l->h, lc = 0; n; n = n->next, lc++) {
388 34634 : sql_exp *le = n->data;
389 :
390 78249 : for ( m = r->h, rc = 0; m; m = m->next, rc++) {
391 43615 : sql_exp *re = m->data;
392 :
393 43615 : if (!ru[rc] && exp_match_exp(le,re)) {
394 68 : lu[lc] = 1;
395 68 : ru[rc] = 1;
396 68 : match = 1;
397 : }
398 : }
399 : }
400 30813 : if (match) {
401 49 : list *nl = new_exp_list(sql->sa);
402 49 : list *nr = new_exp_list(sql->sa);
403 :
404 162 : for (n = l->h, lc = 0; n; n = n->next, lc++)
405 113 : if (!lu[lc])
406 49 : append(nl, n->data);
407 176 : for (n = r->h, rc = 0; n; n = n->next, rc++)
408 127 : if (!ru[rc])
409 59 : append(nr, n->data);
410 :
411 49 : if (list_length(nl) && list_length(nr))
412 23 : append(oexps, exp_or(sql->sa, nl, nr, 0));
413 :
414 162 : for (n = l->h, lc = 0; n; n = n->next, lc++) {
415 113 : if (lu[lc])
416 64 : append(oexps, n->data);
417 : }
418 : res = 1;
419 : } else {
420 30764 : append(oexps, exp_or(sql->sa, list_dup(l, (fdup)NULL),
421 : list_dup(r, (fdup)NULL), 0));
422 : }
423 : return res;
424 : }
425 :
426 : static inline int
427 23320 : exp_col_key(sql_exp *e)
428 : {
429 17579 : return e->nid ? e->nid : e->alias.label;
430 : }
431 :
432 : static inline int
433 5765 : exp_cmp_eq_unique_id(sql_exp *e)
434 : {
435 5765 : return exp_col_key(e->l);
436 : }
437 :
438 : static inline int
439 1768 : exp_multi_col_key(list *l)
440 : {
441 1768 : int k = exp_col_key(l->h->data);
442 5765 : for (node *n = l->h->next; n; n = n->next) {
443 3997 : k <<= 4;
444 7994 : k ^= exp_col_key(n->data);
445 : }
446 1768 : return k;
447 : }
448 :
449 : typedef struct exp_eq_col_values {
450 : /* we need ->first in order to remove it from the list of cmp_eq exps
451 : * in case that we find another occurrence (with a different value)
452 : */
453 : sql_exp* first;
454 : sql_exp* col; /* column */
455 : list *vs; /* list of values */
456 : } eq_cv;
457 :
458 : typedef struct exp_eq_multi_col_values {
459 : /* we need ->first in order to remove it from the list of multi col
460 : * cmp_eq exps in case that we find another occurrence (with different values)
461 : */
462 : list *first;
463 : list *cols; /* list of col exps */
464 : list *lvs; /* list of lists of values */
465 : } eq_mcv;
466 :
467 : static bool
468 4276 : detect_col_cmp_eqs(mvc *sql, list *eqs, sql_hash *eqh)
469 : {
470 4276 : bool col_multivalue_cmp_eq = false;
471 16066 : for (node *n = eqs->h; n; n = n->next ) {
472 11790 : sql_exp *e = n->data;
473 11790 : sql_exp *le = e->l, *re = e->r;
474 :
475 : /* find the le in the hash and append the re in the hash value (ea->list) */
476 11790 : bool found = false;
477 :
478 11790 : int key = eqh->key(le);
479 11790 : sql_hash_e *he = eqh->buckets[key&(eqh->size-1)];
480 :
481 14686 : for (;he && !found; he = he->chain) {
482 2896 : eq_cv *cv = he->value;
483 2896 : if (!exp_equal(le, cv->col)) {
484 2896 : cv->vs = append(cv->vs, re);
485 2896 : found = col_multivalue_cmp_eq = true;
486 : /* remove this and the previous (->first) occurrence (if exists) from eqs */
487 2896 : if (cv->first) {
488 1793 : list_remove_data(eqs, NULL, cv->first);
489 1793 : cv->first = NULL;
490 : }
491 2896 : list_remove_node(eqs, NULL, n);
492 : }
493 : }
494 :
495 11790 : if (!found) {
496 8894 : eq_cv *cv = SA_NEW(sql->sa, eq_cv);
497 8894 : cv->first = e;
498 8894 : cv->vs = sa_list(sql->sa);
499 8894 : cv->vs = append(cv->vs, re);
500 8894 : cv->col = le;
501 :
502 8894 : hash_add(eqh, key, cv);
503 : }
504 : }
505 4276 : return col_multivalue_cmp_eq;
506 : }
507 :
508 : static bool
509 692 : detect_multicol_cmp_eqs(mvc *sql, list *mce_ands, sql_hash *meqh)
510 : {
511 : /* we get as input a list of AND associated expressions (hence the entries are lists themselves)
512 : * we need to detect cmp_eq-only AND-associated expressions with the same columns so we can
513 : * group together their values
514 : * e.g. [[n = 1, m = 10], [m = 20, k = 100, l = 3000], [m = 20, n = 2]] has
515 : * - (m,k,l) group with a single value (20, 100, 3000)
516 : * - (n,k) group with two values (1, 10) and (2, 20)
517 : * at the end we return true only if we have at least a group of columns with more than a single value
518 : * e.g. in this example (n,k)
519 : */
520 692 : bool multi_multivalue_cmp_eq = false;
521 2460 : for (node *n = mce_ands->h; n; n = n->next) {
522 1768 : list *l = n->data;
523 :
524 : /* sort the list of the cmp_eq expressions based on the col exp
525 : * NOTE: from now on we only work with the sorted list, sl */
526 1768 : list *sl = list_sort(l, (fkeyvalue)&exp_cmp_eq_unique_id, NULL);
527 1768 : list_append_before(mce_ands, n, sl);
528 1768 : list_remove_node(mce_ands, NULL, n);
529 :
530 : /* find the eq exp in the hash and append the values */
531 1768 : bool found = false;
532 :
533 1768 : int key = meqh->key(sl);
534 1768 : sql_hash_e *he = meqh->buckets[key&(meqh->size-1)];
535 :
536 2866 : for (;he && !found; he = he->chain) {
537 : /* compare the values of the hash_entry with the cols under cmp_eq from the list */
538 1098 : bool same_cols = true;
539 1098 : eq_mcv *mcv = he->value;
540 3479 : for (node *m = sl->h, *k = mcv->cols->h; m && k && same_cols; m = m->next, k = k->next) {
541 2381 : sql_exp *col_exp = ((sql_exp*)m->data)->l;
542 2381 : if (exp_equal(col_exp, k->data))
543 663 : same_cols = false;
544 : }
545 1098 : if (same_cols) {
546 : /* we found the same multi cmp_eq exp in mce_ands list multiple times! */
547 435 : found = multi_multivalue_cmp_eq = true;
548 : /* gather all the values of the list and add them to the hash entry */
549 435 : list *atms = sa_list(sql->sa);
550 1783 : for (node *m = sl->h; m; m = m->next)
551 1348 : atms = append(atms, ((sql_exp*)m->data)->r);
552 435 : mcv->lvs = append(mcv->lvs, atms);
553 : /* remove this and the previous occurrence (which means that's the first time
554 : * that we found the *same* multi cmp_eq exp)
555 : */
556 435 : if (mcv->first) {
557 80 : list_remove_data(mce_ands, NULL, mcv->first);
558 80 : mcv->first = NULL;
559 : }
560 435 : list_remove_data(mce_ands, NULL, sl);
561 : }
562 : }
563 :
564 1768 : if (!found) {
565 1333 : eq_mcv *mcv = SA_NEW(sql->sa, eq_mcv);
566 1333 : mcv->first = sl;
567 1333 : mcv->cols = sa_list(sql->sa);
568 5750 : for (node *m = sl->h; m; m = m->next)
569 4417 : mcv->cols = append(mcv->cols, ((sql_exp*)m->data)->l);
570 : /* for the list of values (atoms) create a list and append it to the lvs list */
571 1333 : list *atms = sa_list(sql->sa);
572 5750 : for (node *m = sl->h; m; m = m->next)
573 4417 : atms = append(atms, ((sql_exp*)m->data)->r);
574 1333 : mcv->lvs = sa_list(sql->sa);
575 1333 : mcv->lvs = append(mcv->lvs, atms);
576 :
577 1333 : hash_add(meqh, key, mcv);
578 : }
579 : }
580 692 : return multi_multivalue_cmp_eq;
581 : }
582 :
583 : static void
584 53796 : exp_or_chain_groups(mvc *sql, list *exps, list **gen_ands, list **mce_ands, list **eqs, list **noneq)
585 : {
586 : /* identify three different groups
587 : * 1. gen_ands: lists of generic expressions (their inner association is AND)
588 : * 2. mce_ands: lists of multi_colum cmp_eq ONLY expressions (same^^^)
589 : * 3. eqs: equality expressions
590 : * 4. neq: non equality col expressions
591 : *
592 : * return true if there is an exp with more than one cmp_eq
593 : */
594 67818 : bool eq_only = true;
595 147147 : for (node *n = exps->h; n && eq_only; n = n->next) {
596 79329 : sql_exp *e = n->data;
597 79329 : sql_exp *le = e->l, *re = e->r;
598 158405 : eq_only &= (e->type == e_cmp && e->flag == cmp_equal &&
599 34422 : le->card != CARD_ATOM && is_column(le->type) &&
600 112592 : re->card == CARD_ATOM && !is_semantics(e));
601 : }
602 :
603 67818 : if (list_length(exps) > 1) {
604 5409 : if (eq_only)
605 4520 : *mce_ands = append(*mce_ands, exps);
606 : else
607 889 : *gen_ands = append(*gen_ands, exps);
608 62409 : } else if (list_length(exps) == 1) {
609 62409 : sql_exp *se = exps->h->data;
610 62409 : sql_exp *le = se->l, *re = se->r;
611 :
612 62409 : if (se->type == e_cmp && se->flag == cmp_or && !is_anti(se)) {
613 : /* for a cmp_or expression go down the tree */
614 14022 : exp_or_chain_groups(sql, (list*)le, gen_ands, mce_ands, eqs, noneq);
615 14022 : exp_or_chain_groups(sql, (list*)re, gen_ands, mce_ands, eqs, noneq);
616 :
617 48387 : } else if (eq_only) {
618 15429 : *eqs = append(*eqs, se);
619 : } else {
620 32958 : *noneq = append(*noneq, se);
621 : }
622 : }
623 53796 : }
624 :
625 : static list *
626 1686 : generate_single_col_cmp_in(mvc *sql, sql_hash *eqh)
627 : {
628 : /* from single col cmp_eq with multiple atoms in the hash generate
629 : * "e_col in (val0, val1, ...)" (see detect_col_cmp_eqs())
630 : */
631 1686 : list *ins = new_exp_list(sql->sa);
632 55638 : for (int i = 0; i < eqh->size; i++) {
633 53952 : sql_hash_e *he = eqh->buckets[i];
634 :
635 56592 : while (he) {
636 2640 : eq_cv *cv = he->value;
637 : /* NOTE: cmp_eq expressions with a single entry are still in eqs */
638 2640 : if (list_length(cv->vs) > 1)
639 1793 : ins = append(ins, exp_in(sql->sa, cv->col, cv->vs, cmp_in));
640 2640 : he = he->chain;
641 : }
642 : }
643 1686 : return ins;
644 : }
645 :
646 : static list *
647 80 : generate_multi_col_cmp_in(mvc *sql, sql_hash *meqh)
648 : {
649 : /* from multivalue cmp_eq with multiple lists of atoms in the hash generate
650 : * "(col1, col2, ...) in [(val10, val20, ...), (val11, val21, ...), ... ]"
651 : * (see detect_multicol_cmp_eqs())
652 : */
653 80 : list *ins = new_exp_list(sql->sa);
654 2640 : for (int i = 0; i < meqh->size; i++) {
655 2560 : sql_hash_e *he = meqh->buckets[i];
656 2654 : while (he) {
657 94 : eq_mcv *mcv = he->value;
658 : /* NOTE: multivalue cmp_eq expressions with a single entry are still in mce_ands */
659 94 : if (list_length(mcv->lvs) > 1) {
660 80 : sql_exp *mc = exp_label(sql->sa, exp_values(sql->sa, mcv->cols), ++sql->label);
661 595 : for (node *a = mcv->lvs->h; a; a = a->next)
662 515 : a->data = exp_values(sql->sa, a->data);
663 80 : ins = append(ins, exp_in(sql->sa, mc, mcv->lvs, cmp_in));
664 : }
665 94 : he = he->chain;
666 : }
667 : }
668 80 : return ins;
669 : }
670 :
671 : static list *
672 524797 : merge_ors(mvc *sql, list *exps, int *changes)
673 : {
674 524797 : sql_hash *eqh = NULL, *meqh = NULL;
675 524797 : list *eqs = NULL, *neq = NULL, *gen_ands = NULL, *mce_ands = NULL, *ins = NULL, *mins = NULL;
676 1120690 : for (node *n = exps->h; n; n = n->next) {
677 595893 : sql_exp *e = n->data;
678 :
679 595893 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
680 : /* NOTE: gen_ands and mce_ands are both a list of lists since the AND association
681 : * between expressions is expressed with a list
682 : * e.g. [[e1, e2], [e3, e4, e5]] semantically translates
683 : * to [(e1 AND e2), (e3 AND e4 AND e5)]
684 : * those (internal) AND list can be then used to
685 : * reconstructed an OR tree [[e1, e2], [e3, e4, e5]] =>
686 : * (([e1, e2] OR [e3, e4, e5]) OR <whatever-else> )
687 : * gen_ands includes general expressions associated with AND
688 : * mce_ands includes only cmp_eq expressions associated with AND
689 : */
690 19887 : gen_ands = new_exp_list(sql->sa);
691 19887 : mce_ands = new_exp_list(sql->sa);
692 19887 : eqs = new_exp_list(sql->sa);
693 19887 : neq = new_exp_list(sql->sa);
694 :
695 : /* walk the OR tree */
696 19887 : exp_or_chain_groups(sql, e->l, &gen_ands, &mce_ands, &eqs, &neq);
697 19887 : exp_or_chain_groups(sql, e->r, &gen_ands, &mce_ands, &eqs, &neq);
698 :
699 : /* detect col cmp_eq exps with multiple values */
700 19887 : bool col_multival = false;
701 19887 : if (list_length(eqs) > 1) {
702 4276 : eqh = hash_new(sql->sa, 32, (fkeyvalue)&exp_col_key);
703 4276 : col_multival = detect_col_cmp_eqs(sql, eqs, eqh);
704 : }
705 :
706 : /* detect mutli-col cmp_eq exps with multiple (lists of) values */
707 19887 : bool multicol_multival = false;
708 19887 : if (list_length(mce_ands) > 1) {
709 692 : meqh = hash_new(sql->sa, 32, (fkeyvalue)&exp_multi_col_key);
710 692 : multicol_multival = detect_multicol_cmp_eqs(sql, mce_ands, meqh);
711 : }
712 :
713 19887 : if (!col_multival && !multicol_multival)
714 18130 : continue;
715 :
716 1757 : if (col_multival)
717 1686 : ins = generate_single_col_cmp_in(sql, eqh);
718 :
719 1757 : if (multicol_multival)
720 80 : mins = generate_multi_col_cmp_in(sql, meqh);
721 :
722 : /* create the new OR tree */
723 1757 : sql_exp *new = (ins) ? ins->h->data : mins->h->data;
724 :
725 71 : if (ins) {
726 1793 : for (node *i = ins->h->next; i; i = i->next) {
727 107 : list *l = new_exp_list(sql->sa);
728 107 : list *r = new_exp_list(sql->sa);
729 107 : l = append(l, new);
730 107 : r = append(r, (sql_exp*)i->data);
731 107 : new = exp_or(sql->sa, l, r, 0);
732 :
733 107 : (*changes)++;
734 : }
735 : }
736 :
737 1757 : if (list_length(eqs)) {
738 1448 : for (node *i = eqs->h; i; i = i->next) {
739 872 : list *l = new_exp_list(sql->sa);
740 872 : list *r = new_exp_list(sql->sa);
741 872 : l = append(l, new);
742 872 : r = append(r, (sql_exp*)i->data);
743 872 : new = exp_or(sql->sa, l, r, 0);
744 : }
745 : }
746 :
747 1757 : if (mins) {
748 169 : for (node *i = ((ins) ? mins->h : mins->h->next); i; i = i->next) {
749 9 : list *l = new_exp_list(sql->sa);
750 9 : list *r = new_exp_list(sql->sa);
751 9 : l = append(l, new);
752 9 : r = append(r, (sql_exp*)i->data);
753 9 : new = exp_or(sql->sa, l, r, 0);
754 :
755 9 : (*changes)++;
756 : }
757 : }
758 :
759 1757 : if (list_length(mce_ands)) {
760 523 : for (node *i = mce_ands->h; i; i = i->next) {
761 273 : list *l = new_exp_list(sql->sa);
762 273 : l = append(l, new);
763 273 : new = exp_or(sql->sa, l, i->data, 0);
764 : }
765 : }
766 :
767 1760 : for (node *a = gen_ands->h; a; a = a->next){
768 3 : list *l = new_exp_list(sql->sa);
769 3 : l = append(l, new);
770 3 : new = exp_or(sql->sa, l, a->data, 0);
771 : }
772 :
773 2075 : for (node *o = neq->h; o; o = o->next){
774 318 : list *l = new_exp_list(sql->sa);
775 318 : list *r = new_exp_list(sql->sa);
776 318 : l = append(l, new);
777 318 : r = append(r, (sql_exp*)o->data);
778 318 : new = exp_or(sql->sa, l, r, 0);
779 : }
780 :
781 1757 : list_remove_node(exps, NULL, n);
782 1757 : exps = append(exps, new);
783 : }
784 : }
785 524797 : return exps;
786 : }
787 :
788 : #define TRIVIAL_NOT_EQUAL_CMP(e) \
789 : ((e)->type == e_cmp && (e)->flag == cmp_notequal && !is_anti((e)) && !is_semantics((e)) && ((sql_exp*)(e)->l)->card != CARD_ATOM && ((sql_exp*)(e)->r)->card == CARD_ATOM)
790 :
791 : static list *
792 586443 : merge_notequal(mvc *sql, list *exps, int *changes)
793 : {
794 586443 : list *inequality_groups = NULL, *nexps = NULL;
795 586443 : int needed = 0;
796 :
797 1254339 : for (node *n = exps->h; n; n = n->next) {
798 667896 : sql_exp *e = n->data;
799 :
800 667896 : if (TRIVIAL_NOT_EQUAL_CMP(e)) {
801 64394 : bool appended = false;
802 :
803 64394 : if (inequality_groups) {
804 1074 : for (node *m = inequality_groups->h; m && !appended; m = m->next) {
805 537 : list *next = m->data;
806 537 : sql_exp *first = (sql_exp*) next->h->data;
807 :
808 537 : if (exp_match(first->l, e->l)) {
809 536 : list_append(next, e);
810 536 : appended = true;
811 : }
812 : }
813 : }
814 537 : if (!appended) {
815 63858 : if (!inequality_groups)
816 63857 : inequality_groups = new_exp_list(sql->sa);
817 63858 : list_append(inequality_groups, list_append(new_exp_list(sql->sa), e));
818 : }
819 : }
820 : }
821 :
822 586443 : if (inequality_groups) { /* if one list of inequalities has more than one entry, then the re-write is needed */
823 127715 : for (node *n = inequality_groups->h; n; n = n->next) {
824 63858 : list *next = n->data;
825 :
826 63858 : if (list_length(next) > 1)
827 533 : needed = 1;
828 : }
829 : }
830 :
831 63857 : if (needed) {
832 533 : nexps = new_exp_list(sql->sa);
833 1067 : for (node *n = inequality_groups->h; n; n = n->next) {
834 534 : list *next = n->data;
835 534 : sql_exp *first = (sql_exp*) next->h->data;
836 :
837 534 : if (list_length(next) > 1) {
838 533 : list *notin = new_exp_list(sql->sa);
839 :
840 1602 : for (node *m = next->h; m; m = m->next) {
841 1069 : sql_exp *e = m->data;
842 1069 : list_append(notin, e->r);
843 : }
844 533 : list_append(nexps, exp_in(sql->sa, first->l, notin, cmp_notin));
845 : } else {
846 1 : list_append(nexps, first);
847 : }
848 : }
849 :
850 1604 : for (node *n = exps->h; n; n = n->next) {
851 1071 : sql_exp *e = n->data;
852 :
853 1071 : if (!TRIVIAL_NOT_EQUAL_CMP(e))
854 1 : list_append(nexps, e);
855 : }
856 533 : (*changes)++;
857 : } else {
858 : nexps = exps;
859 : }
860 :
861 1253803 : for (node *n = nexps->h; n ; n = n->next) {
862 667360 : sql_exp *e = n->data;
863 :
864 667360 : if (e->type == e_cmp && e->flag == cmp_or) {
865 30823 : e->l = merge_notequal(sql, e->l, changes);
866 30823 : e->r = merge_notequal(sql, e->r, changes);
867 : }
868 : }
869 :
870 586443 : return nexps;
871 : }
872 :
873 : int
874 175005 : is_numeric_upcast(sql_exp *e)
875 : {
876 175005 : if (is_convert(e->type)) {
877 4640 : sql_subtype *f = exp_fromtype(e);
878 4640 : sql_subtype *t = exp_totype(e);
879 :
880 4640 : if (f->type->eclass == t->type->eclass && EC_COMPUTE(f->type->eclass)) {
881 1593 : if (f->type->localtype < t->type->localtype)
882 1573 : return 1;
883 : }
884 : }
885 : return 0;
886 : }
887 :
888 : /* optimize (a = b) or (a is null and b is null) -> a = b with null semantics */
889 : static sql_exp *
890 37950 : try_rewrite_equal_or_is_null(mvc *sql, sql_rel *rel, sql_exp *or, list *l1, list *l2)
891 : {
892 37950 : if (list_length(l1) == 1) {
893 34580 : bool valid = true, first_is_null_found = false, second_is_null_found = false;
894 34580 : sql_exp *cmp = l1->h->data;
895 34580 : sql_exp *first = cmp->l, *second = cmp->r;
896 :
897 34580 : if (is_compare(cmp->type) && !is_anti(cmp) && !cmp->f && cmp->flag == cmp_equal) {
898 7207 : int fupcast = is_numeric_upcast(first), supcast = is_numeric_upcast(second);
899 14438 : for(node *n = l2->h ; n && valid; n = n->next) {
900 7231 : sql_exp *e = n->data, *l = e->l, *r = e->r;
901 :
902 7231 : if (is_compare(e->type) && e->flag == cmp_equal && !e->f &&
903 4197 : !is_anti(e) && is_semantics(e)) {
904 1379 : int lupcast = is_numeric_upcast(l);
905 1379 : int rupcast = is_numeric_upcast(r);
906 1379 : sql_exp *rr = rupcast ? r->l : r;
907 :
908 1379 : if (rr->type == e_atom && rr->l && atom_null(rr->l)) {
909 1379 : if (exp_match_exp(fupcast?first->l:first, lupcast?l->l:l))
910 : first_is_null_found = true;
911 121 : else if (exp_match_exp(supcast?second->l:second, lupcast?l->l:l))
912 : second_is_null_found = true;
913 : else
914 5949 : valid = false;
915 : } else {
916 : valid = false;
917 : }
918 : } else {
919 : valid = false;
920 : }
921 : }
922 7207 : if (valid && first_is_null_found && second_is_null_found) {
923 21 : sql_subtype super;
924 :
925 21 : cmp_supertype(&super, exp_subtype(first), exp_subtype(second)); /* first and second must have the same type */
926 21 : if (!(first = exp_check_type(sql, &super, rel, first, type_equal)) ||
927 21 : !(second = exp_check_type(sql, &super, rel, second, type_equal))) {
928 0 : sql->session->status = 0;
929 0 : sql->errstr[0] = 0;
930 0 : return or;
931 : }
932 21 : sql_exp *res = exp_compare(sql->sa, first, second, cmp->flag);
933 21 : set_semantics(res);
934 21 : if (exp_name(or))
935 0 : exp_prop_alias(sql->sa, res, or);
936 21 : return res;
937 : }
938 : }
939 : }
940 : return or;
941 : }
942 :
943 : static list *
944 1044461 : merge_cmp_or_null(mvc *sql, sql_rel *rel, list *exps, int *changes)
945 : {
946 2238517 : for (node *n = exps->h; n ; n = n->next) {
947 1194056 : sql_exp *e = n->data;
948 :
949 1194056 : if (is_compare(e->type) && e->flag == cmp_or && !is_anti(e)) {
950 18975 : sql_exp *ne = try_rewrite_equal_or_is_null(sql, rel, e, e->l, e->r);
951 18975 : if (ne != e) {
952 19 : (*changes)++;
953 19 : n->data = ne;
954 : }
955 18975 : ne = try_rewrite_equal_or_is_null(sql, rel, e, e->r, e->l);
956 18975 : if (ne != e) {
957 2 : (*changes)++;
958 2 : n->data = ne;
959 : }
960 : }
961 : }
962 1044461 : return exps;
963 : }
964 :
965 : static list *
966 1044461 : cleanup_equal_exps(mvc *sql, sql_rel *rel, list *exps, int *changes)
967 : {
968 1044461 : if (list_length(exps) <= 1)
969 : return exps;
970 139205 : if (is_join(rel->op)) {
971 214830 : for(node *n = exps->h; n; n = n->next) {
972 144471 : sql_exp *e = n->data;
973 288773 : if (e->type == e_cmp && !e->f && e->flag <= cmp_notequal &&
974 186825 : !rel_find_exp(rel->l, e->l) && !rel_find_exp(rel->r, e->r) &&
975 42349 : !find_prop(e->p, PROP_HASHCOL) && !find_prop(e->p, PROP_JOINIDX)) {
976 21094 : exp_swap(e);
977 : }
978 : }
979 : }
980 139205 : bool needed = false;
981 428450 : for(node *n = exps->h; !needed && n; n = n->next) {
982 568638 : for (node *m = n->next; !needed && m; m = m->next) {
983 279393 : if (exp_match_exp_semantics(n->data, m->data, true))
984 81 : needed = true;
985 : }
986 : }
987 139205 : if (needed) {
988 81 : list *nexps = sa_list(sql->sa);
989 :
990 249 : for(node *n = exps->h; n; n = n->next) {
991 168 : bool done = false;
992 431 : for (node *m = exps->h; m && !done; m = m->next) {
993 263 : if (n != m && exp_match_exp_semantics(n->data, m->data, false)) {
994 163 : sql_exp *e1 = n->data, *e2 = m->data;
995 163 : if ((is_any(e1) || is_semantics(e1)) || (!is_any(e2) && !is_semantics(e2))) {
996 163 : append(nexps, e1);
997 163 : if ((!is_any(e2) && !is_semantics(e2)) && is_left(rel->op) && list_length(rel->attr) == 1) {
998 : /* nil is false */
999 0 : sql_exp *m = rel->attr->h->data;
1000 0 : if (exp_is_atom(m))
1001 0 : set_no_nil(m);
1002 : }
1003 : }
1004 : done = true;
1005 : }
1006 : }
1007 168 : if (!done)
1008 5 : append(nexps, n->data);
1009 : }
1010 : return nexps;
1011 : }
1012 : (void)changes;
1013 : return exps;
1014 : }
1015 :
1016 : static inline sql_rel *
1017 1044461 : rel_select_cse(visitor *v, sql_rel *rel)
1018 : {
1019 1044461 : if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && rel->exps) /* cleanup equal expressions */
1020 1044461 : rel->exps = cleanup_equal_exps(v->sql, rel, rel->exps, &v->changes); /* (a = b) and (a += b) */
1021 :
1022 1044461 : if (is_select(rel->op) && rel->exps)
1023 524797 : rel->exps = merge_ors(v->sql, rel->exps, &v->changes);
1024 :
1025 1044461 : if (is_select(rel->op) && rel->exps)
1026 524797 : rel->exps = merge_notequal(v->sql, rel->exps, &v->changes); /* x <> 1 and x <> 2 => x not in (1, 2)*/
1027 :
1028 1044461 : if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && rel->exps)
1029 1044461 : rel->exps = merge_cmp_or_null(v->sql, rel, rel->exps, &v->changes); /* (a = b) or (a is null and b is null) -> a = b with null semantics */
1030 :
1031 1044461 : if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && rel->exps) {
1032 1044461 : node *n;
1033 1044461 : list *nexps;
1034 1044461 : int needed = 0;
1035 :
1036 2235238 : for (n=rel->exps->h; n && !needed; n = n->next) {
1037 1190777 : sql_exp *e = n->data;
1038 :
1039 1190777 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e))
1040 1190777 : needed = 1;
1041 : }
1042 1044461 : if (!needed)
1043 : return rel;
1044 18742 : nexps = new_exp_list(v->sql->sa);
1045 41225 : for (n=rel->exps->h; n; n = n->next) {
1046 22483 : sql_exp *e = n->data;
1047 :
1048 22483 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
1049 : /* split the common expressions */
1050 18954 : v->changes += exps_cse(v->sql, nexps, e->l, e->r);
1051 : } else {
1052 3529 : append(nexps, e);
1053 : }
1054 : }
1055 18742 : rel->exps = nexps;
1056 : }
1057 : return rel;
1058 : }
1059 :
1060 : static list *
1061 10487 : exps_merge_select_rse( mvc *sql, list *l, list *r, bool *merged)
1062 : {
1063 10487 : node *n, *m, *o;
1064 10487 : list *nexps = NULL, *lexps, *rexps;
1065 10487 : bool lmerged = true, rmerged = true;
1066 :
1067 10487 : lexps = new_exp_list(sql->sa);
1068 22354 : for (n = l->h; n; n = n->next) {
1069 11867 : sql_exp *e = n->data;
1070 :
1071 11867 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e) && !is_semantics(e)) {
1072 3326 : lmerged = false;
1073 3326 : list *nexps = exps_merge_select_rse(sql, e->l, e->r, &lmerged);
1074 3728 : for (o = nexps->h; o; o = o->next)
1075 402 : append(lexps, o->data);
1076 : } else {
1077 8541 : append(lexps, e);
1078 : }
1079 : }
1080 10487 : if (lmerged)
1081 7210 : lmerged = (list_length(lexps) == 1);
1082 10487 : rexps = new_exp_list(sql->sa);
1083 23371 : for (n = r->h; n; n = n->next) {
1084 12884 : sql_exp *e = n->data;
1085 :
1086 12884 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e) && !is_semantics(e)) {
1087 699 : rmerged = false;
1088 699 : list *nexps = exps_merge_select_rse(sql, e->l, e->r, &rmerged);
1089 724 : for (o = nexps->h; o; o = o->next)
1090 25 : append(rexps, o->data);
1091 : } else {
1092 12185 : append(rexps, e);
1093 : }
1094 : }
1095 10487 : if (rmerged)
1096 9809 : rmerged = (list_length(r) == 1);
1097 :
1098 10487 : nexps = new_exp_list(sql->sa);
1099 :
1100 : /* merge merged lists first ? */
1101 19430 : for (n = lexps->h; n; n = n->next) {
1102 8943 : sql_exp *le = n->data, *re, *fnd = NULL;
1103 :
1104 8943 : if (le->type != e_cmp || le->flag == cmp_or || is_anti(le) || is_semantics(le) || is_symmetric(le))
1105 622 : continue;
1106 17240 : for (m = rexps->h; !fnd && m; m = m->next) {
1107 8919 : re = m->data;
1108 8919 : if (exps_match_col_exps(le, re))
1109 1551 : fnd = re;
1110 : }
1111 8321 : if (fnd && (is_anti(fnd) || is_semantics(fnd)))
1112 50 : continue;
1113 : /* cases
1114 : * 1) 2 values (cmp_equal)
1115 : * 2) 1 value (cmp_equal), and cmp_in
1116 : * (also cmp_in, cmp_equal)
1117 : * 3) 2 cmp_in
1118 : * 4) ranges
1119 : */
1120 1501 : if (fnd) {
1121 1501 : re = fnd;
1122 1501 : fnd = NULL;
1123 1501 : if (is_anti(le) || is_anti(re) || is_symmetric(re))
1124 0 : continue;
1125 1938 : if (le->flag == cmp_equal && re->flag == cmp_equal) {
1126 437 : list *exps = new_exp_list(sql->sa);
1127 :
1128 437 : append(exps, le->r);
1129 437 : append(exps, re->r);
1130 437 : fnd = exp_in(sql->sa, le->l, exps, cmp_in);
1131 1207 : } else if (le->flag == cmp_equal && re->flag == cmp_in){
1132 143 : list *exps = new_exp_list(sql->sa);
1133 :
1134 143 : append(exps, le->r);
1135 143 : list_merge(exps, re->r, NULL);
1136 143 : fnd = exp_in(sql->sa, le->l, exps, cmp_in);
1137 1216 : } else if (le->flag == cmp_in && re->flag == cmp_equal){
1138 295 : list *exps = new_exp_list(sql->sa);
1139 :
1140 295 : append(exps, re->r);
1141 295 : list_merge(exps, le->r, NULL);
1142 295 : fnd = exp_in(sql->sa, le->l, exps, cmp_in);
1143 741 : } else if (le->flag == cmp_in && re->flag == cmp_in){
1144 115 : list *exps = new_exp_list(sql->sa);
1145 :
1146 115 : list_merge(exps, le->r, NULL);
1147 115 : list_merge(exps, re->r, NULL);
1148 115 : fnd = exp_in(sql->sa, le->l, exps, cmp_in);
1149 511 : } else if (le->f && re->f && /* merge ranges */
1150 20 : le->flag == re->flag && le->flag <= cmp_lt) {
1151 20 : sql_exp *mine = NULL, *maxe = NULL;
1152 :
1153 20 : if (!(mine = rel_binop_(sql, NULL, exp_copy(sql, le->r), exp_copy(sql, re->r), "sys", "sql_min", card_value, true))) {
1154 0 : sql->session->status = 0;
1155 0 : sql->errstr[0] = '\0';
1156 0 : continue;
1157 : }
1158 20 : if (!(maxe = rel_binop_(sql, NULL, exp_copy(sql, le->f), exp_copy(sql, re->f), "sys", "sql_max", card_value, true))) {
1159 0 : sql->session->status = 0;
1160 0 : sql->errstr[0] = '\0';
1161 0 : continue;
1162 : }
1163 20 : fnd = exp_compare2(sql->sa, exp_copy(sql, le->l), mine, maxe, le->flag, 0);
1164 20 : lmerged = false;
1165 : }
1166 1010 : if (fnd) {
1167 1010 : append(nexps, fnd);
1168 1864 : *merged = (fnd && lmerged && rmerged);
1169 : }
1170 : }
1171 : }
1172 10487 : return nexps;
1173 : }
1174 :
1175 : /* merge related sub expressions
1176 : *
1177 : * ie (x = a and y > 1 and y < 5) or
1178 : * (x = c and y > 1 and y < 10) or
1179 : * (x = e and y > 1 and y < 20)
1180 : * ->
1181 : * ((x = a and y > 1 and y < 5) or
1182 : * (x = c and y > 1 and y < 10) or
1183 : * (x = e and y > 1 and y < 20)) and
1184 : * x in (a,c,e) and
1185 : * y > 1 and y < 20
1186 : *
1187 : * for single expression or's we can do better
1188 : * x in (a, b, c) or x in (d, e, f)
1189 : * ->
1190 : * x in (a, b, c, d, e, f)
1191 : * */
1192 : static inline sql_rel *
1193 401591 : rel_merge_select_rse(visitor *v, sql_rel *rel)
1194 : {
1195 : /* only execute once per select */
1196 401591 : if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && rel->exps && !is_rel_merge_select_rse_used(rel->used)) {
1197 143547 : node *n, *o;
1198 143547 : list *nexps = new_exp_list(v->sql->sa);
1199 :
1200 303906 : for (n=rel->exps->h; n; n = n->next) {
1201 160359 : sql_exp *e = n->data;
1202 :
1203 166821 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e) && !is_semantics(e)) {
1204 : /* possibly merge related expressions */
1205 6462 : bool merged = false;
1206 :
1207 6462 : list *ps = exps_merge_select_rse(v->sql, e->l, e->r, &merged);
1208 7045 : for (o = ps->h; o; o = o->next)
1209 583 : append(nexps, o->data);
1210 6462 : if (merged)
1211 87 : v->changes++;
1212 : else
1213 6375 : append(nexps, e);
1214 : } else {
1215 153897 : append(nexps, e);
1216 : }
1217 : }
1218 143547 : rel->exps = nexps;
1219 143547 : rel->used |= rel_merge_select_rse_used;
1220 : }
1221 401591 : return rel;
1222 : }
1223 :
1224 : /* pack optimizers into a single function call to avoid iterations in the AST */
1225 : static sql_rel *
1226 3692350 : rel_optimize_select_and_joins_bottomup_(visitor *v, sql_rel *rel)
1227 : {
1228 3692350 : if (!rel || (!is_join(rel->op) && !is_semi(rel->op) && !is_select(rel->op)) || list_empty(rel->exps))
1229 2647889 : return rel;
1230 1044461 : uint8_t cycle = *(uint8_t*) v->data;
1231 :
1232 1044461 : rel->exps = exp_merge_range(v, rel, rel->exps);
1233 1044461 : rel = rel_select_cse(v, rel);
1234 1044461 : if (cycle == 1)
1235 401591 : rel = rel_merge_select_rse(v, rel);
1236 1044461 : rel = rewrite_simplify(v, cycle, v->value_based_opt, rel);
1237 1044461 : return rel;
1238 : }
1239 :
1240 : static sql_rel *
1241 130162 : rel_optimize_select_and_joins_bottomup(visitor *v, global_props *gp, sql_rel *rel)
1242 : {
1243 130162 : v->data = &gp->opt_cycle;
1244 130162 : rel = rel_visitor_bottomup(v, rel, &rel_optimize_select_and_joins_bottomup_);
1245 130162 : v->data = gp;
1246 130162 : return rel;
1247 : }
1248 :
1249 : run_optimizer
1250 638405 : bind_optimize_select_and_joins_bottomup(visitor *v, global_props *gp)
1251 : {
1252 638405 : int flag = v->sql->sql_optimizer;
1253 638209 : return gp->opt_level == 1 && (gp->cnt[op_join] || gp->cnt[op_left] || gp->cnt[op_right] ||
1254 551645 : gp->cnt[op_full] || gp->cnt[op_semi] || gp->cnt[op_anti] ||
1255 1276614 : gp->cnt[op_select]) && (flag & optimize_select_and_joins_bottomup) ? rel_optimize_select_and_joins_bottomup : NULL;
1256 : }
1257 :
1258 :
1259 : static inline sql_rel *
1260 3487648 : rel_push_join_exps_down(visitor *v, sql_rel *rel)
1261 : {
1262 : /* push select exps part of join expressions down */
1263 : /* TODO CHECK WHY not semi enabled */
1264 3487648 : if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) /*|| is_semi(rel->op)*/) && !list_empty(rel->exps)) {
1265 491327 : int left = is_innerjoin(rel->op) || is_right(rel->op) || rel->op == op_semi;
1266 491327 : int right = is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op);
1267 491327 : sql_rel *jl = rel->l, *ojl = jl, *jr = rel->r, *ojr = jr;
1268 :
1269 491327 : set_processed(jl);
1270 491327 : set_processed(jr);
1271 1056682 : for (node *n = rel->exps->h; n;) {
1272 565355 : node *next = n->next;
1273 565355 : sql_exp *e = n->data;
1274 :
1275 565355 : if (left && rel_rebind_exp(v->sql, jl, e) && !is_any(e)) { /* select expressions on left */
1276 1346 : if (!is_select(jl->op) || rel_is_ref(jl))
1277 1335 : rel->l = jl = rel_select(v->sql->sa, jl, NULL);
1278 1346 : rel_select_add_exp(v->sql->sa, jl, e);
1279 1346 : list_remove_node(rel->exps, NULL, n);
1280 1346 : v->changes++;
1281 564009 : } else if (right && rel_rebind_exp(v->sql, jr, e) && !is_any(e)) { /* select expressions on right */
1282 45 : if (!is_select(jr->op) || rel_is_ref(jr))
1283 42 : rel->r = jr = rel_select(v->sql->sa, jr, NULL);
1284 45 : rel_select_add_exp(v->sql->sa, jr, e);
1285 45 : list_remove_node(rel->exps, NULL, n);
1286 45 : v->changes++;
1287 : }
1288 : n = next;
1289 : }
1290 491327 : if (ojl != jl)
1291 1335 : set_processed(jl);
1292 491327 : if (ojr != jr)
1293 42 : set_processed(jr);
1294 : }
1295 3487648 : return rel;
1296 : }
1297 :
1298 : static inline bool
1299 3487648 : is_non_trivial_select_applied_to_outer_join(sql_rel *rel)
1300 : {
1301 3487648 : return is_select(rel->op) && rel->exps && is_outerjoin(((sql_rel*) rel->l)->op);
1302 : }
1303 :
1304 : extern list *list_append_before(list *l, node *n, void *data);
1305 :
1306 : static void
1307 : replace_column_references_with_nulls_2(mvc *sql, sql_rel *inner_join_side, sql_exp* e);
1308 :
1309 : static void
1310 5842 : replace_column_references_with_nulls_1(mvc *sql, sql_rel *inner_join_side, list* exps) {
1311 5842 : if (list_empty(exps))
1312 : return;
1313 14906 : for(node* n = exps->h; n; n=n->next) {
1314 9064 : sql_exp* e = n->data;
1315 9064 : replace_column_references_with_nulls_2(sql, inner_join_side, e);
1316 : }
1317 : }
1318 :
1319 : static void
1320 28924 : replace_column_references_with_nulls_2(mvc *sql, sql_rel *inner_join_side, sql_exp* e) {
1321 36837 : if (e == NULL) {
1322 : return;
1323 : }
1324 :
1325 30535 : switch (e->type) {
1326 9168 : case e_column:
1327 9168 : if (rel_find_exp_and_corresponding_rel(inner_join_side, e, true, NULL, NULL)) {
1328 2568 : e->type = e_atom;
1329 2568 : e->l = atom_general(sql->sa, &e->tpe, NULL, 0);
1330 2568 : e->r = e->f = NULL;
1331 : }
1332 : break;
1333 9449 : case e_cmp:
1334 9449 : switch (e->flag) {
1335 6732 : case cmp_gt:
1336 : case cmp_gte:
1337 : case cmp_lte:
1338 : case cmp_lt:
1339 : case cmp_equal:
1340 : case cmp_notequal:
1341 : {
1342 6732 : sql_exp* l = e->l;
1343 6732 : sql_exp* r = e->r;
1344 6732 : sql_exp* f = e->f;
1345 :
1346 6732 : replace_column_references_with_nulls_2(sql, inner_join_side, l);
1347 6732 : replace_column_references_with_nulls_2(sql, inner_join_side, r);
1348 6732 : replace_column_references_with_nulls_2(sql, inner_join_side, f);
1349 6732 : break;
1350 : }
1351 1953 : case cmp_filter:
1352 : case cmp_or:
1353 : {
1354 1953 : list* l = e->l;
1355 1953 : list* r = e->r;
1356 1953 : replace_column_references_with_nulls_1(sql, inner_join_side, l);
1357 1953 : replace_column_references_with_nulls_1(sql, inner_join_side, r);
1358 1953 : break;
1359 : }
1360 764 : case cmp_in:
1361 : case cmp_notin:
1362 : {
1363 764 : sql_exp* l = e->l;
1364 764 : list* r = e->r;
1365 764 : replace_column_references_with_nulls_2(sql, inner_join_side, l);
1366 764 : replace_column_references_with_nulls_1(sql, inner_join_side, r);
1367 764 : break;
1368 : }
1369 : default:
1370 : break;
1371 : }
1372 : break;
1373 1172 : case e_func:
1374 : {
1375 1172 : list* l = e->l;
1376 1172 : replace_column_references_with_nulls_1(sql, inner_join_side, l);
1377 1172 : break;
1378 : }
1379 1181 : case e_convert:
1380 : {
1381 1181 : sql_exp* l = e->l;
1382 1181 : replace_column_references_with_nulls_2(sql, inner_join_side, l);
1383 1181 : break;
1384 : }
1385 : default:
1386 : break;
1387 : }
1388 : }
1389 :
1390 : static sql_rel *
1391 4990 : out2inner(visitor *v, sql_rel* sel, sql_rel* join, sql_rel* inner_join_side, operator_type new_type) {
1392 :
1393 : /* handle inner_join relations with a simple select */
1394 4990 : if (is_select(inner_join_side->op) && inner_join_side->l)
1395 4990 : inner_join_side = inner_join_side->l;
1396 :
1397 4990 : list* select_predicates = exps_copy(v->sql, sel->exps);
1398 :
1399 10526 : for(node* n = select_predicates->h; n; n=n->next) {
1400 5632 : sql_exp* e = n->data;
1401 5632 : replace_column_references_with_nulls_2(v->sql, inner_join_side, e);
1402 :
1403 5632 : if (exp_is_false(e)) {
1404 96 : join->op = new_type;
1405 96 : v->changes++;
1406 96 : break;
1407 : }
1408 : }
1409 :
1410 4990 : return sel;
1411 : }
1412 :
1413 : static inline sql_rel *
1414 3487648 : rel_out2inner(visitor *v, sql_rel *rel) {
1415 :
1416 3487648 : if (!is_non_trivial_select_applied_to_outer_join(rel)) {
1417 : // Nothing to do here.
1418 : return rel;
1419 : }
1420 :
1421 4967 : sql_rel* join = (sql_rel*) rel->l;
1422 :
1423 4967 : if (rel_is_ref(join)) {
1424 : /* Do not alter a multi-referenced join relation.
1425 : * This is problematic (e.g. in the case of the plan of a merge statement)
1426 : * basically because there are no guarantees on the other container relations.
1427 : * In particular there is no guarantee that the other referencing relations are
1428 : * select relations with null-rejacting predicates on the inner join side.
1429 : */
1430 : return rel;
1431 : }
1432 :
1433 4967 : sql_rel* inner_join_side;
1434 4967 : if (is_left(join->op)) {
1435 4918 : inner_join_side = join->r;
1436 4918 : return out2inner(v, rel, join, inner_join_side, op_join);
1437 : }
1438 49 : else if (is_right(join->op)) {
1439 26 : inner_join_side = join->l;
1440 26 : return out2inner(v, rel, join, inner_join_side, op_join);
1441 : }
1442 : else /*full outer join*/ {
1443 : // First check if left side can degenerate from full outer join to just right outer join.
1444 23 : inner_join_side = join->r;
1445 23 : rel = out2inner(v, rel, join, inner_join_side, op_right);
1446 : /* Now test if the right side can degenerate to
1447 : * a normal inner join or a left outer join
1448 : * depending on the result of previous call to out2inner.
1449 : */
1450 :
1451 23 : inner_join_side = join->l;
1452 37 : return out2inner(v, rel, join, inner_join_side, is_right(join->op)? op_join: op_left);
1453 : }
1454 : }
1455 :
1456 : static bool
1457 4307 : exps_uses_any(list *exps, list *l)
1458 : {
1459 4307 : bool uses_any = false;
1460 :
1461 4307 : if (list_empty(exps) || list_empty(l))
1462 15 : return false;
1463 8590 : for (node *n = l->h; n && !uses_any; n = n->next) {
1464 4298 : sql_exp *e = n->data;
1465 4298 : uses_any |= list_exps_uses_exp(exps, exp_relname(e), exp_name(e)) != NULL;
1466 : }
1467 :
1468 : return uses_any;
1469 : }
1470 :
1471 : /* TODO At the moment I have to disable the new join2semi because the join order optimizer doesn't take semi-joins into account,
1472 : so plans get deteriorated if more joins are optimized into semi-joins. Later I will review the join order with semi-joins and hopefully,
1473 : I will be able to re-enable the new join2semi. */
1474 : #if 0
1475 : #define NO_EXP_FOUND 0
1476 : #define FOUND_WITH_DUPLICATES 1
1477 : #define MAY_HAVE_DUPLICATE_NULLS 2
1478 : #define ALL_VALUES_DISTINCT 3
1479 :
1480 : static int
1481 : find_projection_for_join2semi(sql_rel *rel, sql_exp *jc)
1482 : {
1483 : sql_rel *res = NULL;
1484 : sql_exp *e = NULL;
1485 : bool underjoin = false;
1486 :
1487 : if ((e = rel_find_exp_and_corresponding_rel(rel, jc, &res, &underjoin))) {
1488 : if (underjoin || e->type != e_column)
1489 : return FOUND_WITH_DUPLICATES;
1490 : /* if just one groupby column is projected or the relation needs distinct values and one column is projected or is a primary key, it will be distinct */
1491 : if (is_unique(e) ||
1492 : (is_groupby(res->op) && list_length(res->r) == 1 && exps_find_exp(res->r, e)) ||
1493 : ((is_project(res->op) || is_base(res->op)) && ((need_distinct(res) && list_length(res->exps) == 1) || res->card < CARD_AGGR)))
1494 : return has_nil(e) ? MAY_HAVE_DUPLICATE_NULLS : ALL_VALUES_DISTINCT;
1495 : return FOUND_WITH_DUPLICATES;
1496 : }
1497 : return NO_EXP_FOUND;
1498 : }
1499 :
1500 : static int
1501 : subrel_uses_exp_outside_subrel(visitor *v, sql_rel *rel, list *l, sql_rel *j)
1502 : {
1503 : if (rel == j)
1504 : return 0;
1505 : if (mvc_highwater(v->sql))
1506 : return 1;
1507 : switch(rel->op){
1508 : case op_join:
1509 : case op_left:
1510 : case op_right:
1511 : case op_full:
1512 : return exps_uses_any(rel->exps, l) ||
1513 : subrel_uses_exp_outside_subrel(v, rel->l, l, j) || subrel_uses_exp_outside_subrel(v, rel->r, l, j);
1514 : case op_semi:
1515 : case op_anti:
1516 : case op_select:
1517 : return exps_uses_any(rel->exps, l) ||
1518 : subrel_uses_exp_outside_subrel(v, rel->l, l, j);
1519 : case op_project:
1520 : case op_groupby:
1521 : return exps_uses_any(rel->exps, l) || exps_uses_any(rel->r, l);
1522 : case op_basetable:
1523 : case op_table:
1524 : case op_union:
1525 : case op_except:
1526 : case op_inter:
1527 : return exps_uses_any(rel->exps, l);
1528 : case op_topn:
1529 : case op_sample:
1530 : return subrel_uses_exp_outside_subrel(v, rel->l, l, j);
1531 : default:
1532 : return 1;
1533 : }
1534 : }
1535 :
1536 : static int
1537 : projrel_uses_exp_outside_subrel(visitor *v, sql_rel *rel, list *l, sql_rel *j)
1538 : {
1539 : /* test if projecting relation uses any of the join expressions */
1540 : assert((is_simple_project(rel->op) || is_groupby(rel->op)) && rel->l);
1541 : return exps_uses_any(rel->exps, l) || exps_uses_any(rel->r, l) || subrel_uses_exp_outside_subrel(v, rel->l, l, j);
1542 : }
1543 :
1544 : static sql_rel *
1545 : rewrite_joins2semi(visitor *v, sql_rel *proj, sql_rel *rel)
1546 : {
1547 : /* generalize possibility : we need the visitor 'step' here */
1548 : if (rel_is_ref(rel) || mvc_highwater(v->sql)) /* if the join has multiple references, it's dangerous to convert it into a semijoin */
1549 : return rel;
1550 : if (is_innerjoin(rel->op) && !list_empty(rel->exps)) {
1551 : sql_rel *l = rel->l, *r = rel->r;
1552 : bool left_unique = true, right_unique = true;
1553 :
1554 : /* these relations don't project anything, so skip them */
1555 : while (is_topn(l->op) || is_sample(l->op) || is_select(l->op) || is_semi(l->op))
1556 : l = l->l;
1557 : /* joins will expand values, so don't search on those */
1558 : if (!is_base(l->op) && !is_project(l->op))
1559 : left_unique = false;
1560 : while (is_topn(r->op) || is_sample(r->op) || is_select(r->op) || is_semi(r->op))
1561 : r = r->l;
1562 : if (!is_base(r->op) && !is_project(r->op))
1563 : right_unique = false;
1564 : /* if all columns used in equi-joins from one of the sides are unique, the join can be rewritten into a semijoin */
1565 : for (node *n=rel->exps->h; n && (left_unique || right_unique); n = n->next) {
1566 : sql_exp *e = n->data, *el = e->l, *er = e->r;
1567 :
1568 : if (!is_compare(e->type) || e->flag != cmp_equal || exp_has_func(el) || exp_has_func(er)) {
1569 : left_unique = right_unique = false;
1570 : } else {
1571 : int found = 0;
1572 :
1573 : if (left_unique && (found = find_projection_for_join2semi(l, el)) > NO_EXP_FOUND)
1574 : left_unique &= (found == ALL_VALUES_DISTINCT || (found == MAY_HAVE_DUPLICATE_NULLS && (!is_semantics(e) || !has_nil(er))));
1575 : if (left_unique && (found = find_projection_for_join2semi(l, er)) > NO_EXP_FOUND)
1576 : left_unique &= (found == ALL_VALUES_DISTINCT || (found == MAY_HAVE_DUPLICATE_NULLS && (!is_semantics(e) || !has_nil(el))));
1577 : if (right_unique && (found = find_projection_for_join2semi(r, el)) > NO_EXP_FOUND)
1578 : right_unique &= (found == ALL_VALUES_DISTINCT || (found == MAY_HAVE_DUPLICATE_NULLS && (!is_semantics(e) || !has_nil(er))));
1579 : if (right_unique && (found = find_projection_for_join2semi(r, er)) > NO_EXP_FOUND)
1580 : right_unique &= (found == ALL_VALUES_DISTINCT || (found == MAY_HAVE_DUPLICATE_NULLS && (!is_semantics(e) || !has_nil(el))));
1581 : }
1582 : }
1583 :
1584 : /* now we need to check relation's expressions are not used */
1585 : if (left_unique && !projrel_uses_exp_outside_subrel(v, proj, l->exps, rel)) {
1586 : sql_rel *tmp = rel->r;
1587 : rel->r = rel->l;
1588 : rel->l = tmp;
1589 : rel->op = op_semi;
1590 : v->changes++;
1591 : } else if (right_unique && !projrel_uses_exp_outside_subrel(v, proj, r->exps, rel)) {
1592 : rel->op = op_semi;
1593 : v->changes++;
1594 : }
1595 : }
1596 : if (is_join(rel->op)) {
1597 : rel->l = rewrite_joins2semi(v, proj, rel->l);
1598 : rel->r = rewrite_joins2semi(v, proj, rel->r);
1599 : } else if (is_topn(rel->op) || is_sample(rel->op) || is_select(rel->op) || is_semi(rel->op)) {
1600 : rel->l = rewrite_joins2semi(v, proj, rel->l);
1601 : }
1602 : return rel;
1603 : }
1604 :
1605 : static inline sql_rel *
1606 : rel_join2semijoin(visitor *v, sql_rel *rel)
1607 : {
1608 : if ((is_simple_project(rel->op) || is_groupby(rel->op)) && rel->l)
1609 : rel->l = rewrite_joins2semi(v, rel, rel->l);
1610 : return rel;
1611 : }
1612 : #endif
1613 :
1614 : #define NO_PROJECTION_FOUND 0
1615 : #define MAY_HAVE_DUPLICATE_NULLS 1
1616 : #define ALL_VALUES_DISTINCT 2
1617 :
1618 : static int
1619 719867 : find_projection_for_join2semi(sql_rel *rel)
1620 : {
1621 719867 : if (is_simple_project(rel->op) || is_groupby(rel->op) || is_inter(rel->op) || is_except(rel->op) || is_base(rel->op) || (is_union(rel->op) && need_distinct(rel))) {
1622 382355 : if (rel->card < CARD_AGGR) /* const or groupby without group by exps */
1623 : return ALL_VALUES_DISTINCT;
1624 381181 : if (list_length(rel->exps) == 1) {
1625 6775 : sql_exp *e = rel->exps->h->data;
1626 : /* a single group by column in the projection list from a group by relation is guaranteed to be unique, but not an aggregate */
1627 6775 : if (e->type == e_column) {
1628 6745 : sql_rel *res = NULL;
1629 6745 : sql_exp *found = NULL;
1630 6745 : bool underjoin = false;
1631 :
1632 : /* if just one groupby column is projected or the relation needs distinct values and one column is projected or is a primary key, it will be distinct */
1633 6745 : if ((is_groupby(rel->op) && list_length(rel->r) == 1 && exps_find_exp(rel->r, e)) || (need_distinct(rel) && list_length(rel->exps) == 1))
1634 9643 : return ALL_VALUES_DISTINCT;
1635 3057 : if (is_unique(e))
1636 2266 : return has_nil(e) ? MAY_HAVE_DUPLICATE_NULLS : ALL_VALUES_DISTINCT;
1637 :
1638 791 : if ((is_simple_project(rel->op) || is_groupby(rel->op) || is_inter(rel->op) || is_except(rel->op)) &&
1639 146 : (found = rel_find_exp_and_corresponding_rel(rel->l, e, false, &res, &underjoin)) && !underjoin) { /* grouping column on inner relation */
1640 130 : if (need_distinct(res) && list_length(res->exps) == 1)
1641 : return ALL_VALUES_DISTINCT;
1642 130 : if (is_unique(found))
1643 0 : return has_nil(e) ? MAY_HAVE_DUPLICATE_NULLS : ALL_VALUES_DISTINCT;
1644 130 : if (found->type == e_column && found->card <= CARD_AGGR) {
1645 1 : if (!is_groupby(res->op) && list_length(res->exps) != 1)
1646 : return NO_PROJECTION_FOUND;
1647 0 : for (node *n = res->exps->h ; n ; n = n->next) { /* must be the single column in the group by expression list */
1648 0 : sql_exp *e = n->data;
1649 0 : if (e != found && e->type == e_column)
1650 : return NO_PROJECTION_FOUND;
1651 : }
1652 : return ALL_VALUES_DISTINCT;
1653 : }
1654 : }
1655 : }
1656 : }
1657 : }
1658 : return NO_PROJECTION_FOUND;
1659 : }
1660 :
1661 : static sql_rel *
1662 2286932 : find_candidate_join2semi(visitor *v, sql_rel *rel, bool *swap)
1663 : {
1664 : /* generalize possibility : we need the visitor 'step' here */
1665 2287529 : if (rel_is_ref(rel)) /* if the join has multiple references, it's dangerous to convert it into a semijoin */
1666 : return NULL;
1667 2169824 : if (rel->op == op_join && !list_empty(rel->exps) && list_empty(rel->attr)) {
1668 362131 : sql_rel *l = rel->l, *r = rel->r;
1669 362131 : int foundr = NO_PROJECTION_FOUND, foundl = NO_PROJECTION_FOUND, found = NO_PROJECTION_FOUND;
1670 362131 : bool ok = false;
1671 :
1672 362131 : foundr = find_projection_for_join2semi(r);
1673 362131 : if (foundr < ALL_VALUES_DISTINCT)
1674 357736 : foundl = find_projection_for_join2semi(l);
1675 362131 : if (foundr && foundr > foundl) {
1676 4405 : *swap = false;
1677 4405 : found = foundr;
1678 357726 : } else if (foundl) {
1679 2716 : *swap = true;
1680 2716 : found = foundl;
1681 : }
1682 :
1683 7121 : if (found > NO_PROJECTION_FOUND) {
1684 : /* if all join expressions can be pushed down or have function calls, then it cannot be rewritten into a semijoin */
1685 14246 : for (node *n=rel->exps->h; n && !ok; n = n->next) {
1686 7125 : sql_exp *e = n->data;
1687 :
1688 10912 : ok |= e->type == e_cmp && e->flag == cmp_equal && !exp_has_func(e) && !rel_rebind_exp(v->sql, l, e) && !rel_rebind_exp(v->sql, r, e) &&
1689 11 : (found == ALL_VALUES_DISTINCT || !is_semantics(e) || !has_nil((sql_exp *)e->l) || !has_nil((sql_exp *)e->r));
1690 : }
1691 : }
1692 :
1693 362131 : if (ok)
1694 : return rel;
1695 : }
1696 2166475 : if (is_join(rel->op) || is_semi(rel->op)) {
1697 544762 : sql_rel *c;
1698 :
1699 544762 : if ((c=find_candidate_join2semi(v, rel->l, swap)) != NULL ||
1700 544356 : (c=find_candidate_join2semi(v, rel->r, swap)) != NULL)
1701 428 : if (list_empty(c->attr))
1702 : return c;
1703 : }
1704 2166047 : if (is_topn(rel->op) || is_sample(rel->op))
1705 597 : return find_candidate_join2semi(v, rel->l, swap);
1706 : return NULL;
1707 : }
1708 :
1709 : static int
1710 2560 : subrel_uses_exp_outside_subrel(sql_rel *rel, list *l, sql_rel *c)
1711 : {
1712 2561 : if (rel == c)
1713 : return 0;
1714 : /* for subrel only expect joins (later possibly selects) */
1715 853 : if (is_join(rel->op) || is_semi(rel->op)) {
1716 432 : if (exps_uses_any(rel->exps, l))
1717 : return 1;
1718 846 : if (subrel_uses_exp_outside_subrel(rel->l, l, c) ||
1719 422 : subrel_uses_exp_outside_subrel(rel->r, l, c))
1720 5 : return 1;
1721 : }
1722 840 : if (is_topn(rel->op) || is_sample(rel->op))
1723 1 : return subrel_uses_exp_outside_subrel(rel->l, l, c);
1724 : return 0;
1725 : }
1726 :
1727 : static int
1728 3349 : rel_uses_exp_outside_subrel(sql_rel *rel, list *l, sql_rel *c)
1729 : {
1730 : /* for now we only expect sub relations of type project, selects (rel) or join/semi */
1731 3349 : if (is_simple_project(rel->op) || is_groupby(rel->op) || is_select(rel->op)) {
1732 3349 : if (!list_empty(rel->exps) && exps_uses_any(rel->exps, l))
1733 : return 1;
1734 1714 : if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r) && exps_uses_any(rel->r, l))
1735 : return 1;
1736 1714 : if (rel->l)
1737 1714 : return subrel_uses_exp_outside_subrel(rel->l, l, c);
1738 : }
1739 0 : if (is_topn(rel->op) || is_sample(rel->op))
1740 0 : return subrel_uses_exp_outside_subrel(rel->l, l, c);
1741 : return 1;
1742 : }
1743 :
1744 : static inline sql_rel *
1745 3487648 : rel_join2semijoin(visitor *v, sql_rel *rel)
1746 : {
1747 3487648 : if ((is_simple_project(rel->op) || is_groupby(rel->op)) && rel->l) {
1748 1197814 : bool swap = false;
1749 1197814 : sql_rel *l = rel->l;
1750 1197814 : sql_rel *c = find_candidate_join2semi(v, l, &swap);
1751 :
1752 1197814 : if (c) {
1753 : /* 'p' is a project */
1754 3349 : sql_rel *p = swap ? c->l : c->r;
1755 :
1756 : /* now we need to check if ce is only used at the level of c */
1757 3349 : if (!rel_uses_exp_outside_subrel(rel, p->exps, c)) {
1758 1706 : c->op = op_semi;
1759 1706 : if (swap) {
1760 522 : sql_rel *tmp = c->r;
1761 522 : c->r = c->l;
1762 522 : c->l = tmp;
1763 : }
1764 1706 : v->changes++;
1765 : }
1766 : }
1767 : }
1768 3487648 : return rel;
1769 : }
1770 :
1771 : static inline sql_rel *
1772 3487648 : rel_push_join_down_outer(visitor *v, sql_rel *rel)
1773 : {
1774 3487648 : if (is_join(rel->op) && !is_outerjoin(rel->op) && !is_single(rel) && !list_empty(rel->exps) && !rel_is_ref(rel)) {
1775 372491 : sql_rel *l = rel->l, *r = rel->r;
1776 :
1777 372491 : if (is_left(r->op) && (is_select(l->op) || (is_join(l->op) && !is_outerjoin(l->op))) && !rel_is_ref(l) &&
1778 654 : !rel_is_ref(r)) {
1779 654 : sql_rel *rl = r->l;
1780 654 : sql_rel *rr = r->r;
1781 654 : if (rel_is_ref(rl) || rel_is_ref(rr))
1782 : return rel;
1783 : /* join exps should only include l and r.l */
1784 654 : list *njexps = sa_list(v->sql->sa);
1785 1316 : for(node *n = rel->exps->h; n; n = n->next) {
1786 662 : sql_exp *je = n->data;
1787 :
1788 662 : assert(je->type == e_cmp);
1789 662 : if (je->f)
1790 : return rel;
1791 662 : if ((rel_find_exp(l, je->l) && rel_find_exp(rl, je->r)) || (rel_find_exp(l, je->r) && rel_find_exp(rl, je->l))) {
1792 662 : list_append(njexps, je);
1793 : } else {
1794 0 : return rel;
1795 : }
1796 : }
1797 654 : sql_rel *nl = rel_crossproduct(v->sql->sa, rel_dup(l), rl, rel->op);
1798 654 : r->l = nl;
1799 654 : nl->exps = njexps;
1800 654 : nl->attr = rel->attr;
1801 654 : rel->attr = NULL;
1802 654 : set_processed(nl);
1803 654 : rel_dup(r);
1804 654 : rel_destroy(rel);
1805 654 : rel = r;
1806 654 : v->changes++;
1807 : }
1808 : }
1809 : return rel;
1810 : }
1811 :
1812 : static sql_rel *
1813 3487648 : rel_optimize_joins_(visitor *v, sql_rel *rel)
1814 : {
1815 3487648 : rel = rel_push_join_exps_down(v, rel);
1816 3487648 : rel = rel_out2inner(v, rel);
1817 3487648 : rel = rel_join2semijoin(v, rel);
1818 3487648 : rel = rel_push_join_down_outer(v, rel);
1819 3487648 : return rel;
1820 : }
1821 :
1822 : static sql_rel *
1823 91872 : rel_optimize_joins(visitor *v, global_props *gp, sql_rel *rel)
1824 : {
1825 91872 : (void) gp;
1826 91872 : return rel_visitor_topdown(v, rel, &rel_optimize_joins_);
1827 : }
1828 :
1829 : run_optimizer
1830 638402 : bind_optimize_joins(visitor *v, global_props *gp)
1831 : {
1832 638402 : int flag = v->sql->sql_optimizer;
1833 638206 : return gp->opt_level == 1 && (gp->cnt[op_join] || gp->cnt[op_left] || gp->cnt[op_right]
1834 1276608 : || gp->cnt[op_full] || gp->cnt[op_semi] || gp->cnt[op_anti]) && (flag & optimize_joins) ? rel_optimize_joins : NULL;
1835 : }
1836 :
1837 :
1838 : static sql_rel *rel_join_order_(visitor *v, sql_rel *rel);
1839 :
1840 : static void
1841 716355 : get_relations(visitor *v, sql_rel *rel, list *rels)
1842 : {
1843 996382 : if (list_empty(rel->attr) && !rel_is_ref(rel) && rel->op == op_join && rel->exps == NULL) {
1844 280027 : sql_rel *l = rel->l;
1845 280027 : sql_rel *r = rel->r;
1846 :
1847 280027 : get_relations(v, l, rels);
1848 280027 : get_relations(v, r, rels);
1849 280027 : rel->l = NULL;
1850 280027 : rel->r = NULL;
1851 280027 : rel_destroy(rel);
1852 : } else {
1853 436328 : rel = rel_join_order_(v, rel);
1854 436328 : append(rels, rel);
1855 : }
1856 716355 : }
1857 :
1858 : static void
1859 186349 : get_inner_relations(mvc *sql, sql_rel *rel, list *rels)
1860 : {
1861 294763 : if (!rel_is_ref(rel) && is_join(rel->op)) {
1862 108414 : sql_rel *l = rel->l;
1863 108414 : sql_rel *r = rel->r;
1864 :
1865 108414 : get_inner_relations(sql, l, rels);
1866 108414 : get_inner_relations(sql, r, rels);
1867 : } else {
1868 186349 : append(rels, rel);
1869 : }
1870 186349 : }
1871 :
1872 : static int
1873 1073059 : exp_count(int *cnt, sql_exp *e)
1874 : {
1875 1073059 : int flag;
1876 1073059 : if (!e)
1877 : return 0;
1878 1073059 : if (find_prop(e->p, PROP_JOINIDX))
1879 3541 : *cnt += 100;
1880 1073059 : if (find_prop(e->p, PROP_HASHCOL))
1881 34463 : *cnt += 100;
1882 1073059 : if (find_prop(e->p, PROP_HASHIDX))
1883 0 : *cnt += 100;
1884 1073059 : switch(e->type) {
1885 394543 : case e_cmp:
1886 394543 : if (!is_complex_exp(e->flag)) {
1887 337729 : exp_count(cnt, e->l);
1888 337729 : exp_count(cnt, e->r);
1889 337729 : if (e->f)
1890 3052 : exp_count(cnt, e->f);
1891 : }
1892 394543 : flag = e->flag;
1893 394543 : switch (flag) {
1894 315460 : case cmp_equal:
1895 315460 : *cnt += 90;
1896 315460 : return 90;
1897 15755 : case cmp_notequal:
1898 15755 : *cnt += 7;
1899 15755 : return 7;
1900 6514 : case cmp_gt:
1901 : case cmp_gte:
1902 : case cmp_lt:
1903 : case cmp_lte:
1904 6514 : *cnt += 6;
1905 6514 : if (e->l) {
1906 6514 : sql_exp *l = e->l;
1907 6514 : sql_subtype *t = exp_subtype(l);
1908 6514 : if (EC_TEMP(t->type->eclass)) /* give preference to temporal ranges */
1909 172 : *cnt += 90;
1910 : }
1911 6514 : if (e->f){ /* range */
1912 3052 : *cnt += 6;
1913 3052 : return 12;
1914 : }
1915 : return 6;
1916 1059 : case cmp_filter:
1917 1059 : if (exps_card(e->r) > CARD_AGGR) {
1918 : /* filters for joins are special */
1919 6 : *cnt += 1000;
1920 6 : return 1000;
1921 : }
1922 1053 : *cnt += 2;
1923 1053 : return 2;
1924 52010 : case cmp_in:
1925 : case cmp_notin: {
1926 52010 : list *l = e->r;
1927 52010 : int c = 9 - 10*list_length(l);
1928 52010 : *cnt += c;
1929 52010 : return c;
1930 : }
1931 3745 : case cmp_or: /* prefer or over functions */
1932 3745 : *cnt += 3;
1933 3745 : return 3;
1934 : default:
1935 : return 0;
1936 : }
1937 536175 : case e_column:
1938 536175 : *cnt += 20;
1939 536175 : return 20;
1940 123351 : case e_atom:
1941 123351 : *cnt += 10;
1942 123351 : return 10;
1943 2871 : case e_func:
1944 : /* functions are more expensive, depending on the number of columns involved. */
1945 2871 : if (e->card == CARD_ATOM)
1946 : return 0;
1947 2583 : *cnt -= 5*list_length(e->l);
1948 2583 : return 5*list_length(e->l);
1949 16119 : case e_convert:
1950 : /* functions are more expensive, depending on the number of columns involved. */
1951 16119 : if (e->card == CARD_ATOM)
1952 : return 0;
1953 : /* fall through */
1954 : default:
1955 15456 : *cnt -= 5;
1956 15456 : return -5;
1957 : }
1958 : }
1959 :
1960 : int
1961 267590 : exp_keyvalue(sql_exp *e)
1962 : {
1963 267590 : int cnt = 0;
1964 56536 : exp_count(&cnt, e);
1965 267590 : return cnt;
1966 : }
1967 :
1968 : static sql_exp *
1969 752758 : joinexp_col(sql_exp *e, sql_rel *r)
1970 : {
1971 752758 : if (e->type == e_cmp) {
1972 752758 : if (rel_has_exp(r, e->l, false) >= 0)
1973 402522 : return e->l;
1974 350236 : return e->r;
1975 : }
1976 0 : assert(0);
1977 : return NULL;
1978 : }
1979 :
1980 : static sql_column *
1981 498014 : table_colexp(sql_exp *e, sql_rel *r)
1982 : {
1983 498014 : sql_table *t = r->l;
1984 :
1985 498014 : if (e->type == e_column) {
1986 483455 : const char *name = exp_name(e);
1987 483455 : node *cn;
1988 :
1989 483455 : if (r->exps) { /* use alias */
1990 894655 : for (cn = r->exps->h; cn; cn = cn->next) {
1991 890310 : sql_exp *ce = cn->data;
1992 890310 : if (strcmp(exp_name(ce), name) == 0) {
1993 479110 : name = ce->r;
1994 479110 : break;
1995 : }
1996 : }
1997 : }
1998 1064701 : for (cn = ol_first_node(t->columns); cn; cn = cn->next) {
1999 1060356 : sql_column *c = cn->data;
2000 1060356 : if (strcmp(c->base.name, name) == 0)
2001 479110 : return c;
2002 : }
2003 : }
2004 : return NULL;
2005 : }
2006 :
2007 : static list *
2008 362393 : matching_joins(allocator *sa, list *rels, list *exps, sql_exp *je)
2009 : {
2010 362393 : sql_rel *l, *r;
2011 :
2012 362393 : assert (je->type == e_cmp);
2013 :
2014 362393 : l = find_rel(rels, je->l);
2015 362393 : r = find_rel(rels, je->r);
2016 362393 : if (l && r) {
2017 362248 : list *res;
2018 362248 : list *n_rels = sa_list(sa);
2019 :
2020 362248 : append(n_rels, l);
2021 362248 : append(n_rels, r);
2022 362248 : res = list_select(exps, n_rels, (fcmp) &exp_joins_rels, (fdup)NULL);
2023 362248 : return res;
2024 : }
2025 145 : return sa_list(sa);
2026 : }
2027 :
2028 : static int
2029 6397 : sql_column_kc_cmp(sql_column *c, sql_kc *kc)
2030 : {
2031 : /* return on equality */
2032 6397 : return (c->colnr - kc->c->colnr);
2033 : }
2034 :
2035 : static sql_idx *
2036 467367 : find_fk_index(mvc *sql, sql_table *l, list *lcols, sql_table *r, list *rcols)
2037 : {
2038 467367 : sql_trans *tr = sql->session->tr;
2039 :
2040 467367 : if (l->idxs) {
2041 467367 : node *in;
2042 555248 : for (in = ol_first_node(l->idxs); in; in = in->next){
2043 88721 : sql_idx *li = in->data;
2044 88721 : if (li->type == join_idx) {
2045 8717 : sql_key *rk = (sql_key*)os_find_id(tr->cat->objects, tr, ((sql_fkey*)li->key)->rkey);
2046 8717 : fcmp cmp = (fcmp)&sql_column_kc_cmp;
2047 :
2048 9666 : if (rk->t == r &&
2049 1789 : list_match(lcols, li->columns, cmp) == 0 &&
2050 840 : list_match(rcols, rk->columns, cmp) == 0) {
2051 840 : return li;
2052 : }
2053 : }
2054 : }
2055 : }
2056 : return NULL;
2057 : }
2058 :
2059 : static sql_rel *
2060 724786 : find_basetable( sql_rel *r)
2061 : {
2062 1070509 : if (!r)
2063 : return NULL;
2064 1062794 : switch(r->op) {
2065 583623 : case op_basetable:
2066 583623 : if (!r->l)
2067 0 : return NULL;
2068 : return r;
2069 345723 : case op_semi:
2070 : case op_anti:
2071 : case op_project:
2072 : case op_select:
2073 : case op_topn:
2074 : case op_sample:
2075 345723 : return find_basetable(r->l);
2076 : default:
2077 : return NULL;
2078 : }
2079 : }
2080 :
2081 : static int
2082 100809 : exps_count(list *exps)
2083 : {
2084 100809 : node *n;
2085 100809 : int cnt = 0;
2086 :
2087 100809 : if (!exps)
2088 : return 0;
2089 227768 : for (n = exps->h; n; n=n->next)
2090 126959 : exp_count(&cnt, n->data);
2091 100809 : return cnt;
2092 : }
2093 :
2094 : static list *
2095 257553 : order_join_expressions(mvc *sql, list *dje, list *rels)
2096 : {
2097 257553 : node *n;
2098 257553 : int cnt = list_length(dje);
2099 :
2100 257553 : if (cnt <= 1)
2101 : return dje;
2102 :
2103 67047 : list *res = sa_list(sql->sa);
2104 67047 : int i, *keys = SA_NEW_ARRAY(sql->ta, int, cnt);
2105 67047 : void **data = SA_NEW_ARRAY(sql->ta, void*, cnt);
2106 :
2107 278101 : for (n = dje->h, i = 0; n; n = n->next, i++) {
2108 211054 : sql_exp *e = n->data;
2109 :
2110 211054 : keys[i] = exp_keyvalue(e);
2111 : /* add some weight for the selections */
2112 211054 : if (e->type == e_cmp && !is_complex_exp(e->flag)) {
2113 211027 : sql_rel *l = find_rel(rels, e->l);
2114 211027 : sql_rel *r = find_rel(rels, e->r);
2115 :
2116 211027 : if (l && is_select(l->op) && l->exps)
2117 53966 : keys[i] += list_length(l->exps)*10 + exps_count(l->exps);
2118 211027 : if (r && is_select(r->op) && r->exps)
2119 46843 : keys[i] += list_length(r->exps)*10 + exps_count(r->exps);
2120 : }
2121 211054 : data[i] = n->data;
2122 : }
2123 : /* sort descending */
2124 67047 : GDKqsort(keys, data, NULL, cnt, sizeof(int), sizeof(void *), TYPE_int, true, true);
2125 345148 : for(i=0; i<cnt; i++) {
2126 211054 : list_append(res, data[i]);
2127 : }
2128 : return res;
2129 : }
2130 :
2131 : static int
2132 240884 : find_join_rels(list **L, list **R, list *exps, list *rels)
2133 : {
2134 240884 : node *n;
2135 :
2136 240884 : *L = sa_list(exps->sa);
2137 240884 : *R = sa_list(exps->sa);
2138 240884 : if (!exps || list_length(exps) <= 1)
2139 : return -1;
2140 294896 : for(n = exps->h; n; n = n->next) {
2141 222661 : sql_exp *e = n->data;
2142 222661 : sql_rel *l = NULL, *r = NULL;
2143 :
2144 222661 : if (!is_complex_exp(e->flag)){
2145 222653 : l = find_rel(rels, e->l);
2146 222653 : r = find_rel(rels, e->r);
2147 : }
2148 222653 : if (l<r) {
2149 144247 : list_append(*L, l);
2150 144247 : list_append(*R, r);
2151 : } else {
2152 78414 : list_append(*L, r);
2153 78414 : list_append(*R, l);
2154 : }
2155 : }
2156 : return 0;
2157 : }
2158 :
2159 : static list *
2160 72235 : distinct_join_exps(list *aje, list *lrels, list *rrels)
2161 : {
2162 72235 : node *n, *m, *o, *p;
2163 72235 : int len = list_length(aje), i, j;
2164 72235 : char *used = SA_ZNEW_ARRAY(aje->sa, char, len);
2165 72235 : list *res = sa_list(aje->sa);
2166 :
2167 72235 : assert(len == list_length(lrels));
2168 294896 : for(n = lrels->h, m = rrels->h, j = 0; n && m;
2169 222661 : n = n->next, m = m->next, j++) {
2170 222661 : if (n->data && m->data)
2171 957824 : for(o = n->next, p = m->next, i = j+1; o && p;
2172 735216 : o = o->next, p = p->next, i++) {
2173 735216 : if (o->data == n->data && p->data == m->data)
2174 30209 : used[i] = 1;
2175 : }
2176 : }
2177 294896 : for (i = 0, n = aje->h; i < len; n = n->next, i++) {
2178 222661 : if (!used[i])
2179 197621 : list_append(res, n->data);
2180 : }
2181 72235 : return res;
2182 : }
2183 :
2184 : static list *
2185 240884 : find_fk( mvc *sql, list *rels, list *exps)
2186 : {
2187 240884 : node *djn;
2188 240884 : list *sdje, *aje, *dje;
2189 240884 : list *lrels, *rrels;
2190 :
2191 : /* first find the distinct join expressions */
2192 240884 : aje = list_select(exps, rels, (fcmp) &exp_is_join, (fdup)NULL);
2193 : /* add left/right relation */
2194 240884 : if (find_join_rels(&lrels, &rrels, aje, rels) < 0)
2195 : dje = aje;
2196 : else
2197 72235 : dje = distinct_join_exps(aje, lrels, rrels);
2198 606777 : for(djn=dje->h; djn; djn = djn->next) {
2199 : /* equal join expressions */
2200 365980 : sql_idx *idx = NULL;
2201 365980 : sql_exp *je = djn->data, *le = je->l, *re = je->r;
2202 :
2203 365980 : if (is_complex_exp(je->flag))
2204 : break;
2205 365893 : if (!find_prop(je->p, PROP_JOINIDX)) {
2206 362393 : int swapped = 0;
2207 362393 : list *aaje = matching_joins(sql->sa, rels, aje, je);
2208 362393 : list *eje = list_select(aaje, (void*)1, (fcmp) &exp_is_eqjoin, (fdup)NULL);
2209 362393 : sql_rel *lr = find_rel(rels, le), *olr = lr;
2210 362393 : sql_rel *rr = find_rel(rels, re), *orr = rr;
2211 362393 : sql_rel *bt = NULL;
2212 362393 : char *iname;
2213 :
2214 362393 : sql_table *l, *r;
2215 362393 : list *lexps = list_map(eje, lr, (fmap) &joinexp_col);
2216 362393 : list *rexps = list_map(eje, rr, (fmap) &joinexp_col);
2217 362393 : list *lcols, *rcols;
2218 :
2219 362393 : lr = find_basetable(lr);
2220 362393 : rr = find_basetable(rr);
2221 362393 : if (!lr || !rr)
2222 238105 : continue;
2223 252665 : l = lr->l;
2224 252665 : r = rr->l;
2225 252665 : lcols = list_map(lexps, lr, (fmap) &table_colexp);
2226 252665 : rcols = list_map(rexps, rr, (fmap) &table_colexp);
2227 252665 : lcols->destroy = NULL;
2228 252665 : rcols->destroy = NULL;
2229 252665 : if (list_length(lcols) != list_length(rcols))
2230 18648 : continue;
2231 :
2232 234017 : idx = find_fk_index(sql, l, lcols, r, rcols);
2233 234017 : if (!idx) {
2234 233350 : idx = find_fk_index(sql, r, rcols, l, lcols);
2235 233350 : swapped = 1;
2236 : }
2237 :
2238 234017 : if (idx && (iname = sa_strconcat( sql->sa, "%", idx->base.name)) != NULL &&
2239 667 : ((!swapped && name_find_column(olr, NULL, iname, -2, &bt) == NULL) ||
2240 173 : ( swapped && name_find_column(orr, NULL, iname, -2, &bt) == NULL)))
2241 : idx = NULL;
2242 :
2243 : if (idx) {
2244 751 : prop *p;
2245 751 : node *n;
2246 751 : sql_exp *t = NULL, *i = NULL;
2247 :
2248 751 : if (list_length(lcols) > 1 || !mvc_debug_on(sql, 512)) {
2249 :
2250 : /* Add join between idx and TID */
2251 751 : if (swapped) {
2252 155 : sql_exp *s = je->l, *l = je->r;
2253 :
2254 155 : t = rel_find_column(sql, olr, s->l, TID);
2255 155 : i = rel_find_column(sql, orr, l->l, iname);
2256 155 : if (!t || !i)
2257 1 : continue;
2258 154 : t->p = NULL;
2259 154 : i->p = NULL;
2260 154 : je = exp_compare(sql->sa, i, t, cmp_equal);
2261 : } else {
2262 596 : sql_exp *s = je->r, *l = je->l;
2263 :
2264 596 : t = rel_find_column(sql, orr, s->l, TID);
2265 596 : i = rel_find_column(sql, olr, l->l, iname);
2266 596 : if (!t || !i)
2267 0 : continue;
2268 596 : t->p = NULL;
2269 596 : i->p = NULL;
2270 596 : je = exp_compare(sql->sa, i, t, cmp_equal);
2271 : }
2272 :
2273 : /* Remove all join expressions */
2274 1502 : for (n = eje->h; n; n = n->next)
2275 752 : list_remove_data(exps, NULL, n->data);
2276 750 : append(exps, je);
2277 750 : djn->data = je;
2278 0 : } else if (swapped) { /* else keep je for single column expressions */
2279 0 : je = exp_compare(sql->sa, je->r, je->l, cmp_equal);
2280 : /* Remove all join expressions */
2281 0 : for (n = eje->h; n; n = n->next)
2282 0 : list_remove_data(exps, NULL, n->data);
2283 0 : append(exps, je);
2284 0 : djn->data = je;
2285 : }
2286 750 : je->p = p = prop_create(sql->sa, PROP_JOINIDX, je->p);
2287 750 : p->value.pval = idx;
2288 : }
2289 : }
2290 : }
2291 :
2292 : /* sort expressions on weighted number of reducing operators */
2293 240884 : sdje = order_join_expressions(sql, dje, rels);
2294 240884 : return sdje;
2295 : }
2296 :
2297 : static int
2298 561781 : exp_find_one_rel( sql_rel **rels, int nr, sql_exp *e)
2299 : {
2300 561781 : int fnd = 0;
2301 :
2302 4469180 : for(int i = 1; i<=nr; i++) {
2303 3907467 : if (rel_has_exp(rels[i], e, false) == 0) {
2304 561765 : if (fnd)
2305 : return 0;
2306 : fnd = i;
2307 : }
2308 : }
2309 : return fnd;
2310 : }
2311 :
2312 : static int
2313 140 : exps_find_one_rel( sql_rel **rels, int nr, list *exps)
2314 : {
2315 140 : int fnd = 0;
2316 :
2317 378 : for(node *n = exps->h; n; n = n->next) {
2318 243 : sql_exp *e = n->data;
2319 243 : if (exp_is_atom(e))
2320 86 : continue;
2321 157 : int nfnd = exp_find_one_rel(rels, nr, n->data);
2322 157 : if (nfnd != fnd && fnd)
2323 : return 0;
2324 154 : fnd = nfnd;
2325 154 : if (!fnd)
2326 : return 0;
2327 : }
2328 : return fnd;
2329 : }
2330 :
2331 : /* TODO move popcount and popcount64 into gdk_*.h, used in gdk_cand, strimps and here */
2332 : static inline int
2333 : popcount64(uint64_t x)
2334 : {
2335 : #ifdef __has_builtin
2336 : #if __has_builtin(__builtin_popcountll)
2337 : return (uint32_t) __builtin_popcountll(x);
2338 : #define BUILTIN_USED
2339 : #endif
2340 : #endif
2341 : #ifndef BUILTIN_USED
2342 : #if defined(_MSC_VER)
2343 : #if SIZEOF_OID == 4
2344 : /* no __popcnt64 on 32 bit Windows */
2345 : return (int) (__popcnt((uint32_t) x) + __popcnt((uint32_t) (x >> 32)));
2346 : #else
2347 : return (uint32_t) __popcnt64(x);
2348 : #endif
2349 : #else
2350 : x = (x & UINT64_C(0x5555555555555555)) + ((x >> 1) & UINT64_C(0x5555555555555555));
2351 : x = (x & UINT64_C(0x3333333333333333)) + ((x >> 2) & UINT64_C(0x3333333333333333));
2352 : x = (x & UINT64_C(0x0F0F0F0F0F0F0F0F)) + ((x >> 4) & UINT64_C(0x0F0F0F0F0F0F0F0F));
2353 : return (x * UINT64_C(0x0101010101010101)) >> 56;
2354 : #endif
2355 : #endif
2356 : #undef BUILTIN_USED
2357 : }
2358 :
2359 : static sql_rel *
2360 156301 : order_joins(visitor *v, list *rels, list *exps)
2361 : {
2362 156301 : sql_rel *top = NULL, *l = NULL, *r = NULL, *f = NULL;
2363 156301 : sql_exp *cje;
2364 156301 : node *djn;
2365 156301 : list *sdje, *n_rels = NULL;
2366 156301 : int fnd = 0;
2367 156301 : unsigned int rsingle;
2368 156301 : int direct = 1;
2369 :
2370 : /* find foreign keys and reorder the expressions on reducing quality */
2371 156301 : sdje = find_fk(v->sql, rels, exps);
2372 :
2373 437117 : for(djn = sdje->h; djn; djn = djn->next ) {
2374 280816 : sql_exp *e = djn->data;
2375 280816 : list_remove_data(exps, NULL, e);
2376 : }
2377 156301 : if (list_length(rels) > 2 && mvc_debug_on(v->sql, 256)) {
2378 0 : top = rel_planner(v->sql, rels, sdje, exps);
2379 0 : return top;
2380 : }
2381 :
2382 156301 : int nr_exps = list_length(sdje), nr_rels = list_length(rels), ci = 1;
2383 156301 : if (nr_rels > 64) {
2384 1 : direct = 0;
2385 1 : n_rels = sa_list(v->sql->ta);
2386 : }
2387 156301 : sql_rel **rels_a = SA_NEW_ARRAY(v->sql->ta, sql_rel*, nr_rels+1); /* don't use slot 0 */
2388 156301 : rels_a[0] = NULL;
2389 592629 : for (node *n = rels->h; n; n = n->next, ci++) {
2390 436328 : rels_a[ci] = n->data;
2391 : }
2392 156301 : ulng *h = SA_NEW_ARRAY(v->sql->ta, ulng, nr_exps), rel_mask = 0; /* bit field (for > 64 its an imprint) */
2393 156301 : uint16_t *r1 = SA_NEW_ARRAY(v->sql->ta, uint16_t, nr_exps);
2394 156301 : uint16_t *r2 = SA_NEW_ARRAY(v->sql->ta, uint16_t, nr_exps);
2395 : /* change r3 into rest list's */
2396 156301 : int *r3 = SA_NEW_ARRAY(v->sql->ta, int, nr_exps);
2397 :
2398 156301 : ci = 0;
2399 437117 : for (node *n = sdje->h; n; n = n->next, ci++) {
2400 280816 : sql_exp *cje = n->data;
2401 :
2402 280816 : h[ci] = r1[ci] = r2[ci] = r3[ci] = 0;
2403 280816 : if (cje->type == e_cmp) {
2404 280816 : cje->tmp = ci;
2405 280816 : r1[ci] = cje->flag == cmp_filter ? exps_find_one_rel(rels_a, nr_rels, cje->l) : exp_find_one_rel(rels_a, nr_rels, cje->l);
2406 280816 : r2[ci] = cje->flag == cmp_filter ? exps_find_one_rel(rels_a, nr_rels, cje->r) : exp_find_one_rel(rels_a, nr_rels, cje->r);
2407 280816 : if (r1[ci])
2408 280754 : h[ci] |= ((ulng)1)<<((r1[ci]-1)%64);
2409 280816 : if (r2[ci])
2410 280734 : h[ci] |= ((ulng)1)<<((r2[ci]-1)%64);
2411 280816 : if (cje->f && cje->flag != cmp_filter) {
2412 132 : r3[ci] = exp_find_one_rel(rels_a, nr_rels, cje->f);
2413 132 : if (r3[ci] == r2[ci] || r3[ci] == r1[ci])
2414 114 : r3[ci] = 0;
2415 132 : if (r3[ci])
2416 8 : h[ci] |= ((ulng)1)<<((r3[ci]-1)%64);
2417 : }
2418 : }
2419 : }
2420 : /* open problem, some expressions use more than 2 relations */
2421 : /* For example a.x = b.y * c.z; */
2422 156301 : if (list_length(rels) >= 2 && sdje->h) {
2423 312561 : for (node *n = sdje->h; n && (!l || !r); n = n->next, ci++) {
2424 156284 : cje = n->data;
2425 :
2426 156284 : if (n->next && r3[cje->tmp])
2427 0 : continue;
2428 :
2429 : /* complex expressions may touch multiple base tables
2430 : * Should be pushed up to extra selection.
2431 : * */
2432 156284 : if (0 && popcount64(h[cje->tmp]) > 2)
2433 : assert(0);
2434 : /* find the involved relations */
2435 156284 : if (cje->type == e_cmp) {
2436 156284 : l = rels_a[r1[cje->tmp]];
2437 156284 : r = rels_a[r2[cje->tmp]];
2438 156284 : if (l && r)
2439 156219 : rel_mask |= h[cje->tmp];
2440 : }
2441 : }
2442 156277 : cje->tmp = 0;
2443 :
2444 156277 : if (l && r && l != r)
2445 156219 : list_remove_data(sdje, NULL, cje);
2446 : }
2447 :
2448 156301 : if (l && r && l != r) {
2449 156219 : list_remove_data(rels, NULL, l);
2450 156219 : list_remove_data(rels, NULL, r);
2451 156219 : if (!direct) {
2452 1 : list_append(n_rels, l);
2453 1 : list_append(n_rels, r);
2454 : }
2455 :
2456 : /* Create a relation between l and r. Since the calling
2457 : functions rewrote the join tree, into a list of expressions
2458 : and a list of (simple) relations, there are no outer joins
2459 : involved, we can simply do a crossproduct here.
2460 : */
2461 156219 : rsingle = is_single(r);
2462 156219 : reset_single(r);
2463 156219 : top = rel_crossproduct(v->sql->sa, l, r, op_join);
2464 156219 : if (rsingle)
2465 5168 : set_single(r);
2466 156219 : rel_join_add_exp(v->sql->sa, top, cje);
2467 :
2468 : /* all other join expressions on these 2 relations */
2469 160023 : for (node *en = exps->h; en; ) {
2470 3804 : node *next = en->next;
2471 3804 : sql_exp *e = en->data;
2472 3804 : if (rel_rebind_exp(v->sql, top, e)) {
2473 3632 : rel_join_add_exp(v->sql->sa, top, e);
2474 3632 : list_remove_data(exps, NULL, e);
2475 : }
2476 : en = next;
2477 : }
2478 : fnd = 1;
2479 : }
2480 : /* build join tree using the ordered list */
2481 277496 : while(list_length(sdje) && fnd) {
2482 121195 : fnd = 0;
2483 : /* find the first expression which could be added */
2484 883441 : for(djn = sdje->h; djn && !fnd && rels->h; djn = (!fnd)?djn->next:NULL) {
2485 381123 : node *en;
2486 381123 : l = r = f = NULL;
2487 381123 : int needs3 = 0;
2488 :
2489 381123 : cje = djn->data;
2490 381123 : if ((h[cje->tmp] & rel_mask) > 0) {
2491 121111 : if (rel_mask & (((ulng)1)<<((r1[cje->tmp]-1)%64)))
2492 68309 : l = rels_a[r1[cje->tmp]];
2493 121111 : if (rel_mask & (((ulng)1)<<((r2[cje->tmp]-1)%64)))
2494 52803 : r = rels_a[r2[cje->tmp]];
2495 121111 : if (cje->f && r3[cje->tmp]) {
2496 0 : needs3 = 1;
2497 0 : if (rel_mask & (((ulng)1)<<((r3[cje->tmp]-1)%64)))
2498 0 : f = rels_a[r3[cje->tmp]];
2499 : }
2500 : }
2501 381123 : if (!direct) { /* check if at least one side in n_rels */
2502 1040 : if (l && !list_find(n_rels, l, NULL))
2503 1007 : l = NULL;
2504 1040 : if (r && !list_find(n_rels, r, NULL))
2505 1010 : r = NULL;
2506 1040 : if (f && !list_find(n_rels, f, NULL))
2507 0 : f = NULL;
2508 : }
2509 :
2510 381123 : if ((!needs3 && l && r) || (needs3 && l && r && f)) {
2511 0 : assert(0);
2512 : /* create a selection on the current */
2513 : rel_join_add_exp(v->sql->sa, top, cje);
2514 : fnd = 1;
2515 381123 : } else if ((!needs3 && (l || r)) || (needs3 && (l || r || f))) {
2516 121111 : sql_rel *nr[2]= {NULL, NULL};
2517 121111 : rel_mask |= h[cje->tmp];
2518 121111 : int i = 0;
2519 121111 : if (!l)
2520 52802 : nr[i++] = rels_a[r1[cje->tmp]];
2521 121111 : if (!r)
2522 68309 : nr[i++] = rels_a[r2[cje->tmp]];
2523 121111 : if (needs3 && !f)
2524 0 : nr[i++] = rels_a[r3[cje->tmp]];
2525 121111 : if (!nr[0]) {
2526 0 : fnd = 1; /* not really, but this bails out */
2527 0 : list_remove_data(sdje, NULL, cje); /* handle later as select */
2528 0 : if (!list_find(exps, cje, NULL))
2529 0 : append(exps, cje);
2530 0 : continue;
2531 : }
2532 : /* remove the expression from the lists */
2533 121111 : list_remove_data(sdje, NULL, cje);
2534 :
2535 121111 : list_remove_data(rels, NULL, nr[0]);
2536 121111 : if (!direct)
2537 63 : append(n_rels, nr[0]);
2538 121111 : if (i > 1 && nr[1]) {
2539 0 : list_remove_data(rels, NULL, nr[1]);
2540 0 : if (!direct)
2541 0 : append(n_rels, nr[1]);
2542 : }
2543 :
2544 : /* create a join using the current expression */
2545 121111 : rsingle = is_single(nr[0]);
2546 121111 : reset_single(nr[0]);
2547 121111 : top = rel_crossproduct(v->sql->sa, top, nr[0], op_join);
2548 121111 : if (rsingle)
2549 0 : set_single(nr[0]);
2550 121111 : if (i > 1 && nr[1]) {
2551 0 : rsingle = is_single(nr[1]);
2552 0 : reset_single(nr[1]);
2553 0 : top = rel_crossproduct(v->sql->sa, top, nr[1], op_join);
2554 0 : if (rsingle)
2555 0 : set_single(nr[1]);
2556 : }
2557 121111 : rel_join_add_exp(v->sql->sa, top, cje);
2558 :
2559 : /* all join expressions on these tables */
2560 121500 : for (en = exps->h; en; ) {
2561 389 : node *next = en->next;
2562 389 : sql_exp *e = en->data;
2563 389 : if (rel_rebind_exp(v->sql, top, e)) {
2564 170 : rel_join_add_exp(v->sql->sa, top, e);
2565 170 : list_remove_data(exps, NULL, e);
2566 : }
2567 : en = next;
2568 : }
2569 : /* Remove other joins on the current 'n_rels'
2570 : set in the distinct list too */
2571 697488 : for (en = sdje->h; en; ) {
2572 576377 : node *next = en->next;
2573 576377 : sql_exp *e = en->data;
2574 576377 : if ((direct && ((e->flag <= cmp_notequal && (h[e->tmp] & rel_mask) == h[e->tmp] && h[e->tmp]) || (e->flag > cmp_notequal && rel_rebind_exp(v->sql, top, e)))) ||
2575 1953 : (!direct && rel_rebind_exp(v->sql, top, e))) {
2576 3339 : rel_join_add_exp(v->sql->sa, top, e);
2577 3339 : list_remove_data(sdje, NULL, en->data);
2578 : }
2579 : en = next;
2580 : }
2581 121111 : fnd = 1;
2582 : }
2583 : }
2584 : }
2585 156301 : if (list_length(rels)) { /* more relations */
2586 1138 : node *n;
2587 3917 : for(n=rels->h; n; n = n->next) {
2588 2779 : sql_rel *nr = n->data;
2589 :
2590 2779 : if (top) {
2591 2697 : rsingle = is_single(nr);
2592 2697 : reset_single(nr);
2593 2697 : top = rel_crossproduct(v->sql->sa, top, nr, op_join);
2594 2697 : if (rsingle)
2595 0 : set_single(nr);
2596 : } else
2597 : top = nr;
2598 : }
2599 : }
2600 156301 : if (list_length(sdje)) {
2601 142 : if (list_empty(exps))
2602 : exps = sdje;
2603 : else
2604 0 : exps = list_merge(exps, sdje, (fdup)NULL);
2605 : }
2606 156301 : if (list_length(exps)) { /* more expressions (add selects) */
2607 168 : top = rel_select(v->sql->sa, top, NULL);
2608 341 : for(node *n=exps->h; n; n = n->next) {
2609 173 : sql_exp *e = n->data;
2610 :
2611 173 : if (exp_is_join_exp(e) == 0) {
2612 159 : sql_rel *nr = NULL;
2613 159 : if (is_theta_exp(e->flag)) {
2614 142 : nr = rel_push_join(v->sql, top->l, e->l, e->r, e->f, e, 0);
2615 17 : } else if (e->flag == cmp_filter || e->flag == cmp_or) {
2616 17 : sql_exp *l = exps_find_one_multi_exp(e->l), *r = exps_find_one_multi_exp(e->r);
2617 17 : if (l && r)
2618 10 : nr = rel_push_join(v->sql, top->l, l, r, NULL, e, 0);
2619 : }
2620 152 : if (!nr)
2621 7 : rel_join_add_exp(v->sql->sa, top->l, e);
2622 : } else
2623 14 : rel_select_add_exp(v->sql->sa, top, e);
2624 : }
2625 168 : if (list_empty(top->exps)) { /* empty select */
2626 154 : sql_rel *l = top->l;
2627 154 : top->l = NULL;
2628 154 : rel_destroy(top);
2629 154 : top = l;
2630 : }
2631 : }
2632 : return top;
2633 : }
2634 :
2635 : static int
2636 436328 : rel_neg_in_size(sql_rel *r)
2637 : {
2638 436328 : if ((is_union(r->op) /*|| is_munion(r->op)*/) && r->nrcols == 0)
2639 0 : return -1 + rel_neg_in_size(r->l);
2640 436328 : if (is_project(r->op) && r->nrcols == 0)
2641 0 : return -1;
2642 : return 0;
2643 : }
2644 :
2645 436328 : static void _rel_destroy(void *dummy, sql_rel *rel)
2646 : {
2647 436328 : (void)dummy;
2648 436328 : rel_destroy(rel);
2649 436328 : }
2650 :
2651 : static list *
2652 156301 : push_in_join_down(mvc *sql, list *rels, list *exps)
2653 : {
2654 156301 : node *n;
2655 156301 : int restart = 1;
2656 156301 : list *nrels;
2657 :
2658 : /* we should sort these first, ie small in's before large one's */
2659 156301 : nrels = list_sort(rels, (fkeyvalue)&rel_neg_in_size, (fdup)&rel_dup);
2660 :
2661 : /* we need to cleanup, the new refs ! */
2662 156301 : rels->destroy = (fdestroy)_rel_destroy;
2663 156301 : list_destroy(rels);
2664 156301 : rels = nrels;
2665 :
2666 : /* one of the rels should be a op_union with nrcols == 0 */
2667 468903 : while (restart) {
2668 592629 : for (n = rels->h; n; n = n->next) {
2669 436328 : sql_rel *r = n->data;
2670 :
2671 436328 : restart = 0;
2672 436328 : if (is_project(r->op) && r->nrcols == 0) {
2673 : /* next step find expression on this relation */
2674 0 : node *m;
2675 0 : sql_rel *l = NULL;
2676 0 : sql_exp *je = NULL;
2677 :
2678 0 : for(m = exps->h; !je && m; m = m->next) {
2679 0 : sql_exp *e = m->data;
2680 :
2681 0 : if (e->type == e_cmp && e->flag == cmp_equal) {
2682 : /* in values are on
2683 : the right of the join */
2684 0 : if (rel_has_exp(r, e->r, false) >= 0)
2685 0 : je = e;
2686 : }
2687 : }
2688 : /* with this expression find other relation */
2689 0 : if (je && (l = find_rel(rels, je->l)) != NULL) {
2690 0 : unsigned int rsingle = is_single(r);
2691 0 : reset_single(r);
2692 0 : sql_rel *nr = rel_crossproduct(sql->sa, l, r, op_join);
2693 0 : if (rsingle)
2694 0 : set_single(r);
2695 0 : rel_join_add_exp(sql->sa, nr, je);
2696 0 : list_append(rels, nr);
2697 0 : list_remove_data(rels, NULL, l);
2698 0 : list_remove_data(rels, NULL, r);
2699 0 : list_remove_data(exps, NULL, je);
2700 0 : restart = 1;
2701 0 : break;
2702 : }
2703 :
2704 : }
2705 : }
2706 : }
2707 156301 : return rels;
2708 : }
2709 :
2710 : static list *
2711 745019 : push_up_join_exps( mvc *sql, sql_rel *rel)
2712 : {
2713 745019 : if (rel_is_ref(rel))
2714 : return NULL;
2715 :
2716 704024 : switch(rel->op) {
2717 292343 : case op_join: {
2718 292343 : sql_rel *rl = rel->l;
2719 292343 : sql_rel *rr = rel->r;
2720 292343 : list *l, *r;
2721 :
2722 292343 : if (rel_is_ref(rl) && rel_is_ref(rr)) {
2723 16 : l = rel->exps;
2724 16 : rel->exps = NULL;
2725 16 : return l;
2726 : }
2727 292327 : l = push_up_join_exps(sql, rl);
2728 292327 : r = push_up_join_exps(sql, rr);
2729 292327 : if (l && r) {
2730 34 : l = list_merge(l, r, (fdup)NULL);
2731 34 : r = NULL;
2732 292293 : } else if (!l) {
2733 177857 : l = r;
2734 177857 : r = NULL;
2735 : }
2736 292327 : if (rel->exps) {
2737 260412 : if (l && !r)
2738 104053 : r = l;
2739 260412 : l = list_merge(rel->exps, r, (fdup)NULL);
2740 : }
2741 292327 : rel->exps = NULL;
2742 292327 : return l;
2743 : }
2744 : default:
2745 : return NULL;
2746 : }
2747 : }
2748 :
2749 : static sql_rel *
2750 288745 : reorder_join(visitor *v, sql_rel *rel)
2751 : {
2752 288745 : list *exps, *rels;
2753 :
2754 288745 : if (is_innerjoin(rel->op) && !is_single(rel) && !rel_is_ref(rel) && list_empty(rel->attr)) {
2755 178033 : if (list_empty(rel->exps)) {
2756 22546 : sql_rel *l = rel->l, *r = rel->r;
2757 22546 : if (!is_innerjoin(l->op) && !is_innerjoin(r->op))
2758 : return rel;
2759 : }
2760 160365 : rel->exps = push_up_join_exps(v->sql, rel);
2761 : }
2762 :
2763 271077 : if (!is_innerjoin(rel->op) || is_single(rel) || rel_is_ref(rel) || list_empty(rel->exps) || !list_empty(rel->attr)) {
2764 114776 : if (!list_empty(rel->exps)) { /* cannot add join idxs to cross products */
2765 77935 : exps = rel->exps;
2766 77935 : rel->exps = NULL; /* should be all crosstables by now */
2767 77935 : rels = sa_list(v->sql->ta);
2768 : /* try to use an join index also for outer joins */
2769 77935 : get_inner_relations(v->sql, rel, rels);
2770 77935 : int cnt = list_length(exps);
2771 77935 : rel->exps = find_fk(v->sql, rels, exps);
2772 77935 : if (list_length(rel->exps) != cnt)
2773 16669 : rel->exps = order_join_expressions(v->sql, exps, rels);
2774 : }
2775 114776 : rel->l = rel_join_order_(v, rel->l);
2776 114776 : rel->r = rel_join_order_(v, rel->r);
2777 : } else {
2778 156301 : exps = rel->exps;
2779 156301 : rel->exps = NULL; /* should be all crosstables by now */
2780 156301 : rels = sa_list(v->sql->ta);
2781 156301 : get_relations(v, rel, rels);
2782 156301 : if (list_length(rels) > 1) {
2783 156301 : rels = push_in_join_down(v->sql, rels, exps);
2784 156301 : rel = order_joins(v, rels, exps);
2785 : } else {
2786 0 : rel->exps = exps;
2787 : }
2788 : }
2789 : return rel;
2790 : }
2791 :
2792 : static sql_rel *
2793 2437825 : rel_join_order_(visitor *v, sql_rel *rel)
2794 : {
2795 2437825 : if (!rel)
2796 : return rel;
2797 :
2798 2421766 : switch (rel->op) {
2799 : case op_basetable:
2800 : break;
2801 3914 : case op_table:
2802 3914 : if (IS_TABLE_PROD_FUNC(rel->flag) || rel->flag == TABLE_FROM_RELATION)
2803 3914 : rel->l = rel_join_order_(v, rel->l);
2804 : break;
2805 : case op_join:
2806 : case op_left:
2807 : case op_right:
2808 : case op_full:
2809 : break;
2810 :
2811 12452 : case op_semi:
2812 : case op_anti:
2813 :
2814 : case op_union:
2815 : case op_inter:
2816 : case op_except:
2817 : case op_merge:
2818 12452 : rel->l = rel_join_order_(v, rel->l);
2819 12452 : rel->r = rel_join_order_(v, rel->r);
2820 12452 : break;
2821 118748 : case op_munion:
2822 118748 : assert(rel->l);
2823 477003 : for (node *n = ((list*)rel->l)->h; n; n = n->next)
2824 358255 : n->data = rel_join_order_(v, n->data);
2825 : break;
2826 1288077 : case op_project:
2827 : case op_select:
2828 : case op_groupby:
2829 : case op_topn:
2830 : case op_sample:
2831 1288077 : rel->l = rel_join_order_(v, rel->l);
2832 1288077 : break;
2833 52 : case op_ddl:
2834 52 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
2835 0 : rel->l = rel_join_order_(v, rel->l);
2836 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
2837 52 : rel->l = rel_join_order_(v, rel->l);
2838 52 : rel->r = rel_join_order_(v, rel->r);
2839 : }
2840 : break;
2841 12006 : case op_insert:
2842 : case op_update:
2843 : case op_delete:
2844 12006 : rel->r = rel_join_order_(v, rel->r);
2845 12006 : break;
2846 : case op_truncate:
2847 : break;
2848 : }
2849 2421766 : if (is_join(rel->op))
2850 288745 : rel = reorder_join(v, rel);
2851 : return rel;
2852 : }
2853 :
2854 : static sql_rel *
2855 84685 : rel_join_order(visitor *v, global_props *gp, sql_rel *rel)
2856 : {
2857 84685 : (void) gp;
2858 84685 : sql_rel *r = rel_join_order_(v, rel);
2859 84685 : sa_reset(v->sql->ta);
2860 84685 : return r;
2861 : }
2862 :
2863 : run_optimizer
2864 638402 : bind_join_order(visitor *v, global_props *gp)
2865 : {
2866 638402 : int flag = v->sql->sql_optimizer;
2867 638206 : return gp->opt_level == 1 && gp->opt_cycle < 10 && !gp->cnt[op_update] && (gp->cnt[op_join] || gp->cnt[op_left] ||
2868 1270276 : gp->cnt[op_right] || gp->cnt[op_full]) && (flag & join_order) ? rel_join_order : NULL;
2869 : }
2870 :
2871 : /* this join order is to be done once after statistics are gathered */
2872 : run_optimizer
2873 729760 : bind_join_order2(visitor *v, global_props *gp)
2874 : {
2875 : /*int flag = v->sql->sql_optimizer;
2876 : return gp->opt_level == 1 && !gp->has_special_modify && !gp->cnt[op_update] && (gp->cnt[op_join] || gp->cnt[op_left] ||
2877 : gp->cnt[op_right] || gp->cnt[op_full]) && (flag & join_order) ? rel_join_order : NULL;*/
2878 : /* TODO we have to propagate count statistics here */
2879 729760 : (void) v;
2880 729760 : (void) gp;
2881 729760 : return NULL;
2882 : }
2883 :
2884 :
2885 : static int
2886 14749 : is_identity_of(sql_exp *e, sql_rel *l)
2887 : {
2888 14749 : if (e->type != e_cmp)
2889 : return 0;
2890 14722 : if (!is_identity(e->l, l) || !is_identity(e->r, l) || (e->f && !is_identity(e->f, l)))
2891 14722 : return 0;
2892 : return 1;
2893 : }
2894 :
2895 : static inline sql_rel *
2896 19022 : rel_rewrite_semijoin(visitor *v, sql_rel *rel)
2897 : {
2898 19022 : assert(is_semi(rel->op));
2899 : {
2900 19022 : sql_rel *l = rel->l;
2901 19022 : sql_rel *r = rel->r;
2902 19022 : sql_rel *rl = (r->l)?r->l:NULL;
2903 19022 : int on_identity = 1;
2904 :
2905 19022 : if (!rel->exps || list_length(rel->exps) != 1 || !is_identity_of(rel->exps->h->data, l))
2906 : on_identity = 0;
2907 :
2908 : /* rewrite {semi,anti}join (A, join(A,B)) into {semi,anti}join (A,B)
2909 : * and {semi,anti}join (A, join(B,A)) into {semi,anti}join (A,B)
2910 : * Where the semi/anti join is done using the identity */
2911 0 : if (on_identity && l->ref.refcnt == 2 && ((is_join(r->op) && (l == r->l || l == r->r)) ||
2912 0 : (is_project(r->op) && rl && is_join(rl->op) && (l == rl->l || l == rl->r)))){
2913 0 : sql_rel *or = r;
2914 :
2915 0 : if (is_project(r->op))
2916 0 : r = rl;
2917 :
2918 0 : if (l == r->r)
2919 0 : rel->r = rel_dup(r->l);
2920 : else
2921 0 : rel->r = rel_dup(r->r);
2922 :
2923 0 : rel->exps = r->exps;
2924 0 : r->exps = NULL;
2925 0 : rel->attr = r->attr;
2926 0 : r->attr = NULL;
2927 0 : rel_destroy(or);
2928 0 : v->changes++;
2929 : }
2930 : }
2931 : {
2932 19022 : sql_rel *l = rel->l, *rl = NULL;
2933 19022 : sql_rel *r = rel->r, *or = r;
2934 :
2935 19022 : if (r)
2936 19022 : rl = r->l;
2937 19022 : if (r && is_project(r->op)) {
2938 15869 : r = rl;
2939 15869 : if (r)
2940 15620 : rl = r->l;
2941 : }
2942 :
2943 : /* More general case is (join reduction)
2944 : {semi,anti}join (A, join(A,B) [A.c1 == B.c1]) [ A.c1 == B.c1 ]
2945 : into {semi,anti}join (A,B) [ A.c1 == B.c1 ]
2946 :
2947 : for semijoin also A.c1 == B.k1 ] [ A.c1 == B.k2 ] could be rewritten
2948 : */
2949 19022 : if (l && r && rl &&
2950 17627 : is_basetable(l->op) && is_basetable(rl->op) &&
2951 4056 : is_join(r->op) && l->l == rl->l)
2952 : {
2953 25 : node *n, *m;
2954 25 : list *exps;
2955 :
2956 49 : if (!rel->exps || !r->exps ||
2957 24 : list_length(rel->exps) != list_length(r->exps))
2958 1 : return rel;
2959 24 : exps = new_exp_list(v->sql->sa);
2960 :
2961 : /* are the join conditions equal */
2962 24 : for (n = rel->exps->h, m = r->exps->h;
2963 24 : n && m; n = n->next, m = m->next)
2964 : {
2965 24 : sql_exp *le = NULL, *oe = n->data;
2966 24 : sql_exp *re = NULL, *ne = m->data;
2967 24 : sql_column *cl;
2968 24 : int equal = 0;
2969 :
2970 24 : if (oe->type != e_cmp || ne->type != e_cmp ||
2971 24 : oe->flag != cmp_equal ||
2972 24 : ne->flag != cmp_equal || is_anti(oe) || is_anti(ne))
2973 : return rel;
2974 :
2975 24 : if ((cl = exp_find_column(rel->l, oe->l, -2)) != NULL) {
2976 24 : le = oe->l;
2977 24 : re = oe->r;
2978 0 : } else if ((cl = exp_find_column(rel->l, oe->r, -2)) != NULL) {
2979 0 : le = oe->r;
2980 0 : re = oe->l;
2981 : } else
2982 : return rel;
2983 :
2984 24 : if (exp_find_column(rl, ne->l, -2) == cl) {
2985 24 : sql_exp *e = (or != r)?rel_find_exp(or, re):re;
2986 :
2987 24 : if (e)
2988 24 : equal = exp_match_exp(ne->r, e);
2989 24 : if (!e || !equal)
2990 : return rel;
2991 0 : re = ne->r;
2992 0 : } else if (exp_find_column(rl, ne->r, -2) == cl) {
2993 0 : sql_exp *e = (or != r)?rel_find_exp(or, re):re;
2994 :
2995 0 : if (e)
2996 0 : equal = exp_match_exp(ne->l, e);
2997 0 : if (!e || !equal)
2998 : return rel;
2999 0 : re = ne->l;
3000 : } else
3001 : return rel;
3002 :
3003 0 : ne = exp_compare(v->sql->sa, le, re, cmp_equal);
3004 0 : append(exps, ne);
3005 : }
3006 :
3007 0 : rel->r = rel_dup(r->r);
3008 0 : rel->exps = exps;
3009 0 : rel_destroy(or);
3010 0 : v->changes++;
3011 : }
3012 : }
3013 : return rel;
3014 : }
3015 :
3016 : /*
3017 : * Push semijoins down, pushes the semijoin through a join.
3018 : *
3019 : * semijoin( join(A, B) [ A.x == B.y ], C ) [ A.z == C.c ]
3020 : * ->
3021 : * join( semijoin(A, C) [ A.z == C.c ], B ) [ A.x == B.y ]
3022 : *
3023 : * also push simple expressions of a semijoin down if they only
3024 : * involve the left sided of the semijoin.
3025 : *
3026 : * in some cases the other way is useful, ie push join down
3027 : * semijoin. When the join reduces (ie when there are selects on it).
3028 : *
3029 : * At the moment, we only flag changes by this optimizer on the first level of optimization
3030 : */
3031 : static inline sql_rel *
3032 471694 : rel_push_semijoin_down_or_up(visitor *v, sql_rel *rel)
3033 : {
3034 471694 : uint8_t cycle = *(uint8_t*) v->data;
3035 :
3036 471694 : if (rel->op == op_join && rel->exps && rel->l) {
3037 434328 : sql_rel *l = rel->l, *r = rel->r;
3038 :
3039 434328 : if (is_semi(l->op) && !rel_is_ref(l) && is_select(r->op) && !rel_is_ref(r)) {
3040 22 : rel->l = l->l;
3041 22 : l->l = rel;
3042 22 : if (cycle <= 0)
3043 0 : v->changes++;
3044 22 : return l;
3045 : }
3046 : }
3047 : /* also case with 2 joins */
3048 : /* join ( join ( semijoin(), table), select (table)); */
3049 471672 : if (rel->op == op_join && rel->exps && rel->l) {
3050 434306 : sql_rel *l = rel->l, *r = rel->r;
3051 434306 : sql_rel *ll;
3052 :
3053 434306 : if (is_join(l->op) && !rel_is_ref(l) && is_select(r->op) && !rel_is_ref(r)) {
3054 8216 : ll = l->l;
3055 8216 : if (is_semi(ll->op) && !rel_is_ref(ll)) {
3056 51 : l->l = ll->l;
3057 51 : ll->l = rel;
3058 51 : if (cycle <= 0)
3059 12 : v->changes++;
3060 51 : return ll;
3061 : }
3062 : }
3063 : }
3064 : /* first push down the expressions involving only A */
3065 471621 : if (rel->op == op_semi && rel->exps && rel->l) {
3066 12372 : sql_rel *jl = rel->l, *ojl = jl;
3067 :
3068 12372 : set_processed(jl);
3069 28348 : for (node *n = rel->exps->h; n;) {
3070 15976 : node *next = n->next;
3071 15976 : sql_exp *e = n->data;
3072 :
3073 15976 : if (n != rel->exps->h && e->type == e_cmp && rel_rebind_exp(v->sql, jl, e)) {
3074 16 : if (!is_select(jl->op) || rel_is_ref(jl))
3075 14 : rel->l = jl = rel_select(v->sql->sa, jl, NULL);
3076 16 : rel_select_add_exp(v->sql->sa, jl, e);
3077 16 : list_remove_node(rel->exps, NULL, n);
3078 16 : if (cycle <= 0)
3079 16 : v->changes++;
3080 : }
3081 : n = next;
3082 : }
3083 12372 : if (ojl != jl)
3084 14 : set_processed(jl);
3085 : }
3086 471621 : if (rel->op == op_semi && rel->exps && rel->l) {
3087 12372 : operator_type op = rel->op, lop;
3088 12372 : node *n;
3089 12372 : sql_rel *l = rel->l, *ll = NULL, *lr = NULL;
3090 12372 : sql_rel *r = rel->r;
3091 12372 : list *exps = rel->exps, *nsexps, *njexps, *nsattr, *njattr;
3092 12372 : int left = 1, right = 1;
3093 :
3094 : /* handle project
3095 : if (l->op == op_project && !need_distinct(l))
3096 : l = l->l;
3097 : */
3098 :
3099 12372 : if (!is_join(l->op) || is_full(l->op) || rel_is_ref(l) || is_single(l))
3100 : return rel;
3101 :
3102 1225 : lop = l->op;
3103 1225 : ll = l->l;
3104 1225 : lr = l->r;
3105 :
3106 : /* check which side is used and other exps are atoms or from right of semijoin */
3107 2360 : for(n = exps->h; n; n = n->next) {
3108 1352 : sql_exp *sje = n->data;
3109 :
3110 1352 : if (sje->type != e_cmp || is_complex_exp(sje->flag))
3111 : return rel;
3112 : /* sje->l from ll and sje->r/f from semijoin r ||
3113 : * sje->l from semijoin r and sje->r/f from ll ||
3114 : * sje->l from lr and sje->r/f from semijoin r ||
3115 : * sje->l from semijoin r and sje->r/f from lr */
3116 2640 : if (left &&
3117 2640 : ((rel_rebind_exp(v->sql, ll, sje->l) && rel_rebind_exp(v->sql, rel->r, sje->r) && (!sje->f || rel_rebind_exp(v->sql, rel->r, sje->f))) ||
3118 1288 : (rel_rebind_exp(v->sql, rel->r, sje->l) && rel_rebind_exp(v->sql, ll, sje->r) && (!sje->f || rel_rebind_exp(v->sql, ll, sje->f)))))
3119 : right = 0;
3120 : else
3121 961 : left = 0;
3122 1797 : if (right &&
3123 1672 : ((rel_rebind_exp(v->sql, lr, sje->l) && rel_rebind_exp(v->sql, rel->r, sje->r) && (!sje->f || rel_rebind_exp(v->sql, rel->r, sje->f))) ||
3124 566 : (rel_rebind_exp(v->sql, rel->r, sje->l) && rel_rebind_exp(v->sql, lr, sje->r) && (!sje->f || rel_rebind_exp(v->sql, lr, sje->f)))))
3125 : left = 0;
3126 : else
3127 : right = 0;
3128 1320 : if (!right && !left)
3129 : return rel;
3130 : }
3131 1008 : if (left && is_right(lop))
3132 : return rel;
3133 1007 : if (right && is_left(lop))
3134 : return rel;
3135 1005 : nsexps = exps_copy(v->sql, rel->exps);
3136 1005 : nsattr = exps_copy(v->sql, rel->attr);
3137 1005 : njexps = exps_copy(v->sql, l->exps);
3138 1005 : njattr = exps_copy(v->sql, l->attr);
3139 1005 : if (left)
3140 231 : l = rel_crossproduct(v->sql->sa, rel_dup(ll), rel_dup(r), op);
3141 : else
3142 774 : l = rel_crossproduct(v->sql->sa, rel_dup(lr), rel_dup(r), op);
3143 1005 : l->exps = nsexps;
3144 1005 : l->attr = nsattr;
3145 1005 : set_processed(l);
3146 1005 : if (left)
3147 231 : l = rel_crossproduct(v->sql->sa, l, rel_dup(lr), lop);
3148 : else
3149 774 : l = rel_crossproduct(v->sql->sa, rel_dup(ll), l, lop);
3150 1005 : l->exps = njexps;
3151 1005 : l->attr = njattr;
3152 1005 : set_processed(l);
3153 1005 : rel_destroy(rel);
3154 1005 : rel = l;
3155 1005 : if (cycle <= 0)
3156 590 : v->changes++;
3157 : }
3158 : return rel;
3159 : }
3160 :
3161 : /* antijoin(a, union(b,c)) -> antijoin(antijoin(a,b), c) */
3162 : static inline sql_rel *
3163 6594 : rel_rewrite_antijoin(visitor *v, sql_rel *rel)
3164 : {
3165 6594 : sql_rel *l = rel->l;
3166 6594 : sql_rel *r = rel->r;
3167 :
3168 6594 : assert(rel->op == op_anti);
3169 6594 : if (l && !rel_is_ref(l) && r && !rel_is_ref(r) && is_union(r->op) && !is_single(r)) {
3170 0 : sql_rel *rl = rel_dup(r->l), *nl;
3171 0 : sql_rel *rr = rel_dup(r->r);
3172 :
3173 0 : if (!is_project(rl->op))
3174 0 : rl = rel_project(v->sql->sa, rl,
3175 : rel_projections(v->sql, rl, NULL, 1, 1));
3176 0 : if (!is_project(rr->op))
3177 0 : rr = rel_project(v->sql->sa, rr,
3178 : rel_projections(v->sql, rr, NULL, 1, 1));
3179 0 : rel_rename_exps(v->sql, r->exps, rl->exps);
3180 0 : rel_rename_exps(v->sql, r->exps, rr->exps);
3181 :
3182 0 : nl = rel_crossproduct(v->sql->sa, rel->l, rl, op_anti);
3183 0 : nl->exps = exps_copy(v->sql, rel->exps);
3184 0 : nl->attr = exps_copy(v->sql, rel->attr);
3185 0 : set_processed(nl);
3186 0 : rel->l = nl;
3187 0 : rel->r = rr;
3188 0 : rel_destroy(r);
3189 0 : v->changes++;
3190 0 : return rel;
3191 : }
3192 : return rel;
3193 : }
3194 :
3195 : static sql_rel *
3196 3487665 : rel_optimize_semi_and_anti_(visitor *v, sql_rel *rel)
3197 : {
3198 : /* rewrite semijoin (A, join(A,B)) into semijoin (A,B) */
3199 3487665 : if (rel && is_semi(rel->op))
3200 19022 : rel = rel_rewrite_semijoin(v, rel);
3201 : /* push semijoin through join */
3202 3487665 : if (rel && (is_semi(rel->op) || is_innerjoin(rel->op)))
3203 471694 : rel = rel_push_semijoin_down_or_up(v, rel);
3204 : /* antijoin(a, union(b,c)) -> antijoin(antijoin(a,b), c) */
3205 3487665 : if (rel && rel->op == op_anti)
3206 6594 : rel = rel_rewrite_antijoin(v, rel);
3207 3487665 : return rel;
3208 : }
3209 :
3210 : static sql_rel *
3211 91872 : rel_optimize_semi_and_anti(visitor *v, global_props *gp, sql_rel *rel)
3212 : {
3213 91872 : v->data = &gp->opt_cycle;
3214 91872 : rel = rel_visitor_bottomup(v, rel, &rel_optimize_semi_and_anti_);
3215 91872 : v->data = gp;
3216 91872 : return rel;
3217 : }
3218 :
3219 : run_optimizer
3220 638404 : bind_optimize_semi_and_anti(visitor *v, global_props *gp)
3221 : {
3222 : /* Important -> Re-write semijoins after rel_join_order */
3223 638404 : int flag = v->sql->sql_optimizer;
3224 638208 : return gp->opt_level == 1 && (gp->cnt[op_join] || gp->cnt[op_left] || gp->cnt[op_right]
3225 1276612 : || gp->cnt[op_full] || gp->cnt[op_semi] || gp->cnt[op_anti]) && (flag & optimize_semi_and_anti) ? rel_optimize_semi_and_anti : NULL;
3226 : }
3227 :
3228 :
3229 : static sql_rel *
3230 1621385 : rel_semijoin_use_fk(visitor *v, sql_rel *rel)
3231 : {
3232 1621385 : if (is_semi(rel->op) && rel->exps) {
3233 6648 : list *exps = rel->exps;
3234 6648 : list *rels = sa_list(v->sql->sa);
3235 :
3236 6648 : rel->exps = NULL;
3237 6648 : append(rels, rel->l);
3238 6648 : append(rels, rel->r);
3239 6648 : (void) find_fk( v->sql, rels, exps);
3240 :
3241 6648 : rel->exps = exps;
3242 : }
3243 1621385 : return rel;
3244 : }
3245 :
3246 : /*
3247 : * Push {semi}joins down, pushes the joins through group by expressions.
3248 : * When the join is on the group by columns, we can push the joins left
3249 : * under the group by. This should only be done, iff the new semijoin would
3250 : * reduce the input table to the groupby. So there should be a reduction
3251 : * (selection) on the table A and this should be propagated to the groupby via
3252 : * for example a primary key.
3253 : *
3254 : * {semi}join( A, groupby( B ) [gbe][aggrs] ) [ gbe == A.x ]
3255 : * ->
3256 : * {semi}join( A, groupby( semijoin(B,A) [gbe == A.x] ) [gbe][aggrs] ) [ gbe == A.x ]
3257 : */
3258 : static inline sql_rel *
3259 1621385 : rel_push_join_down(visitor *v, sql_rel *rel)
3260 : {
3261 1621385 : if (!rel_is_ref(rel) && ((is_left(rel->op) || rel->op == op_join || is_semi(rel->op)) && rel->l && rel->exps)) {
3262 235841 : sql_rel *gb = rel->r, *ogb = gb, *l = NULL, *rell = rel->l;
3263 :
3264 235841 : if (is_simple_project(gb->op) && !rel_is_ref(gb))
3265 46591 : gb = gb->l;
3266 :
3267 235841 : if (rel_is_ref(rell) || !gb || rel_is_ref(gb))
3268 : return rel;
3269 :
3270 225888 : if (is_groupby(gb->op) && gb->r && list_length(gb->r)) {
3271 190 : list *exps = rel->exps, *jes = new_exp_list(v->sql->sa), *gbes = gb->r;
3272 190 : node *n, *m;
3273 : /* find out if all group by expressions are used in the join */
3274 195 : for(n = gbes->h; n; n = n->next) {
3275 190 : sql_exp *gbe = n->data;
3276 190 : int fnd = 0;
3277 190 : const char *rname = NULL, *name = NULL;
3278 :
3279 : /* project in between, ie find alias */
3280 : /* first find expression in expression list */
3281 190 : gbe = exps_uses_exp( gb->exps, gbe);
3282 190 : if (!gbe)
3283 0 : continue;
3284 190 : if (ogb != gb)
3285 166 : gbe = exps_uses_exp( ogb->exps, gbe);
3286 190 : if (gbe) {
3287 150 : rname = exp_find_rel_name(gbe);
3288 150 : name = exp_name(gbe);
3289 : }
3290 :
3291 150 : if (!name)
3292 40 : return rel;
3293 :
3294 372 : for (m = exps->h; m && !fnd; m = m->next) {
3295 222 : sql_exp *je = m->data;
3296 :
3297 222 : if (je->card >= CARD_ATOM && je->type == e_cmp &&
3298 222 : !is_complex_exp(je->flag)) {
3299 : /* expect right expression to match */
3300 222 : sql_exp *r = je->r;
3301 :
3302 222 : if (r == 0 || r->type != e_column)
3303 11 : continue;
3304 211 : if (r->l && rname && strcmp(r->l, rname) == 0 && strcmp(r->r, name)==0) {
3305 : fnd = 1;
3306 83 : } else if (!r->l && !rname && strcmp(r->r, name)==0) {
3307 : fnd = 1;
3308 : }
3309 : if (fnd) {
3310 128 : sql_exp *le = je->l;
3311 128 : sql_exp *re = exp_push_down_prj(v->sql, r, gb, gb->l);
3312 128 : if (!re || (list_length(jes) == 0 && !find_prop(le->p, PROP_HASHCOL))) {
3313 : fnd = 0;
3314 : } else {
3315 5 : int anti = is_anti(je), semantics = is_semantics(je);
3316 :
3317 5 : je = exp_compare(v->sql->sa, le, re, je->flag);
3318 5 : if (anti) set_anti(je);
3319 5 : if (semantics) set_semantics(je);
3320 5 : list_append(jes, je);
3321 : }
3322 : }
3323 : }
3324 : }
3325 150 : if (!fnd)
3326 : return rel;
3327 : }
3328 5 : l = rel_dup(rel->l);
3329 :
3330 : /* push join's left side (as semijoin) down group by */
3331 5 : l = gb->l = rel_crossproduct(v->sql->sa, gb->l, l, op_semi);
3332 5 : l->exps = jes;
3333 5 : set_processed(l);
3334 5 : v->changes++;
3335 5 : return rel;
3336 : }
3337 : }
3338 : return rel;
3339 : }
3340 :
3341 : static bool
3342 728 : check_projection_on_foreignside(sql_rel *r, list *pexps, int fk_left)
3343 : {
3344 : /* projection columns from the foreign side */
3345 728 : if (list_empty(pexps))
3346 : return true;
3347 3156 : for (node *n = pexps->h; n; n = n->next) {
3348 3091 : sql_exp *pe = n->data;
3349 :
3350 3091 : if (pe && is_atom(pe->type))
3351 23 : continue;
3352 3068 : if (pe && !is_alias(pe->type))
3353 : return false;
3354 : /* check for columns from the pk side, then keep the join with the pk */
3355 3047 : if ((fk_left && rel_find_exp(r->r, pe)) || (!fk_left && rel_find_exp(r->l, pe)))
3356 594 : return false;
3357 : }
3358 : return true;
3359 : }
3360 :
3361 : static sql_rel *
3362 242278 : rel_simplify_project_fk_join(mvc *sql, sql_rel *r, list *pexps, list *orderexps, int *changes)
3363 : {
3364 242278 : sql_rel *rl = r->l, *rr = r->r, *nr = NULL;
3365 242278 : sql_exp *je, *le, *nje, *re;
3366 242278 : int fk_left = 1;
3367 :
3368 : /* check for foreign key join */
3369 242278 : if (list_length(r->exps) != 1 || !list_empty(r->attr))
3370 7556 : return r;
3371 234722 : if (!(je = exps_find_prop(r->exps, PROP_JOINIDX)) || je->flag != cmp_equal)
3372 : return r;
3373 : /* je->l == foreign expression, je->r == primary expression */
3374 1204 : if (rel_find_exp(r->l, je->l)) {
3375 : fk_left = 1;
3376 34 : } else if (rel_find_exp(r->r, je->l)) {
3377 : fk_left = 0;
3378 : } else { /* not found */
3379 : return r;
3380 : }
3381 :
3382 : /* primary side must be a full table */
3383 1170 : if ((fk_left && (!is_left(r->op) && !is_full(r->op)) && !is_basetable(rr->op)) ||
3384 34 : (!fk_left && (!is_right(r->op) && !is_full(r->op)) && !is_basetable(rl->op)))
3385 554 : return r;
3386 :
3387 650 : if (!check_projection_on_foreignside(r, pexps, fk_left) || !check_projection_on_foreignside(r, orderexps, fk_left))
3388 613 : return r;
3389 :
3390 : /* rewrite, ie remove pkey side if possible */
3391 37 : le = (sql_exp*)je->l, re = (sql_exp*)je->l;
3392 :
3393 : /* both have NULL and there are semantics, the join cannot be removed */
3394 37 : if (is_semantics(je) && has_nil(le) && has_nil(re))
3395 : return r;
3396 :
3397 37 : (*changes)++;
3398 : /* if the foreign key column doesn't have NULL values, then return it */
3399 37 : if (!has_nil(le) || is_full(r->op) || (fk_left && is_left(r->op)) || (!fk_left && is_right(r->op))) {
3400 : /* if ->attr, introduce group by on index */
3401 31 : if (fk_left) {
3402 23 : nr = rel_dup(r->l);
3403 : } else {
3404 8 : nr = rel_dup(r->r);
3405 : }
3406 31 : if (!list_empty(r->attr)) {
3407 0 : nr = rel_groupby(sql, nr, NULL);
3408 0 : if (nr) {
3409 : // printf("# introduced groupby \n");
3410 0 : nr->r = append(sa_list(sql->sa), le);
3411 0 : nr->exps = r->attr;
3412 : }
3413 : }
3414 31 : return nr;
3415 : }
3416 :
3417 : /* remove NULL values, ie generate a select not null */
3418 6 : nje = exp_compare(sql->sa, exp_ref(sql, le), exp_atom(sql->sa, atom_general(sql->sa, exp_subtype(le), NULL, 0)), cmp_equal);
3419 6 : set_anti(nje);
3420 6 : set_has_no_nil(nje);
3421 6 : set_semantics(nje);
3422 6 : if (fk_left) {
3423 6 : nr = rel_dup(r->l);
3424 : } else {
3425 0 : nr = rel_dup(r->r);
3426 : }
3427 6 : nr = rel_select(sql->sa, nr, nje);
3428 6 : set_processed(nr);
3429 6 : return nr;
3430 : }
3431 :
3432 : static sql_rel *
3433 1855 : rel_simplify_count_fk_join(mvc *sql, sql_rel *r, list *gexps, list *gcols, int *changes)
3434 : {
3435 1855 : sql_rel *rl = r->l, *rr = r->r, *nr = NULL;
3436 1855 : sql_exp *je, *le, *nje, *re, *oce;
3437 1855 : int fk_left = 1;
3438 :
3439 : /* check for foreign key join */
3440 1855 : if (list_length(r->exps) != 1)
3441 : return r;
3442 1854 : if (!(je = exps_find_prop(r->exps, PROP_JOINIDX)) || je->flag != cmp_equal)
3443 : return r;
3444 : /* je->l == foreign expression, je->r == primary expression */
3445 63 : if (rel_find_exp(r->l, je->l)) {
3446 : fk_left = 1;
3447 7 : } else if (rel_find_exp(r->r, je->l)) {
3448 : fk_left = 0;
3449 : } else { /* not found */
3450 : return r;
3451 : }
3452 :
3453 63 : oce = gexps->h->data;
3454 63 : if (oce->l) /* we only handle COUNT(*) */
3455 : return r;
3456 :
3457 : /* primary side must be a full table */
3458 62 : if ((fk_left && (!is_left(r->op) && !is_full(r->op)) && !is_basetable(rr->op)) ||
3459 6 : (!fk_left && (!is_right(r->op) && !is_full(r->op)) && !is_basetable(rl->op)))
3460 : return r;
3461 :
3462 33 : if (fk_left && is_join(rl->op) && !rel_is_ref(rl)) {
3463 12 : r->l = rel_simplify_count_fk_join(sql, rl, gexps, gcols, changes);
3464 12 : if (rl != r->l)
3465 9 : rel_destroy(rl);
3466 : }
3467 39 : if (!fk_left && is_join(rr->op) && !rel_is_ref(rr)) {
3468 2 : r->r = rel_simplify_count_fk_join(sql, rr, gexps, gcols, changes);
3469 2 : if (rr != r->r)
3470 2 : rel_destroy(rr);
3471 : }
3472 :
3473 39 : if (!check_projection_on_foreignside(r, gcols, fk_left))
3474 : return r;
3475 :
3476 : /* rewrite, ie remove pkey side if possible */
3477 37 : le = (sql_exp*)je->l, re = (sql_exp*)je->l;
3478 :
3479 : /* both have NULL and there are semantics, the join cannot be removed */
3480 37 : if (is_semantics(je) && has_nil(le) && has_nil(re))
3481 : return r;
3482 :
3483 37 : (*changes)++;
3484 : /* if the foreign key column doesn't have NULL values, then return it */
3485 37 : if (!has_nil(le) || is_full(r->op) || (fk_left && is_left(r->op)) || (!fk_left && is_right(r->op))) {
3486 31 : if (fk_left) {
3487 25 : nr = rel_dup(r->l);
3488 : } else {
3489 6 : nr = rel_dup(r->r);
3490 : }
3491 31 : return nr;
3492 : }
3493 :
3494 : /* remove NULL values, ie generate a select not null */
3495 6 : nje = exp_compare(sql->sa, exp_ref(sql, le), exp_atom(sql->sa, atom_general(sql->sa, exp_subtype(le), NULL, 0)), cmp_equal);
3496 6 : set_anti(nje);
3497 6 : set_has_no_nil(nje);
3498 6 : set_semantics(nje);
3499 6 : if (fk_left) {
3500 6 : nr = rel_dup(r->l);
3501 : } else {
3502 0 : nr = rel_dup(r->r);
3503 : }
3504 6 : nr = rel_select(sql->sa, nr, nje);
3505 6 : set_processed(nr);
3506 6 : return nr;
3507 : }
3508 :
3509 : /*
3510 : * Handle (left/right/outer/natural) join fk-pk rewrites
3511 : * 1 group by ( fk-pk-join () ) [ count(*) ] -> group by ( fk )
3512 : * 2 project ( fk-pk-join () ) [ fk-column ] -> project (fk table)[ fk-column ]
3513 : * 3 project ( fk1-pk1-join( fk2-pk2-join()) [ fk-column, pk1 column ] -> project (fk1-pk1-join)[ fk-column, pk1 column ]
3514 : */
3515 : static inline sql_rel *
3516 3714707 : rel_simplify_fk_joins(visitor *v, sql_rel *rel)
3517 : {
3518 3714707 : sql_rel *r = NULL;
3519 :
3520 3714707 : if (is_simple_project(rel->op))
3521 1212892 : r = rel->l;
3522 :
3523 3714744 : while (is_simple_project(rel->op) && r && list_length(r->exps) == 1 && (is_join(r->op) || r->op == op_semi) && !(rel_is_ref(r))) {
3524 242278 : sql_rel *or = r;
3525 :
3526 242278 : r = rel_simplify_project_fk_join(v->sql, r, rel->exps, rel->r, &v->changes);
3527 242278 : if (r == or)
3528 : return rel;
3529 37 : rel_destroy(rel->l);
3530 37 : rel->l = r;
3531 : }
3532 :
3533 3472466 : if (!is_groupby(rel->op))
3534 : return rel;
3535 :
3536 91155 : r = rel->l;
3537 135408 : while(r && is_simple_project(r->op))
3538 44253 : r = r->l;
3539 :
3540 105679 : while (is_groupby(rel->op) && !rel_is_ref(rel) && r && (is_join(r->op) || r->op == op_semi) && list_length(r->exps) == 1 && !(rel_is_ref(r)) &&
3541 : /* currently only single count aggregation is handled, no other projects or aggregation */
3542 109706 : list_length(rel->exps) == 1 && exp_aggr_is_count(rel->exps->h->data)) {
3543 1841 : sql_rel *or = r;
3544 :
3545 1841 : r = rel_simplify_count_fk_join(v->sql, r, rel->exps, rel->r, &v->changes);
3546 1841 : if (r == or)
3547 : return rel;
3548 26 : rel_destroy(rel->l);
3549 26 : rel->l = r;
3550 : }
3551 : return rel;
3552 : }
3553 :
3554 : /*
3555 : * Gets the column expressions of a diff function and adds them to "columns".
3556 : * The diff function has two possible argument types: either a sql_exp representing a column
3557 : * or a sql_exp representing another diff function, therefore this function is recursive.
3558 : */
3559 : static void
3560 168 : get_diff_function_columns(sql_exp *diffExp, list *columns) {
3561 168 : list *args = diffExp->l;
3562 :
3563 370 : for (node *arg = args->h; arg; arg = arg->next) {
3564 202 : sql_exp *exp = arg->data;
3565 :
3566 : // diff function
3567 202 : if (exp->type == e_func) {
3568 34 : get_diff_function_columns(exp, columns);
3569 : }
3570 : // column
3571 : else {
3572 168 : list_append(columns, exp);
3573 : }
3574 : }
3575 168 : }
3576 :
3577 : /*
3578 : * Builds a list of aggregation key columns to be used by the select push down algorithm, namely for
3579 : * window functions. Returns NULL if the window function does not partition by any column
3580 : */
3581 : static list *
3582 1747 : get_partition_by_key_columns(allocator *sa, sql_rel *r) {
3583 18925 : for (node* n = r->exps->h; n; n = n->next) {
3584 17402 : sql_exp *e = n->data;
3585 :
3586 17402 : if (e->type == e_func) {
3587 6047 : sql_subfunc *f = e->f;
3588 :
3589 : // aggregation function
3590 6047 : if (!strcmp(f->func->base.name, "rank")) {
3591 224 : list* rankArguments = e->l;
3592 : // the partition key is the second argument
3593 224 : sql_exp *partitionExp = rankArguments->h->next->data;
3594 :
3595 : // check if the key contains any columns, i.e., is a diff function
3596 224 : if (partitionExp->type == e_func) {
3597 : // get columns to list
3598 134 : list *aggColumns = sa_list(sa);
3599 134 : get_diff_function_columns(partitionExp, aggColumns);
3600 134 : return aggColumns;
3601 : }
3602 : // the function has no aggregation columns (e_atom of boolean)
3603 : else {
3604 : return NULL;
3605 : }
3606 :
3607 : }
3608 : }
3609 : }
3610 : return NULL;
3611 : }
3612 :
3613 : /*
3614 : static bool
3615 : rank_exp_has_partition_key(sql_exp *e)
3616 : {
3617 : if (e->type == e_func) {
3618 : sql_subfunc *f = e->f;
3619 :
3620 : if (f->func->type == F_ANALYTIC) {
3621 : list *args = e->l;
3622 :
3623 : if (list_length(args) >= 2) { // the partition key is the second argument
3624 : return true;
3625 : }
3626 : }
3627 : }
3628 : return false;
3629 : }
3630 : */
3631 :
3632 : /*
3633 : * Checks if a filter column is also used as an aggregation key, so it can be later safely pushed down.
3634 : */
3635 : static int
3636 187 : filter_column_in_partition_by_columns(sql_exp *column, list *keyColumns)
3637 : {
3638 : /* check if it is a column or an e_convert, and get the actual column if it is the latter */
3639 187 : if (column->type == e_convert) {
3640 1 : column = column->l;
3641 : }
3642 :
3643 187 : char *tableName = column->l;
3644 187 : char *columnName = column->r;
3645 :
3646 404 : for (node *n = keyColumns->h; n; n = n->next) {
3647 226 : sql_exp *keyCol = n->data;
3648 226 : char *keyColTableName = keyCol->l;
3649 226 : char *keyColColumnName = keyCol->r;
3650 :
3651 226 : if (!strcmp(tableName, keyColTableName) && !strcmp(columnName, keyColColumnName)) {
3652 : /* match */
3653 : return 1;
3654 : }
3655 : }
3656 :
3657 : /* no matches found */
3658 : return 0;
3659 : }
3660 :
3661 : /*
3662 : * Push select down, pushes the selects through (simple) projections. Also
3663 : * it cleans up the projections which become useless.
3664 : *
3665 : * WARNING - Make sure to call try_remove_empty_select macro before returning so we ensure
3666 : * possible generated empty selects won't never be generated
3667 : */
3668 : static sql_rel *
3669 7353995 : rel_push_select_down(visitor *v, sql_rel *rel)
3670 : {
3671 7353995 : list *exps = NULL;
3672 7353995 : sql_rel *r = NULL;
3673 7353995 : node *n;
3674 :
3675 7353995 : if (rel_is_ref(rel)) {
3676 378466 : if (is_select(rel->op) && !list_empty(rel->exps)) {
3677 : /* add inplace empty select */
3678 1744 : sql_rel *l = rel_select(v->sql->sa, rel->l, NULL);
3679 :
3680 1744 : l->exps = rel->exps;
3681 1744 : set_processed(l);
3682 1744 : rel->exps = NULL;
3683 1744 : rel->l = l;
3684 1744 : v->changes++;
3685 : }
3686 378466 : return rel;
3687 : }
3688 :
3689 : /* don't make changes for empty selects */
3690 6975529 : if (is_select(rel->op) && list_empty(rel->exps))
3691 10 : return try_remove_empty_select(v, rel);
3692 :
3693 : /* merge 2 selects */
3694 6975519 : r = rel->l;
3695 6975519 : if (is_select(rel->op) && r && r->exps && is_select(r->op) && !(rel_is_ref(r)) && !exps_have_func(rel->exps)) {
3696 28 : (void)list_merge(r->exps, rel->exps, (fdup)NULL);
3697 28 : rel->l = NULL;
3698 28 : rel_destroy(rel);
3699 28 : v->changes++;
3700 28 : return try_remove_empty_select(v, r);
3701 : }
3702 : /*
3703 : * Push select through semi/anti join
3704 : * select (semi(A,B)) == semi(select(A), B)
3705 : */
3706 6975491 : if (is_select(rel->op) && r && is_semi(r->op) && !(rel_is_ref(r))) {
3707 330 : rel->l = r->l;
3708 330 : r->l = rel;
3709 330 : v->changes++;
3710 : /*
3711 : * if A has 2 references (ie used on both sides of
3712 : * the semi join), we also push the select into A.
3713 : */
3714 330 : if (rel_is_ref(rel->l) && rel->l == rel_find_ref(r->r)){
3715 0 : sql_rel *lx = rel->l;
3716 0 : sql_rel *rx = r->r;
3717 0 : if (lx->ref.refcnt == 2 && !rel_is_ref(rx)) {
3718 0 : while (rx->l && !rel_is_ref(rx->l) &&
3719 0 : (is_project(rx->op) ||
3720 : is_select(rx->op) ||
3721 : is_join(rx->op)))
3722 : rx = rx->l;
3723 : /* probably we need to introduce a project */
3724 0 : rel_destroy(rel->l);
3725 0 : lx = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
3726 0 : r->l = lx;
3727 0 : rx->l = rel_dup(lx);
3728 : }
3729 : }
3730 330 : return r;
3731 : }
3732 6975161 : exps = rel->exps;
3733 :
3734 : /* push select through join */
3735 6975161 : if (is_select(rel->op) && r && is_join(r->op) && !rel_is_ref(r) && !is_single(r)){
3736 22427 : sql_rel *jl = r->l, *ojl = jl, *jr = r->r, *ojr = jr;
3737 22427 : int left = r->op == op_join || r->op == op_left;
3738 22427 : int right = r->op == op_join || r->op == op_right;
3739 :
3740 22427 : if (r->op == op_full)
3741 : return rel;
3742 :
3743 : /* introduce selects under the join (if needed) */
3744 22403 : set_processed(jl);
3745 22403 : set_processed(jr);
3746 49295 : for (n = exps->h; n;) {
3747 26892 : node *next = n->next;
3748 26892 : sql_exp *e = n->data;
3749 :
3750 26892 : if (!exp_unsafe(e, false, true)) {
3751 26674 : if (left && rel_rebind_exp(v->sql, jl, e)) {
3752 13629 : if (!is_select(jl->op) || rel_is_ref(jl))
3753 11376 : r->l = jl = rel_select(v->sql->sa, jl, NULL);
3754 13629 : rel_select_add_exp(v->sql->sa, jl, e);
3755 13629 : list_remove_node(exps, NULL, n);
3756 13629 : v->changes++;
3757 13045 : } else if (right && rel_rebind_exp(v->sql, jr, e)) {
3758 5264 : if (!is_select(jr->op) || rel_is_ref(jr))
3759 4292 : r->r = jr = rel_select(v->sql->sa, jr, NULL);
3760 5264 : rel_select_add_exp(v->sql->sa, jr, e);
3761 5264 : list_remove_node(exps, NULL, n);
3762 5264 : v->changes++;
3763 : }
3764 : }
3765 : n = next;
3766 : }
3767 22403 : if (ojl != jl)
3768 11376 : set_processed(jl);
3769 22403 : if (ojr != jr)
3770 4292 : set_processed(jr);
3771 : }
3772 :
3773 : /* merge select and cross product ? */
3774 6975137 : if (is_select(rel->op) && r && r->op == op_join && !rel_is_ref(r) && !is_single(r) && !exps_have_unsafe(exps, false, true)) {
3775 14611 : for (n = exps->h; n;) {
3776 1854 : node *next = n->next;
3777 1854 : sql_exp *e = n->data;
3778 :
3779 1854 : if (exp_is_join(e, NULL) == 0) {
3780 508 : if (!r->exps)
3781 158 : r->exps = sa_list(v->sql->sa);
3782 508 : append(r->exps, e);
3783 508 : list_remove_node(exps, NULL, n);
3784 508 : v->changes++;
3785 : }
3786 : n = next;
3787 : }
3788 : }
3789 :
3790 6975137 : if (is_select(rel->op) && r && (is_simple_project(r->op) || (is_groupby(r->op) && !list_empty(r->r))) && !rel_is_ref(r) && !is_single(r)){
3791 78726 : sql_rel *pl = r->l, *opl = pl;
3792 : /* we cannot push through window functions (for safety I disabled projects over DDL too) */
3793 78726 : if (pl && pl->op != op_ddl && !exps_have_unsafe(r->exps, false, false)) {
3794 : /* introduce selects under the project (if needed) */
3795 59053 : set_processed(pl);
3796 59053 : if (!pl->exps)
3797 422 : pl->exps = sa_list(v->sql->sa);
3798 125894 : for (n = exps->h; n;) {
3799 66841 : node *next = n->next;
3800 66841 : sql_exp *e = n->data, *ne = NULL;
3801 :
3802 : /* can we move it down */
3803 66841 : if (e->type == e_cmp && (ne = exp_push_down_prj(v->sql, e, r, pl)) && ne != e) {
3804 37238 : if (!(is_select(pl->op) && is_join(pl->op) && is_semi(pl->op)) || rel_is_ref(pl))
3805 37238 : r->l = pl = rel_select(v->sql->sa, pl, NULL);
3806 37238 : rel_select_add_exp(v->sql->sa, pl, ne);
3807 37238 : list_remove_node(exps, NULL, n);
3808 37238 : v->changes++;
3809 : }
3810 : n = next;
3811 : }
3812 59053 : if (opl != pl)
3813 28587 : set_processed(pl);
3814 : }
3815 :
3816 : /* push filters if they match the partition by key on a window function */
3817 1747 : else if (pl && pl->op != op_ddl && exps_have_unsafe(r->exps, false, false)) {
3818 1747 : set_processed(pl);
3819 : /* list of partition by key columns */
3820 1747 : list *keyColumns = get_partition_by_key_columns(v->sql->sa, r);
3821 :
3822 : /* partition by keys found, check if any filter matches them */
3823 1747 : if (keyColumns) {
3824 325 : for (n = exps->h; n;) {
3825 191 : node *next = n->next;
3826 191 : sql_exp *e = n->data, *ne = NULL;
3827 :
3828 191 : if (e->type == e_cmp) {
3829 : /* simple comparison filter */
3830 191 : if (e->flag == cmp_gt || e->flag == cmp_gte || e->flag == cmp_lte || e->flag == cmp_lt
3831 191 : || e->flag == cmp_equal || e->flag == cmp_notequal || e->flag == cmp_in || e->flag == cmp_notin
3832 5 : || (e->flag == cmp_filter && ((list*)e->l)->cnt == 1)) {
3833 187 : sql_exp* column;
3834 : /* the column in 'like' filters is stored inside a list */
3835 187 : if (e->flag == cmp_filter) {
3836 1 : column = ((list*)e->l)->h->data;
3837 : } else {
3838 186 : column = e->l;
3839 : }
3840 :
3841 : /* check if the expression matches any partition by key, meaning we can
3842 : try to safely push it down */
3843 187 : if (filter_column_in_partition_by_columns(column, keyColumns)) {
3844 9 : ne = exp_push_down_prj(v->sql, e, r, pl);
3845 :
3846 : /* can we move it down */
3847 9 : if (ne && ne != e && pl->exps) {
3848 9 : if (!is_select(pl->op) || rel_is_ref(pl))
3849 8 : r->l = pl = rel_select(v->sql->sa, pl, NULL);
3850 9 : rel_select_add_exp(v->sql->sa, pl, ne);
3851 9 : list_remove_node(exps, NULL, n);
3852 9 : v->changes++;
3853 : }
3854 : }
3855 : }
3856 : }
3857 : n = next;
3858 : }
3859 :
3860 : /* cleanup list */
3861 134 : list_destroy(keyColumns);
3862 : }
3863 : /* also push (rewrite) limits on output of row_number/(*)rank like window functions */
3864 1747 : if (is_simple_project(r->op) /*&& is_simple_project(pl->op)*/) { /* possible window functions */
3865 3677 : for (n = exps->h; n; n = n->next) {
3866 1938 : sql_exp *e = n->data;
3867 :
3868 1938 : if (e->type == e_cmp && (e->flag == cmp_lt || e->flag == cmp_lte) && exp_is_atom(e->r)) { /* simple limit */
3869 80 : sql_exp *ranke = rel_find_exp(r, e->l);
3870 :
3871 80 : if (ranke && ranke->type == e_func) {
3872 75 : sql_subfunc *rankf = ranke->f;
3873 75 : if (rankf->func->type == F_ANALYTIC) { /* rank functions cannot have a frame */
3874 : // For now only for rank/row_number without partition by
3875 74 : sql_rel *tn = NULL;
3876 74 : if (strcmp(rankf->func->base.name, "rank") == 0 && is_simple_project(pl->op) && pl->r /* &&
3877 : !rank_exp_has_partition_key(ranke)*/) {
3878 6 : tn = r->l = rel_topn(v->sql->sa, r->l, append(sa_list(v->sql->sa), e->r));
3879 6 : tn->grouped = 1;
3880 6 : v->changes++;
3881 6 : break;
3882 : }
3883 68 : if (strcmp(rankf->func->base.name, "row_number") == 0 && list_empty(r->r) && !is_topn(pl->op) /*&&
3884 : !rank_exp_has_partition_key(ranke)*/) {
3885 2 : tn = r->l = rel_topn(v->sql->sa, r->l, append(sa_list(v->sql->sa), e->r));
3886 2 : tn->grouped = 1;
3887 2 : v->changes++;
3888 2 : break;
3889 : }
3890 : }
3891 : }
3892 : }
3893 : }
3894 : }
3895 : }
3896 : }
3897 :
3898 : /* try push select under set relation */
3899 6975137 : if (is_select(rel->op) && r && is_set(r->op) && !list_empty(r->exps) && !rel_is_ref(r) && !is_single(r) && !list_empty(exps)) {
3900 20 : sql_rel *u = r, *ul = u->l, *ur = u->r;
3901 :
3902 20 : ul = rel_dup(ul);
3903 20 : ur = rel_dup(ur);
3904 20 : if (!is_project(ul->op))
3905 0 : ul = rel_project(v->sql->sa, ul,
3906 : rel_projections(v->sql, ul, NULL, 1, 1));
3907 20 : if (!is_project(ur->op))
3908 0 : ur = rel_project(v->sql->sa, ur,
3909 : rel_projections(v->sql, ur, NULL, 1, 1));
3910 20 : rel_rename_exps(v->sql, u->exps, ul->exps);
3911 20 : rel_rename_exps(v->sql, u->exps, ur->exps);
3912 :
3913 : /* introduce selects under the set */
3914 20 : ul = rel_select(v->sql->sa, ul, NULL);
3915 20 : ul->exps = exps_copy(v->sql, exps);
3916 20 : set_processed(ul);
3917 20 : ur = rel_select(v->sql->sa, ur, NULL);
3918 20 : ur->exps = exps_copy(v->sql, exps);
3919 20 : set_processed(ur);
3920 :
3921 20 : rel = rel_inplace_setop(v->sql, rel, ul, ur, u->op, rel_projections(v->sql, rel, NULL, 1, 1));
3922 20 : if (need_distinct(u))
3923 0 : set_distinct(rel);
3924 20 : v->changes++;
3925 : }
3926 6975137 : if (is_select(rel->op) && r && is_munion(r->op) && !is_recursive(r) && !list_empty(r->exps) && !rel_is_ref(r) && !is_single(r) && !list_empty(exps)) {
3927 4979 : sql_rel *u = r;
3928 4979 : list *rels = u->l, *nrels = sa_list(v->sql->sa);
3929 14945 : for(node *n = rels->h; n; n = n->next) {
3930 9966 : sql_rel *ul = n->data;
3931 9966 : ul = rel_dup(ul);
3932 9966 : if (!is_project(ul->op) || rel_is_ref(ul))
3933 9966 : ul = rel_project(v->sql->sa, ul,
3934 : rel_projections(v->sql, ul, NULL, 1, 1));
3935 9966 : rel_rename_exps(v->sql, u->exps, ul->exps);
3936 :
3937 : /* introduce selects under the set */
3938 9966 : ul = rel_select(v->sql->sa, ul, NULL);
3939 9966 : ul->exps = exps_copy(v->sql, exps);
3940 9966 : set_processed(ul);
3941 9966 : nrels = append(nrels, ul);
3942 : }
3943 :
3944 4979 : rel = rel_inplace_setop_n_ary(v->sql, rel, nrels, u->op, rel_projections(v->sql, rel, NULL, 1, 1));
3945 4979 : if (need_distinct(u))
3946 9 : set_distinct(rel);
3947 4979 : if (is_recursive(u))
3948 0 : set_recursive(rel);
3949 4979 : v->changes++;
3950 : }
3951 :
3952 6975137 : return try_remove_empty_select(v, rel);
3953 : }
3954 :
3955 : static int
3956 10457 : index_exp(sql_exp *e, sql_idx *i)
3957 : {
3958 10457 : if (e->type == e_cmp && !is_complex_exp(e->flag)) {
3959 6790 : switch(i->type) {
3960 6790 : case hash_idx:
3961 : case oph_idx:
3962 6790 : if (e->flag == cmp_equal)
3963 : return 0;
3964 : /* fall through */
3965 : case join_idx:
3966 : default:
3967 157 : return -1;
3968 : }
3969 : }
3970 : return -1;
3971 : }
3972 :
3973 : /* find column for the select/join expression */
3974 : static sql_column *
3975 6633 : sjexp_col(sql_exp *e, sql_rel *r)
3976 : {
3977 6633 : sql_column *res = NULL;
3978 :
3979 6633 : if (e->type == e_cmp && !is_complex_exp(e->flag)) {
3980 6633 : res = exp_find_column(r, e->l, -2);
3981 6633 : if (!res)
3982 1567 : res = exp_find_column(r, e->r, -2);
3983 : }
3984 6633 : return res;
3985 : }
3986 :
3987 : static sql_idx *
3988 1796713 : find_index(allocator *sa, sql_rel *rel, sql_rel *sub, list **EXPS)
3989 : {
3990 1796713 : node *n;
3991 :
3992 : /* any (partial) match of the expressions with the index columns */
3993 : /* Depending on the index type we may need full matches and only
3994 : limited number of cmp types (hash only equality etc) */
3995 : /* Depending on the index type we should (in the rel_bin) generate
3996 : more code, ie for spatial index add post filter etc, for hash
3997 : compute hash value and use index */
3998 :
3999 1796713 : if (sub->exps && rel->exps)
4000 7171788 : for(n = sub->exps->h; n; n = n->next) {
4001 5561640 : prop *p;
4002 5561640 : sql_exp *e = n->data;
4003 :
4004 5561640 : if ((p = find_prop(e->p, PROP_HASHIDX)) != NULL) {
4005 9419 : list *exps, *cols;
4006 9419 : sql_idx *i = p->value.pval;
4007 9419 : fcmp cmp = (fcmp)&sql_column_kc_cmp;
4008 :
4009 : /* join indices are only interesting for joins */
4010 9419 : if (i->type == join_idx || list_length(i->columns) <= 1)
4011 0 : continue;
4012 : /* based on the index type, find qualifying exps */
4013 9419 : exps = list_select(rel->exps, i, (fcmp) &index_exp, (fdup)NULL);
4014 9419 : if (list_empty(exps))
4015 3756 : continue;
4016 : /* now we obtain the columns, move into sql_column_kc_cmp! */
4017 5663 : cols = list_map(exps, sub, (fmap) &sjexp_col);
4018 :
4019 : /* TODO check that at most 2 relations are involved */
4020 :
4021 : /* Match the index columns with the expression columns.
4022 : TODO, Allow partial matches ! */
4023 5663 : if (list_match(cols, i->columns, cmp) == 0) {
4024 : /* re-order exps in index order */
4025 290 : node *n, *m;
4026 290 : list *es = sa_list(sa);
4027 :
4028 970 : for(n = i->columns->h; n; n = n->next) {
4029 680 : int i = 0;
4030 2164 : for(m = cols->h; m; m = m->next, i++) {
4031 2164 : if (cmp(m->data, n->data) == 0){
4032 680 : sql_exp *e = list_fetch(exps, i);
4033 680 : list_append(es, e);
4034 680 : break;
4035 : }
4036 : }
4037 : }
4038 : /* fix the destroy function */
4039 290 : cols->destroy = NULL;
4040 290 : *EXPS = es;
4041 290 : e->used = 1;
4042 290 : return i;
4043 : }
4044 5373 : cols->destroy = NULL;
4045 : }
4046 : }
4047 : return NULL;
4048 : }
4049 :
4050 : static inline sql_rel *
4051 1171621 : rel_use_index(visitor *v, sql_rel *rel)
4052 : {
4053 1171621 : list *exps = NULL;
4054 1171621 : sql_idx *i = find_index(v->sql->sa, rel, rel->l, &exps);
4055 1171621 : int left = 1;
4056 :
4057 1171621 : assert(is_select(rel->op) || is_join(rel->op));
4058 1171621 : if (!i && is_join(rel->op)) {
4059 625092 : left = 0;
4060 625092 : i = find_index(v->sql->sa, rel, rel->r, &exps);
4061 : }
4062 :
4063 1171437 : if (i) {
4064 290 : prop *p;
4065 290 : node *n;
4066 290 : int single_table = 1;
4067 290 : sql_exp *re = NULL;
4068 :
4069 968 : for( n = exps->h; n && single_table; n = n->next) {
4070 678 : sql_exp *e = n->data, *nre = e->l;
4071 :
4072 678 : if (!is_compare(e->type) || is_anti(e) || e->flag != cmp_equal)
4073 : return rel;
4074 678 : if (is_join(rel->op) && ((left && !rel_find_exp(rel->l, nre)) || (!left && rel_find_exp(rel->r, nre))))
4075 111 : nre = e->r;
4076 678 : single_table = (!re || (exp_relname(nre) && exp_relname(re) && strcmp(exp_relname(nre), exp_relname(re)) == 0));
4077 678 : re = nre;
4078 : }
4079 290 : if (single_table) { /* add PROP_HASHCOL to all column exps */
4080 944 : for( n = exps->h; n; n = n->next) {
4081 662 : sql_exp *e = n->data;
4082 :
4083 : /* swapped ? */
4084 662 : if (is_join(rel->op) && ((left && !rel_find_exp(rel->l, e->l)) || (!left && !rel_find_exp(rel->r, e->l)))) {
4085 165 : exp_swap(e);
4086 : }
4087 662 : p = find_prop(e->p, PROP_HASHCOL);
4088 662 : if (!p)
4089 285 : e->p = p = prop_create(v->sql->sa, PROP_HASHCOL, e->p);
4090 662 : p->value.pval = i;
4091 : }
4092 : }
4093 : /* add the remaining exps to the new exp list */
4094 290 : if (list_length(rel->exps) > list_length(exps)) {
4095 108 : for( n = rel->exps->h; n; n = n->next) {
4096 81 : sql_exp *e = n->data;
4097 81 : if (!list_find(exps, e, (fcmp)&exp_cmp))
4098 27 : list_append(exps, e);
4099 : }
4100 : }
4101 290 : rel->exps = exps;
4102 : }
4103 : return rel;
4104 : }
4105 :
4106 : static sql_rel *
4107 3714706 : rel_select_leftgroup_2_semi(visitor *v, sql_rel *rel)
4108 : {
4109 3714706 : if (rel_is_ref(rel) || !is_select(rel->op) || list_empty(rel->exps))
4110 3178513 : return rel;
4111 536193 : sql_rel *l = rel->l;
4112 :
4113 536193 : if (!l || rel_is_ref(l) || !is_left(l->op) || list_empty(l->attr))
4114 535207 : return rel;
4115 :
4116 1964 : for(node *n = rel->exps->h; n; n = n->next) {
4117 986 : sql_exp *e = n->data;
4118 :
4119 986 : if (e->type == e_cmp && !is_semantics(e) && !e->f) {
4120 973 : list *attrs = l->attr;
4121 973 : sql_exp *a = attrs->h->data;
4122 :
4123 973 : if (exps_find_exp(l->attr, e->l) && exp_is_true(e->r) && e->flag == cmp_equal /*&& exp_is_true(a)*/) {
4124 : // printf("# optimize select leftgroup -> semi\n");
4125 8 : if (!list_empty(l->exps)) {
4126 13 : for(node *m = l->exps->h; m; m = m->next) {
4127 7 : sql_exp *j = m->data;
4128 7 : reset_any(j);
4129 : }
4130 : }
4131 8 : l->attr = NULL;
4132 8 : l->op = exp_is_true(a)?op_semi:op_anti;
4133 8 : list_remove_node(rel->exps, NULL, n);
4134 8 : rel = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
4135 8 : list_append(rel->exps, attrs->h->data);
4136 8 : v->changes++;
4137 8 : return rel;
4138 : }
4139 : }
4140 : }
4141 : return rel;
4142 : }
4143 :
4144 : static sql_rel *
4145 3714707 : rel_optimize_select_and_joins_topdown_(visitor *v, sql_rel *rel)
4146 : {
4147 : /* push_join_down introduces semijoins */
4148 3714707 : uint8_t cycle = *(uint8_t*) v->data;
4149 3714707 : if (cycle <= 0) {
4150 1621385 : rel = rel_semijoin_use_fk(v, rel);
4151 1621385 : rel = rel_push_join_down(v, rel);
4152 : }
4153 :
4154 3714707 : rel = rel_simplify_fk_joins(v, rel);
4155 3714706 : rel = rel_push_select_down(v, rel);
4156 3714706 : rel = rel_select_leftgroup_2_semi(v, rel);
4157 3714706 : if (rel && rel->l && (is_select(rel->op) || is_join(rel->op)))
4158 1171621 : rel = rel_use_index(v, rel);
4159 3714706 : return rel;
4160 : }
4161 :
4162 : static sql_rel *
4163 130162 : rel_optimize_select_and_joins_topdown(visitor *v, global_props *gp, sql_rel *rel)
4164 : {
4165 130162 : v->data = &gp->opt_cycle;
4166 130162 : rel = rel_visitor_topdown(v, rel, &rel_optimize_select_and_joins_topdown_);
4167 130162 : v->data = gp;
4168 130162 : return rel;
4169 : }
4170 :
4171 : run_optimizer
4172 638406 : bind_optimize_select_and_joins_topdown(visitor *v, global_props *gp)
4173 : {
4174 638406 : int flag = v->sql->sql_optimizer;
4175 638210 : return gp->opt_level == 1 && (gp->cnt[op_join] || gp->cnt[op_left] || gp->cnt[op_right]
4176 551646 : || gp->cnt[op_full] || gp->cnt[op_semi] || gp->cnt[op_anti] ||
4177 1276616 : gp->cnt[op_select]) && (flag & optimize_select_and_joins_topdown) ? rel_optimize_select_and_joins_topdown : NULL;
4178 : }
4179 :
4180 :
4181 : static int
4182 1248923 : can_push_func(sql_exp *e, sql_rel *rel, int *must, int depth)
4183 : {
4184 1294557 : switch(e->type) {
4185 1096160 : case e_cmp: {
4186 1096160 : sql_exp *l = e->l, *r = e->r, *f = e->f;
4187 :
4188 : /* don't push down functions inside attribute joins */
4189 1096160 : if (e->flag == cmp_or || e->flag == cmp_in || e->flag == cmp_notin || e->flag == cmp_filter || (is_join(rel->op) && is_any(e)))
4190 : return 0;
4191 1047152 : if (depth > 0) { /* for comparisons under the top ones, they become functions */
4192 66 : int lmust = 0;
4193 66 : int res = can_push_func(l, rel, &lmust, depth + 1) && can_push_func(r, rel, &lmust, depth + 1) &&
4194 24 : (!f || can_push_func(f, rel, &lmust, depth + 1));
4195 14 : if (res && !lmust)
4196 : return 1;
4197 64 : (*must) |= lmust;
4198 64 : return res;
4199 : } else {
4200 1047086 : int mustl = 0, mustr = 0, mustf = 0;
4201 1047086 : return ((l->type == e_column || can_push_func(l, rel, &mustl, depth + 1)) && (*must = mustl)) ||
4202 2086969 : ((r->type == e_column || can_push_func(r, rel, &mustr, depth + 1)) && (*must = mustr)) ||
4203 3634 : ((f && (f->type == e_column || can_push_func(f, rel, &mustf, depth + 1)) && (*must = mustf)));
4204 : }
4205 : }
4206 45634 : case e_convert:
4207 45634 : return can_push_func(e->l, rel, must, depth + 1);
4208 9579 : case e_aggr:
4209 : case e_func: {
4210 9579 : list *l = e->l;
4211 9579 : int res = 1, lmust = 0;
4212 :
4213 9579 : if (exp_unsafe(e, false, false))
4214 : return 0;
4215 27294 : if (l) for (node *n = l->h; n && res; n = n->next)
4216 17723 : res &= can_push_func(n->data, rel, &lmust, depth + 1);
4217 9571 : if (res && !lmust)
4218 : return 1;
4219 8699 : (*must) |= lmust;
4220 8699 : return res;
4221 : }
4222 51999 : case e_column:
4223 51999 : if (rel && !rel_find_exp(rel, e))
4224 : return 0;
4225 30531 : (*must) = 1;
4226 : /* fall through */
4227 : default:
4228 : return 1;
4229 : }
4230 : }
4231 :
4232 : static int
4233 915503 : exps_can_push_func(list *exps, sql_rel *rel)
4234 : {
4235 3936335 : for(node *n = exps->h; n; n = n->next) {
4236 3045443 : sql_exp *e = n->data;
4237 3045443 : int mustl = 0, mustr = 0;
4238 :
4239 3045443 : if ((is_joinop(rel->op) || is_select(rel->op)) && ((can_push_func(e, rel->l, &mustl, 0) && mustl)))
4240 24611 : return 1;
4241 3041115 : if (is_joinop(rel->op) && can_push_func(e, rel->r, &mustr, 0) && mustr)
4242 : return 1;
4243 : }
4244 : return 0;
4245 : }
4246 :
4247 : static int
4248 84756 : exp_needs_push_down(sql_rel *rel, sql_exp *e)
4249 : {
4250 110077 : switch(e->type) {
4251 28444 : case e_cmp:
4252 : /* don't push down functions inside attribute joins */
4253 28444 : if (e->flag == cmp_or || e->flag == cmp_in || e->flag == cmp_notin || e->flag == cmp_filter || (is_join(rel->op) && is_any(e)))
4254 : return 0;
4255 27511 : return exp_needs_push_down(rel, e->l) || exp_needs_push_down(rel, e->r) || (e->f && exp_needs_push_down(rel, e->f));
4256 25321 : case e_convert:
4257 25321 : return exp_needs_push_down(rel, e->l);
4258 1924 : case e_aggr:
4259 : case e_func:
4260 1924 : if (!e->l || exps_are_atoms(e->l))
4261 18 : return 0;
4262 : return 1;
4263 1463 : case e_atom:
4264 1463 : if (!e->f || exps_are_atoms(e->f))
4265 1463 : return 0;
4266 : return 1;
4267 : case e_column:
4268 : default:
4269 : return 0;
4270 : }
4271 : }
4272 :
4273 : static int
4274 24611 : exps_need_push_down(sql_rel *rel, list *exps )
4275 : {
4276 51137 : for(node *n = exps->h; n; n = n->next)
4277 28432 : if (exp_needs_push_down(rel, n->data))
4278 : return 1;
4279 : return 0;
4280 : }
4281 :
4282 : static sql_exp *exp_push_single_func_down(visitor *v, sql_rel *rel, sql_rel *ol, sql_rel *or, sql_exp *e, int depth);
4283 :
4284 : static list *
4285 2416 : exps_push_single_func_down(visitor *v, sql_rel *rel, sql_rel *ol, sql_rel *or, list *exps, int depth)
4286 : {
4287 4832 : if (mvc_highwater(v->sql))
4288 : return exps;
4289 :
4290 6724 : for (node *n = exps->h; n; n = n->next)
4291 4308 : if ((n->data = exp_push_single_func_down(v, rel, ol, or, n->data, depth)) == NULL)
4292 : return NULL;
4293 : return exps;
4294 : }
4295 :
4296 : static sql_exp *
4297 15738 : exp_push_single_func_down(visitor *v, sql_rel *rel, sql_rel *ol, sql_rel *or, sql_exp *e, int depth)
4298 : {
4299 27509 : if (mvc_highwater(v->sql))
4300 : return e;
4301 :
4302 15738 : switch(e->type) {
4303 4250 : case e_cmp: {
4304 4250 : if (e->flag == cmp_or || e->flag == cmp_filter) {
4305 246 : if ((e->l = exps_push_single_func_down(v, rel, ol, or, e->l, depth + 1)) == NULL)
4306 : return NULL;
4307 246 : if ((e->r = exps_push_single_func_down(v, rel, ol, or, e->r, depth + 1)) == NULL)
4308 : return NULL;
4309 4004 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
4310 18 : if ((e->l = exp_push_single_func_down(v, rel, ol, or, e->l, depth + 1)) == NULL)
4311 : return NULL;
4312 18 : if ((e->r = exps_push_single_func_down(v, rel, ol, or, e->r, depth + 1)) == NULL)
4313 : return NULL;
4314 : } else {
4315 3986 : if ((e->l = exp_push_single_func_down(v, rel, ol, or, e->l, depth + 1)) == NULL)
4316 : return NULL;
4317 3986 : if ((e->r = exp_push_single_func_down(v, rel, ol, or, e->r, depth + 1)) == NULL)
4318 : return NULL;
4319 3986 : if (e->f && (e->f = exp_push_single_func_down(v, rel, ol, or, e->f, depth + 1)) == NULL)
4320 : return NULL;
4321 : }
4322 : } break;
4323 2008 : case e_convert:
4324 2008 : if ((e->l = exp_push_single_func_down(v, rel, ol, or, e->l, depth + 1)) == NULL)
4325 : return NULL;
4326 : break;
4327 3990 : case e_aggr:
4328 : case e_func: {
4329 3990 : sql_rel *l = rel->l, *r = rel->r;
4330 3990 : int must = 0, mustl = 0, mustr = 0;
4331 :
4332 3990 : if (exp_unsafe(e, false, false))
4333 23 : return e;
4334 3990 : if (!e->l || exps_are_atoms(e->l))
4335 23 : return e;
4336 3967 : if ((is_joinop(rel->op) && ((can_push_func(e, l, &mustl, depth + 1) && mustl) || (can_push_func(e, r, &mustr, depth + 1) && mustr))) ||
4337 2973 : (is_select(rel->op) && can_push_func(e, l, &must, depth + 1) && must)) {
4338 3954 : exp_label(v->sql->sa, e, ++v->sql->label);
4339 : /* we need a full projection, group by's and unions cannot be extended with more expressions */
4340 3954 : if (mustr) {
4341 356 : if (r == or) /* don't project twice */
4342 333 : rel->r = r = rel_project(v->sql->sa, r, rel_projections(v->sql, r, NULL, 1, 1));
4343 356 : list_append(r->exps, e);
4344 : } else {
4345 3598 : if (l == ol) /* don't project twice */
4346 1842 : rel->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
4347 3598 : list_append(l->exps, e);
4348 : }
4349 3954 : e = exp_ref(v->sql, e);
4350 3954 : v->changes++;
4351 : }
4352 3967 : } break;
4353 1156 : case e_atom: {
4354 1156 : if (e->f && (e->f = exps_push_single_func_down(v, rel, ol, or, e->f, depth + 1)) == NULL)
4355 : return NULL;
4356 : } break;
4357 : case e_column:
4358 : case e_psm:
4359 : break;
4360 : }
4361 : return e;
4362 : }
4363 :
4364 : static inline sql_rel *
4365 3639289 : rel_push_func_down(visitor *v, sql_rel *rel)
4366 : {
4367 3639289 : if ((is_select(rel->op) || is_joinop(rel->op)) && rel->l && rel->exps && !(rel_is_ref(rel))) {
4368 1096748 : int changes = v->changes;
4369 1096748 : sql_rel *l = rel->l, *r = rel->r;
4370 :
4371 : /* only push down when is useful */
4372 1096748 : if ((is_select(rel->op) && list_length(rel->exps) <= 1) || rel_is_ref(l) || (is_joinop(rel->op) && rel_is_ref(r)))
4373 540148 : return rel;
4374 556600 : if (exps_can_push_func(rel->exps, rel) && exps_need_push_down(rel, rel->exps) && !exps_push_single_func_down(v, rel, l, r, rel->exps, 0))
4375 : return NULL;
4376 556600 : if (v->changes > changes) /* once we get a better join order, we can try to remove this projection */
4377 1906 : return rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
4378 : }
4379 3097235 : if (is_simple_project(rel->op) && rel->l && rel->exps) {
4380 1151638 : sql_rel *pl = rel->l;
4381 :
4382 1151638 : if (is_joinop(pl->op) && exps_can_push_func(rel->exps, rel)) {
4383 0 : sql_rel *l = pl->l, *r = pl->r, *ol = l, *or = r;
4384 :
4385 0 : for (node *n = rel->exps->h; n; ) {
4386 0 : node *next = n->next;
4387 0 : sql_exp *e = n->data;
4388 0 : int mustl = 0, mustr = 0;
4389 :
4390 0 : if ((can_push_func(e, l, &mustl, 0) && mustl) || (can_push_func(e, r, &mustr, 0) && mustr)) {
4391 0 : if (mustl) {
4392 0 : if (l == ol) /* don't project twice */
4393 0 : pl->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
4394 0 : list_append(l->exps, e);
4395 0 : list_remove_node(rel->exps, NULL, n);
4396 0 : v->changes++;
4397 : } else {
4398 0 : if (r == or) /* don't project twice */
4399 0 : pl->r = r = rel_project(v->sql->sa, r, rel_projections(v->sql, r, NULL, 1, 1));
4400 0 : list_append(r->exps, e);
4401 0 : list_remove_node(rel->exps, NULL, n);
4402 0 : v->changes++;
4403 : }
4404 : }
4405 0 : n = next;
4406 : }
4407 : }
4408 : }
4409 : return rel;
4410 : }
4411 :
4412 : static sql_rel *
4413 3639289 : rel_push_func_and_select_down_(visitor *v, sql_rel *rel)
4414 : {
4415 3639289 : if (rel)
4416 3639289 : rel = rel_push_func_down(v, rel);
4417 3639289 : if (rel)
4418 3639289 : rel = rel_push_select_down(v, rel);
4419 3639289 : return rel;
4420 : }
4421 :
4422 : static sql_rel *
4423 130162 : rel_push_func_and_select_down(visitor *v, global_props *gp, sql_rel *rel)
4424 : {
4425 130162 : (void) gp;
4426 130162 : return rel_visitor_topdown(v, rel, &rel_push_func_and_select_down_);
4427 : }
4428 :
4429 : run_optimizer
4430 638405 : bind_push_func_and_select_down(visitor *v, global_props *gp)
4431 : {
4432 638405 : int flag = v->sql->sql_optimizer;
4433 638209 : return gp->opt_level == 1 && (gp->cnt[op_join] || gp->cnt[op_left] || gp->cnt[op_right]
4434 551645 : || gp->cnt[op_full] || gp->cnt[op_semi] || gp->cnt[op_anti] || gp->cnt[op_select])
4435 768567 : && (flag & push_func_and_select_down) ? rel_push_func_and_select_down : NULL;
4436 : }
|