Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024, 2025 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer_private.h"
15 : #include "rel_exp.h"
16 : #include "rel_select.h"
17 : #include "rel_rewriter.h"
18 :
19 : /* Split_select optimizer splits case statements in select expressions. This is a step needed for cse */
20 : static void select_split_exps(mvc *sql, list *exps, sql_rel *rel);
21 :
22 : static sql_exp *
23 154375 : select_split_exp(mvc *sql, sql_exp *e, sql_rel *rel)
24 : {
25 154375 : switch(e->type) {
26 : case e_column:
27 : return e;
28 5171 : case e_convert:
29 5171 : e->l = select_split_exp(sql, e->l, rel);
30 5171 : return e;
31 14320 : case e_aggr:
32 : case e_func:
33 14320 : if (!is_analytic(e) && !exp_has_sideeffect(e)) {
34 14315 : sql_subfunc *f = e->f;
35 14315 : if (e->type == e_func && !f->func->s && is_caselike_func(f) /*is_ifthenelse_func(f)*/)
36 283 : return add_exp_too_project(sql, e, rel);
37 : }
38 : return e;
39 52187 : case e_cmp:
40 52187 : if (e->flag == cmp_or || e->flag == cmp_filter) {
41 13806 : select_split_exps(sql, e->l, rel);
42 13806 : select_split_exps(sql, e->r, rel);
43 38381 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
44 2799 : e->l = select_split_exp(sql, e->l, rel);
45 2799 : select_split_exps(sql, e->r, rel);
46 : } else {
47 35582 : e->l = select_split_exp(sql, e->l, rel);
48 35582 : e->r = select_split_exp(sql, e->r, rel);
49 35582 : if (e->f)
50 2438 : e->f = select_split_exp(sql, e->f, rel);
51 : }
52 : return e;
53 : case e_atom:
54 : case e_psm:
55 : return e;
56 : }
57 : return e;
58 : }
59 :
60 : static void
61 45535 : select_split_exps(mvc *sql, list *exps, sql_rel *rel)
62 : {
63 45535 : node *n;
64 :
65 45535 : if (!exps)
66 : return;
67 118338 : for(n=exps->h; n; n = n->next){
68 72803 : sql_exp *e = n->data;
69 :
70 72803 : e = select_split_exp(sql, e, rel);
71 72803 : n->data = e;
72 : }
73 : }
74 :
75 : static sql_rel *
76 682236 : rel_split_select_(visitor *v, sql_rel *rel)
77 : {
78 682236 : if (!rel || !is_select(rel->op) || list_empty(rel->exps) || !rel->l || mvc_highwater(v->sql))
79 578780 : return rel;
80 :
81 103456 : bool funcs = false;
82 :
83 : /* are there functions */
84 216936 : for (node *n = rel->exps->h; n && !funcs; n = n->next)
85 113480 : funcs = exp_has_func(n->data);
86 :
87 : /* introduce extra project */
88 103456 : if (funcs) {
89 15124 : sql_rel *nrel = rel_project(v->sql->sa, rel->l,
90 15124 : rel_projections(v->sql, rel->l, NULL, 1, 1));
91 15124 : if (!nrel || !nrel->exps)
92 : return NULL;
93 15124 : rel->l = nrel;
94 : /* recursively split all functions and add those to the projection list */
95 15124 : select_split_exps(v->sql, rel->exps, nrel);
96 15124 : return rel;
97 : }
98 : return rel;
99 : }
100 :
101 : static sql_rel *
102 59743 : rel_split_select(visitor *v, global_props *gp, sql_rel *rel)
103 : {
104 59743 : (void) gp;
105 59743 : return rel_visitor_bottomup(v, rel, &rel_split_select_);
106 : }
107 :
108 : run_optimizer
109 643842 : bind_split_select(visitor *v, global_props *gp)
110 : {
111 643842 : int flag = v->sql->sql_optimizer;
112 564061 : return gp->opt_cycle == 0 && gp->opt_level == 1 && (flag & split_select)
113 1207895 : && gp->cnt[op_select] ? rel_split_select : NULL;
114 : }
115 :
116 :
117 : /*
118 : * Remove a redundant join
119 : *
120 : * join (L, Distinct Project(join(L,P) [ p.key == l.lkey]) [p.key]) [ p.key == l.lkey]
121 : * =>
122 : * join(L, P) [p.key==l.lkey]
123 : */
124 : static sql_rel *
125 651050 : rel_remove_redundant_join_(visitor *v, sql_rel *rel)
126 : {
127 651050 : if ((is_join(rel->op) || is_semi(rel->op)) && !list_empty(rel->exps)) {
128 111322 : sql_rel *l = rel->l, *r = rel->r, *b, *p = NULL, *j;
129 :
130 111322 : if (is_basetable(l->op) && is_simple_project(r->op) && need_distinct(r)) {
131 8 : b = l;
132 8 : p = r;
133 8 : j = p->l;
134 111314 : } else if (is_basetable(r->op) && is_simple_project(l->op) && need_distinct(l)) {
135 2142 : b = r;
136 2142 : p = l;
137 2142 : j = p->l;
138 : }
139 111322 : if (!p || !j || j->op != rel->op)
140 : return rel;
141 : /* j must have b->l (ie table) */
142 9 : sql_rel *jl = j->l, *jr = j->r;
143 9 : if ((is_basetable(jl->op) && jl->l == b->l) ||
144 5 : (is_basetable(jr->op) && jr->l == b->l)) {
145 8 : int left = 0;
146 8 : if (is_basetable(jl->op) && jl->l == b->l)
147 8 : left = 1;
148 8 : if (!list_empty(p->exps)) {
149 13 : for (node *n=p->exps->h; n; n = n->next) { /* all exps of 'p' must be bound to the opposite side */
150 8 : sql_exp *e = n->data;
151 :
152 12 : if (!rel_rebind_exp(v->sql, left ? jr : jl, e))
153 : return rel;
154 : }
155 : }
156 5 : if (exp_match_list(j->exps, rel->exps)) {
157 0 : p->l = (left)?rel_dup(jr):rel_dup(jl);
158 0 : rel_destroy(j);
159 0 : set_nodistinct(p);
160 0 : v->changes++;
161 0 : return rel;
162 : }
163 : }
164 : }
165 : return rel;
166 : }
167 :
168 : static sql_rel *
169 49367 : rel_remove_redundant_join(visitor *v, global_props *gp, sql_rel *rel)
170 : {
171 49367 : (void) gp;
172 49367 : return rel_visitor_bottomup(v, rel, &rel_remove_redundant_join_); /* this optimizer has to run before rel_first_level_optimizations */
173 : }
174 :
175 : run_optimizer
176 643796 : bind_remove_redundant_join(visitor *v, global_props *gp)
177 : {
178 643796 : int flag = v->sql->sql_optimizer;
179 564087 : return gp->opt_cycle == 0 && gp->opt_level == 1 && (gp->cnt[op_left] || gp->cnt[op_right]
180 545371 : || gp->cnt[op_full] || gp->cnt[op_join] || gp->cnt[op_semi] || gp->cnt[op_anti]) &&
181 693062 : (flag & remove_redundant_join) ? rel_remove_redundant_join : NULL;
182 : }
183 :
184 :
185 : static list *
186 540916 : exp_merge_range(visitor *v, sql_rel *rel, list *exps)
187 : {
188 540916 : node *n, *m;
189 1157634 : for (n=exps->h; n; n = n->next) {
190 618614 : sql_exp *e = n->data;
191 618614 : sql_exp *le = e->l;
192 618614 : sql_exp *re = e->r;
193 :
194 : /* handle the and's in the or lists */
195 618614 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
196 30536 : e->l = exp_merge_range(v, rel, e->l);
197 30536 : e->r = exp_merge_range(v, rel, e->r);
198 : /* only look for gt, gte, lte, lt */
199 588078 : } else if (n->next &&
200 76592 : e->type == e_cmp && e->flag < cmp_equal && !e->f &&
201 6092 : re->card == CARD_ATOM && !is_anti(e)) {
202 1176 : for (m=n->next; m; m = m->next) {
203 716 : sql_exp *f = m->data;
204 716 : sql_exp *lf = f->l;
205 716 : sql_exp *rf = f->r;
206 716 : int c_le = is_numeric_upcast(le), c_lf = is_numeric_upcast(lf);
207 :
208 716 : if (f->type == e_cmp && f->flag < cmp_equal && !f->f &&
209 645 : rf->card == CARD_ATOM && !is_anti(f) &&
210 322 : exp_match_exp(c_le?le->l:le, c_lf?lf->l:lf)) {
211 173 : sql_exp *ne;
212 173 : int swap = 0, lt = 0, gt = 0;
213 173 : sql_subtype super;
214 : /* for now only c1 <[=] x <[=] c2 */
215 :
216 173 : swap = lt = (e->flag == cmp_lt || e->flag == cmp_lte);
217 173 : gt = !lt;
218 :
219 173 : if (gt &&
220 149 : (f->flag == cmp_gt ||
221 : f->flag == cmp_gte))
222 30 : continue;
223 24 : if (lt &&
224 24 : (f->flag == cmp_lt ||
225 : f->flag == cmp_lte))
226 0 : continue;
227 :
228 143 : cmp_supertype(&super, exp_subtype(le), exp_subtype(lf));
229 143 : if (!(rf = exp_check_type(v->sql, &super, rel, rf, type_equal)) ||
230 143 : !(le = exp_check_type(v->sql, &super, rel, le, type_equal)) ||
231 143 : !(re = exp_check_type(v->sql, &super, rel, re, type_equal))) {
232 0 : v->sql->session->status = 0;
233 0 : v->sql->errstr[0] = 0;
234 0 : continue;
235 : }
236 143 : if (!swap)
237 119 : ne = exp_compare2(v->sql->sa, le, re, rf, compare2range(e->flag, f->flag), 0);
238 : else
239 24 : ne = exp_compare2(v->sql->sa, le, rf, re, compare2range(f->flag, e->flag), 0);
240 :
241 143 : list_remove_data(exps, NULL, e);
242 143 : list_remove_data(exps, NULL, f);
243 143 : list_append(exps, ne);
244 143 : v->changes++;
245 143 : return exp_merge_range(v, rel, exps);
246 : }
247 : }
248 587475 : } else if (n->next &&
249 75989 : e->type == e_cmp && e->flag < cmp_equal && !e->f &&
250 5489 : re->card > CARD_ATOM && !is_anti(e)) {
251 9718 : for (m=n->next; m; m = m->next) {
252 5982 : sql_exp *f = m->data;
253 5982 : sql_exp *lf = f->l;
254 5982 : sql_exp *rf = f->r;
255 :
256 5982 : if (f->type == e_cmp && f->flag < cmp_equal && !f->f &&
257 4689 : rf->card > CARD_ATOM && !is_anti(f)) {
258 4688 : sql_exp *ne, *t;
259 4688 : int swap = 0, lt = 0, gt = 0;
260 4688 : comp_type ef = (comp_type) e->flag, ff = (comp_type) f->flag;
261 4688 : int c_re = is_numeric_upcast(re), c_rf = is_numeric_upcast(rf);
262 4688 : int c_le = is_numeric_upcast(le), c_lf = is_numeric_upcast(lf), c;
263 4688 : sql_subtype super;
264 :
265 : /* both swapped ? */
266 4688 : if (exp_match_exp(c_re?re->l:re, c_rf?rf->l:rf)) {
267 19 : t = re;
268 19 : re = le;
269 19 : le = t;
270 19 : c = c_re; c_re = c_le; c_le = c;
271 19 : ef = swap_compare(ef);
272 19 : t = rf;
273 19 : rf = lf;
274 19 : lf = t;
275 19 : c = c_rf; c_rf = c_lf; c_lf = c;
276 19 : ff = swap_compare(ff);
277 : }
278 :
279 : /* is left swapped ? */
280 4688 : if (exp_match_exp(c_re?re->l:re, c_lf?lf->l:lf)) {
281 104 : t = re;
282 104 : re = le;
283 104 : le = t;
284 104 : c = c_re; c_re = c_le; c_le = c;
285 104 : ef = swap_compare(ef);
286 : }
287 :
288 : /* is right swapped ? */
289 4688 : if (exp_match_exp(c_le?le->l:le, c_rf?rf->l:rf)) {
290 177 : t = rf;
291 177 : rf = lf;
292 177 : lf = t;
293 177 : c = c_rf; c_rf = c_lf; c_lf = c;
294 177 : ff = swap_compare(ff);
295 : }
296 :
297 4688 : if (!exp_match_exp(c_le?le->l:le, c_lf?lf->l:lf))
298 2935 : continue;
299 :
300 : /* for now only c1 <[=] x <[=] c2 */
301 1830 : swap = lt = (ef == cmp_lt || ef == cmp_lte);
302 1830 : gt = !lt;
303 :
304 1830 : if (gt && (ff == cmp_gt || ff == cmp_gte))
305 73 : continue;
306 1757 : if (lt && (ff == cmp_lt || ff == cmp_lte))
307 4 : continue;
308 :
309 1753 : cmp_supertype(&super, exp_subtype(le), exp_subtype(lf));
310 1753 : if (!(rf = exp_check_type(v->sql, &super, rel, rf, type_equal)) ||
311 1753 : !(le = exp_check_type(v->sql, &super, rel, le, type_equal)) ||
312 1753 : !(re = exp_check_type(v->sql, &super, rel, re, type_equal))) {
313 0 : v->sql->session->status = 0;
314 0 : v->sql->errstr[0] = 0;
315 0 : continue;
316 : }
317 1753 : if (!swap)
318 1653 : ne = exp_compare2(v->sql->sa, le, re, rf, compare2range(ef, ff), 0);
319 : else
320 100 : ne = exp_compare2(v->sql->sa, le, rf, re, compare2range(ff, ef), 0);
321 :
322 1753 : list_remove_data(exps, NULL, e);
323 1753 : list_remove_data(exps, NULL, f);
324 1753 : list_append(exps, ne);
325 1753 : v->changes++;
326 1753 : return exp_merge_range(v, rel, exps);
327 : }
328 : }
329 : }
330 : }
331 : return exps;
332 : }
333 :
334 : static int
335 26950 : exps_cse( mvc *sql, list *oexps, list *l, list *r )
336 : {
337 26950 : list *nexps;
338 26950 : node *n, *m;
339 26950 : char *lu, *ru;
340 26950 : int lc = 0, rc = 0, match = 0, res = 0;
341 :
342 26950 : if (list_length(l) == 0 || list_length(r) == 0)
343 0 : return 0;
344 :
345 : /* first recursive exps_cse */
346 26950 : nexps = new_exp_list(sql->sa);
347 57702 : for (n = l->h; n; n = n->next) {
348 30752 : sql_exp *e = n->data;
349 :
350 30752 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
351 8727 : res = exps_cse(sql, nexps, e->l, e->r);
352 : } else {
353 22025 : append(nexps, e);
354 : }
355 : }
356 26950 : l = nexps;
357 :
358 26950 : nexps = new_exp_list(sql->sa);
359 60882 : for (n = r->h; n; n = n->next) {
360 33932 : sql_exp *e = n->data;
361 :
362 33932 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
363 1732 : res = exps_cse(sql, nexps, e->l, e->r);
364 : } else {
365 32200 : append(nexps, e);
366 : }
367 : }
368 26950 : r = nexps;
369 :
370 : /* simplify true or .. and .. or true */
371 26950 : if (list_length(l) == list_length(r) && list_length(l) == 1) {
372 22414 : sql_exp *le = l->h->data, *re = r->h->data;
373 :
374 22414 : if (exp_is_true(le)) {
375 14 : append(oexps, le);
376 14 : return 1;
377 : }
378 22400 : if (exp_is_true(re)) {
379 5 : append(oexps, re);
380 5 : return 1;
381 : }
382 : }
383 :
384 26931 : lu = SA_ZNEW_ARRAY(sql->ta, char, list_length(l));
385 26931 : ru = SA_ZNEW_ARRAY(sql->ta, char, list_length(r));
386 57681 : for (n = l->h, lc = 0; n; n = n->next, lc++) {
387 30750 : sql_exp *le = n->data;
388 :
389 70475 : for ( m = r->h, rc = 0; m; m = m->next, rc++) {
390 39725 : sql_exp *re = m->data;
391 :
392 39725 : if (!ru[rc] && exp_match_exp(le,re)) {
393 68 : lu[lc] = 1;
394 68 : ru[rc] = 1;
395 68 : match = 1;
396 : }
397 : }
398 : }
399 26931 : if (match) {
400 49 : list *nl = new_exp_list(sql->sa);
401 49 : list *nr = new_exp_list(sql->sa);
402 :
403 162 : for (n = l->h, lc = 0; n; n = n->next, lc++)
404 113 : if (!lu[lc])
405 49 : append(nl, n->data);
406 176 : for (n = r->h, rc = 0; n; n = n->next, rc++)
407 127 : if (!ru[rc])
408 59 : append(nr, n->data);
409 :
410 49 : if (list_length(nl) && list_length(nr))
411 23 : append(oexps, exp_or(sql->sa, nl, nr, 0));
412 :
413 162 : for (n = l->h, lc = 0; n; n = n->next, lc++) {
414 113 : if (lu[lc])
415 64 : append(oexps, n->data);
416 : }
417 : res = 1;
418 : } else {
419 26882 : append(oexps, exp_or(sql->sa, list_dup(l, (fdup)NULL),
420 : list_dup(r, (fdup)NULL), 0));
421 : }
422 : return res;
423 : }
424 :
425 : static inline int
426 23340 : exp_col_key(sql_exp *e)
427 : {
428 17584 : return e->nid ? e->nid : e->alias.label;
429 : }
430 :
431 : static inline int
432 5780 : exp_cmp_eq_unique_id(sql_exp *e)
433 : {
434 5780 : return exp_col_key(e->l);
435 : }
436 :
437 : static inline int
438 1773 : exp_multi_col_key(list *l)
439 : {
440 1773 : int k = exp_col_key(l->h->data);
441 5780 : for (node *n = l->h->next; n; n = n->next) {
442 4007 : k <<= 4;
443 8014 : k ^= exp_col_key(n->data);
444 : }
445 1773 : return k;
446 : }
447 :
448 : typedef struct exp_eq_col_values {
449 : /* we need ->first in order to remove it from the list of cmp_eq exps
450 : * in case that we find another occurrence (with a different value)
451 : */
452 : sql_exp* first;
453 : sql_exp* col; /* column */
454 : list *vs; /* list of values */
455 : } eq_cv;
456 :
457 : typedef struct exp_eq_multi_col_values {
458 : /* we need ->first in order to remove it from the list of multi col
459 : * cmp_eq exps in case that we find another occurrence (with different values)
460 : */
461 : list *first;
462 : list *cols; /* list of col exps */
463 : list *lvs; /* list of lists of values */
464 : } eq_mcv;
465 :
466 : static bool
467 4271 : detect_col_cmp_eqs(mvc *sql, list *eqs, sql_hash *eqh)
468 : {
469 4271 : bool col_multivalue_cmp_eq = false;
470 16051 : for (node *n = eqs->h; n; n = n->next ) {
471 11780 : sql_exp *e = n->data;
472 11780 : sql_exp *le = e->l, *re = e->r;
473 :
474 : /* find the le in the hash and append the re in the hash value (ea->list) */
475 11780 : bool found = false;
476 :
477 11780 : int key = eqh->key(le);
478 11780 : sql_hash_e *he = eqh->buckets[key&(eqh->size-1)];
479 :
480 14676 : for (;he && !found; he = he->chain) {
481 2896 : eq_cv *cv = he->value;
482 2896 : if (!exp_equal(le, cv->col)) {
483 2896 : cv->vs = append(cv->vs, re);
484 2896 : found = col_multivalue_cmp_eq = true;
485 : /* remove this and the previous (->first) occurrence (if exists) from eqs */
486 2896 : if (cv->first) {
487 1793 : list_remove_data(eqs, NULL, cv->first);
488 1793 : cv->first = NULL;
489 : }
490 2896 : list_remove_node(eqs, NULL, n);
491 : }
492 : }
493 :
494 11780 : if (!found) {
495 8884 : eq_cv *cv = SA_NEW(sql->sa, eq_cv);
496 8884 : cv->first = e;
497 8884 : cv->vs = sa_list(sql->sa);
498 8884 : cv->vs = append(cv->vs, re);
499 8884 : cv->col = le;
500 :
501 8884 : hash_add(eqh, key, cv);
502 : }
503 : }
504 4271 : return col_multivalue_cmp_eq;
505 : }
506 :
507 : static bool
508 693 : detect_multicol_cmp_eqs(mvc *sql, list *mce_ands, sql_hash *meqh)
509 : {
510 : /* we get as input a list of AND associated expressions (hence the entries are lists themselves)
511 : * we need to detect cmp_eq-only AND-associated expressions with the same columns so we can
512 : * group together their values
513 : * e.g. [[n = 1, m = 10], [m = 20, k = 100, l = 3000], [m = 20, n = 2]] has
514 : * - (m,k,l) group with a single value (20, 100, 3000)
515 : * - (n,k) group with two values (1, 10) and (2, 20)
516 : * at the end we return true only if we have at least a group of columns with more than a single value
517 : * e.g. in this example (n,k)
518 : */
519 693 : bool multi_multivalue_cmp_eq = false;
520 2466 : for (node *n = mce_ands->h; n; n = n->next) {
521 1773 : list *l = n->data;
522 :
523 : /* sort the list of the cmp_eq expressions based on the col exp
524 : * NOTE: from now on we only work with the sorted list, sl */
525 1773 : list *sl = list_sort(l, (fkeyvalue)&exp_cmp_eq_unique_id, NULL);
526 1773 : list_append_before(mce_ands, n, sl);
527 1773 : list_remove_node(mce_ands, NULL, n);
528 :
529 : /* find the eq exp in the hash and append the values */
530 1773 : bool found = false;
531 :
532 1773 : int key = meqh->key(sl);
533 1773 : sql_hash_e *he = meqh->buckets[key&(meqh->size-1)];
534 :
535 2875 : for (;he && !found; he = he->chain) {
536 : /* compare the values of the hash_entry with the cols under cmp_eq from the list */
537 1102 : bool same_cols = true;
538 1102 : eq_mcv *mcv = he->value;
539 3495 : for (node *m = sl->h, *k = mcv->cols->h; m && k && same_cols; m = m->next, k = k->next) {
540 2393 : sql_exp *col_exp = ((sql_exp*)m->data)->l;
541 2393 : if (exp_equal(col_exp, k->data))
542 663 : same_cols = false;
543 : }
544 1102 : if (same_cols) {
545 : /* we found the same multi cmp_eq exp in mce_ands list multiple times! */
546 439 : found = multi_multivalue_cmp_eq = true;
547 : /* gather all the values of the list and add them to the hash entry */
548 439 : list *atms = sa_list(sql->sa);
549 1799 : for (node *m = sl->h; m; m = m->next)
550 1360 : atms = append(atms, ((sql_exp*)m->data)->r);
551 439 : mcv->lvs = append(mcv->lvs, atms);
552 : /* remove this and the previous occurrence (which means that's the first time
553 : * that we found the *same* multi cmp_eq exp)
554 : */
555 439 : if (mcv->first) {
556 81 : list_remove_data(mce_ands, NULL, mcv->first);
557 81 : mcv->first = NULL;
558 : }
559 439 : list_remove_data(mce_ands, NULL, sl);
560 : }
561 : }
562 :
563 1773 : if (!found) {
564 1334 : eq_mcv *mcv = SA_NEW(sql->sa, eq_mcv);
565 1334 : mcv->first = sl;
566 1334 : mcv->cols = sa_list(sql->sa);
567 5754 : for (node *m = sl->h; m; m = m->next)
568 4420 : mcv->cols = append(mcv->cols, ((sql_exp*)m->data)->l);
569 : /* for the list of values (atoms) create a list and append it to the lvs list */
570 1334 : list *atms = sa_list(sql->sa);
571 5754 : for (node *m = sl->h; m; m = m->next)
572 4420 : atms = append(atms, ((sql_exp*)m->data)->r);
573 1334 : mcv->lvs = sa_list(sql->sa);
574 1334 : mcv->lvs = append(mcv->lvs, atms);
575 :
576 1334 : hash_add(meqh, key, mcv);
577 : }
578 : }
579 693 : return multi_multivalue_cmp_eq;
580 : }
581 :
582 : static void
583 47472 : exp_or_chain_groups(mvc *sql, list *exps, list **gen_ands, list **mce_ands, list **eqs, list **noneq)
584 : {
585 : /* identify three different groups
586 : * 1. gen_ands: lists of generic expressions (their inner association is AND)
587 : * 2. mce_ands: lists of multi_colum cmp_eq ONLY expressions (same^^^)
588 : * 3. eqs: equality expressions
589 : * 4. neq: non equality col expressions
590 : *
591 : * return true if there is an exp with more than one cmp_eq
592 : */
593 60078 : bool eq_only = true;
594 131675 : for (node *n = exps->h; n && eq_only; n = n->next) {
595 71597 : sql_exp *e = n->data;
596 71597 : sql_exp *le = e->l, *re = e->r;
597 142941 : eq_only &= (e->type == e_cmp && e->flag == cmp_equal &&
598 34126 : le->card != CARD_ATOM && is_column(le->type) &&
599 104647 : re->card == CARD_ATOM && !is_semantics(e));
600 : }
601 :
602 60078 : if (list_length(exps) > 1) {
603 5409 : if (eq_only)
604 4522 : *mce_ands = append(*mce_ands, exps);
605 : else
606 887 : *gen_ands = append(*gen_ands, exps);
607 54669 : } else if (list_length(exps) == 1) {
608 54669 : sql_exp *se = exps->h->data;
609 54669 : sql_exp *le = se->l, *re = se->r;
610 :
611 54669 : if (se->type == e_cmp && se->flag == cmp_or && !is_anti(se)) {
612 : /* for a cmp_or expression go down the tree */
613 12606 : exp_or_chain_groups(sql, (list*)le, gen_ands, mce_ands, eqs, noneq);
614 12606 : exp_or_chain_groups(sql, (list*)re, gen_ands, mce_ands, eqs, noneq);
615 :
616 42063 : } else if (eq_only) {
617 15312 : *eqs = append(*eqs, se);
618 : } else {
619 26751 : *noneq = append(*noneq, se);
620 : }
621 : }
622 47472 : }
623 :
624 : static list *
625 1686 : generate_single_col_cmp_in(mvc *sql, sql_hash *eqh)
626 : {
627 : /* from single col cmp_eq with multiple atoms in the hash generate
628 : * "e_col in (val0, val1, ...)" (see detect_col_cmp_eqs())
629 : */
630 1686 : list *ins = new_exp_list(sql->sa);
631 55638 : for (int i = 0; i < eqh->size; i++) {
632 53952 : sql_hash_e *he = eqh->buckets[i];
633 :
634 56592 : while (he) {
635 2640 : eq_cv *cv = he->value;
636 : /* NOTE: cmp_eq expressions with a single entry are still in eqs */
637 2640 : if (list_length(cv->vs) > 1)
638 1793 : ins = append(ins, exp_in(sql->sa, cv->col, cv->vs, cmp_in));
639 2640 : he = he->chain;
640 : }
641 : }
642 1686 : return ins;
643 : }
644 :
645 : static list *
646 81 : generate_multi_col_cmp_in(mvc *sql, sql_hash *meqh)
647 : {
648 : /* from multivalue cmp_eq with multiple lists of atoms in the hash generate
649 : * "(col1, col2, ...) in [(val10, val20, ...), (val11, val21, ...), ... ]"
650 : * (see detect_multicol_cmp_eqs())
651 : */
652 81 : list *ins = new_exp_list(sql->sa);
653 2673 : for (int i = 0; i < meqh->size; i++) {
654 2592 : sql_hash_e *he = meqh->buckets[i];
655 2687 : while (he) {
656 95 : eq_mcv *mcv = he->value;
657 : /* NOTE: multivalue cmp_eq expressions with a single entry are still in mce_ands */
658 95 : if (list_length(mcv->lvs) > 1) {
659 81 : sql_exp *mc = exp_label(sql->sa, exp_values(sql->sa, mcv->cols), ++sql->label);
660 601 : for (node *a = mcv->lvs->h; a; a = a->next)
661 520 : a->data = exp_values(sql->sa, a->data);
662 81 : ins = append(ins, exp_in(sql->sa, mc, mcv->lvs, cmp_in));
663 : }
664 95 : he = he->chain;
665 : }
666 : }
667 81 : return ins;
668 : }
669 :
670 : static list *
671 211406 : merge_ors(mvc *sql, list *exps, int *changes)
672 : {
673 211406 : sql_hash *eqh = NULL, *meqh = NULL;
674 211406 : list *eqs = NULL, *neq = NULL, *gen_ands = NULL, *mce_ands = NULL, *ins = NULL, *mins = NULL;
675 449993 : for (node *n = exps->h; n; n = n->next) {
676 238587 : sql_exp *e = n->data;
677 :
678 238587 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
679 : /* NOTE: gen_ands and mce_ands are both a list of lists since the AND association
680 : * between expressions is expressed with a list
681 : * e.g. [[e1, e2], [e3, e4, e5]] semantically translates
682 : * to [(e1 AND e2), (e3 AND e4 AND e5)]
683 : * those (internal) AND list can be then used to
684 : * reconstructed an OR tree [[e1, e2], [e3, e4, e5]] =>
685 : * (([e1, e2] OR [e3, e4, e5]) OR <whatever-else> )
686 : * gen_ands includes general expressions associated with AND
687 : * mce_ands includes only cmp_eq expressions associated with AND
688 : */
689 17433 : gen_ands = new_exp_list(sql->sa);
690 17433 : mce_ands = new_exp_list(sql->sa);
691 17433 : eqs = new_exp_list(sql->sa);
692 17433 : neq = new_exp_list(sql->sa);
693 :
694 : /* walk the OR tree */
695 17433 : exp_or_chain_groups(sql, e->l, &gen_ands, &mce_ands, &eqs, &neq);
696 17433 : exp_or_chain_groups(sql, e->r, &gen_ands, &mce_ands, &eqs, &neq);
697 :
698 : /* detect col cmp_eq exps with multiple values */
699 17433 : bool col_multival = false;
700 17433 : if (list_length(eqs) > 1) {
701 4271 : eqh = hash_new(sql->sa, 32, (fkeyvalue)&exp_col_key);
702 4271 : col_multival = detect_col_cmp_eqs(sql, eqs, eqh);
703 : }
704 :
705 : /* detect mutli-col cmp_eq exps with multiple (lists of) values */
706 17433 : bool multicol_multival = false;
707 17433 : if (list_length(mce_ands) > 1) {
708 693 : meqh = hash_new(sql->sa, 32, (fkeyvalue)&exp_multi_col_key);
709 693 : multicol_multival = detect_multicol_cmp_eqs(sql, mce_ands, meqh);
710 : }
711 :
712 17433 : if (!col_multival && !multicol_multival)
713 15675 : continue;
714 :
715 1758 : if (col_multival)
716 1686 : ins = generate_single_col_cmp_in(sql, eqh);
717 :
718 1758 : if (multicol_multival)
719 81 : mins = generate_multi_col_cmp_in(sql, meqh);
720 :
721 : /* create the new OR tree */
722 1758 : sql_exp *new = (ins) ? ins->h->data : mins->h->data;
723 :
724 72 : if (ins) {
725 1793 : for (node *i = ins->h->next; i; i = i->next) {
726 107 : list *l = new_exp_list(sql->sa);
727 107 : list *r = new_exp_list(sql->sa);
728 107 : l = append(l, new);
729 107 : r = append(r, (sql_exp*)i->data);
730 107 : new = exp_or(sql->sa, l, r, 0);
731 :
732 107 : (*changes)++;
733 : }
734 : }
735 :
736 1758 : if (list_length(eqs)) {
737 1448 : for (node *i = eqs->h; i; i = i->next) {
738 872 : list *l = new_exp_list(sql->sa);
739 872 : list *r = new_exp_list(sql->sa);
740 872 : l = append(l, new);
741 872 : r = append(r, (sql_exp*)i->data);
742 872 : new = exp_or(sql->sa, l, r, 0);
743 : }
744 : }
745 :
746 1758 : if (mins) {
747 171 : for (node *i = ((ins) ? mins->h : mins->h->next); i; i = i->next) {
748 9 : list *l = new_exp_list(sql->sa);
749 9 : list *r = new_exp_list(sql->sa);
750 9 : l = append(l, new);
751 9 : r = append(r, (sql_exp*)i->data);
752 9 : new = exp_or(sql->sa, l, r, 0);
753 :
754 9 : (*changes)++;
755 : }
756 : }
757 :
758 1758 : if (list_length(mce_ands)) {
759 523 : for (node *i = mce_ands->h; i; i = i->next) {
760 273 : list *l = new_exp_list(sql->sa);
761 273 : l = append(l, new);
762 273 : new = exp_or(sql->sa, l, i->data, 0);
763 : }
764 : }
765 :
766 1761 : for (node *a = gen_ands->h; a; a = a->next){
767 3 : list *l = new_exp_list(sql->sa);
768 3 : l = append(l, new);
769 3 : new = exp_or(sql->sa, l, a->data, 0);
770 : }
771 :
772 2076 : for (node *o = neq->h; o; o = o->next){
773 318 : list *l = new_exp_list(sql->sa);
774 318 : list *r = new_exp_list(sql->sa);
775 318 : l = append(l, new);
776 318 : r = append(r, (sql_exp*)o->data);
777 318 : new = exp_or(sql->sa, l, r, 0);
778 : }
779 :
780 1758 : list_remove_node(exps, NULL, n);
781 1758 : exps = append(exps, new);
782 : }
783 : }
784 211406 : return exps;
785 : }
786 :
787 : #define TRIVIAL_NOT_EQUAL_CMP(e) \
788 : ((e)->type == e_cmp && (e)->flag == cmp_notequal && !is_anti((e)) && !is_semantics((e)) && ((sql_exp*)(e)->l)->card != CARD_ATOM && ((sql_exp*)(e)->r)->card == CARD_ATOM)
789 :
790 : static list *
791 265304 : merge_notequal(mvc *sql, list *exps, int *changes)
792 : {
793 265304 : list *inequality_groups = NULL, *nexps = NULL;
794 265304 : int needed = 0;
795 :
796 568141 : for (node *n = exps->h; n; n = n->next) {
797 302837 : sql_exp *e = n->data;
798 :
799 302837 : if (TRIVIAL_NOT_EQUAL_CMP(e)) {
800 21402 : bool appended = false;
801 :
802 21402 : if (inequality_groups) {
803 1074 : for (node *m = inequality_groups->h; m && !appended; m = m->next) {
804 537 : list *next = m->data;
805 537 : sql_exp *first = (sql_exp*) next->h->data;
806 :
807 537 : if (exp_match(first->l, e->l)) {
808 536 : list_append(next, e);
809 536 : appended = true;
810 : }
811 : }
812 : }
813 537 : if (!appended) {
814 20866 : if (!inequality_groups)
815 20865 : inequality_groups = new_exp_list(sql->sa);
816 20866 : list_append(inequality_groups, list_append(new_exp_list(sql->sa), e));
817 : }
818 : }
819 : }
820 :
821 265304 : if (inequality_groups) { /* if one list of inequalities has more than one entry, then the re-write is needed */
822 41731 : for (node *n = inequality_groups->h; n; n = n->next) {
823 20866 : list *next = n->data;
824 :
825 20866 : if (list_length(next) > 1)
826 533 : needed = 1;
827 : }
828 : }
829 :
830 20865 : if (needed) {
831 533 : nexps = new_exp_list(sql->sa);
832 1067 : for (node *n = inequality_groups->h; n; n = n->next) {
833 534 : list *next = n->data;
834 534 : sql_exp *first = (sql_exp*) next->h->data;
835 :
836 534 : if (list_length(next) > 1) {
837 533 : list *notin = new_exp_list(sql->sa);
838 :
839 1602 : for (node *m = next->h; m; m = m->next) {
840 1069 : sql_exp *e = m->data;
841 1069 : list_append(notin, e->r);
842 : }
843 533 : list_append(nexps, exp_in(sql->sa, first->l, notin, cmp_notin));
844 : } else {
845 1 : list_append(nexps, first);
846 : }
847 : }
848 :
849 1604 : for (node *n = exps->h; n; n = n->next) {
850 1071 : sql_exp *e = n->data;
851 :
852 1071 : if (!TRIVIAL_NOT_EQUAL_CMP(e))
853 1 : list_append(nexps, e);
854 : }
855 533 : (*changes)++;
856 : } else {
857 : nexps = exps;
858 : }
859 :
860 567605 : for (node *n = nexps->h; n ; n = n->next) {
861 302301 : sql_exp *e = n->data;
862 :
863 302301 : if (e->type == e_cmp && e->flag == cmp_or) {
864 26949 : e->l = merge_notequal(sql, e->l, changes);
865 26949 : e->r = merge_notequal(sql, e->r, changes);
866 : }
867 : }
868 :
869 265304 : return nexps;
870 : }
871 :
872 : int
873 113422 : is_numeric_upcast(sql_exp *e)
874 : {
875 113422 : if (is_convert(e->type)) {
876 4536 : sql_subtype *f = exp_fromtype(e);
877 4536 : sql_subtype *t = exp_totype(e);
878 :
879 4536 : if (f->type->eclass == t->type->eclass && EC_COMPUTE(f->type->eclass)) {
880 1587 : if (f->type->localtype < t->type->localtype)
881 1567 : return 1;
882 : }
883 : }
884 : return 0;
885 : }
886 :
887 : /* optimize (a = b) or (a is null and b is null) -> a = b with null semantics */
888 : static sql_exp *
889 33024 : try_rewrite_equal_or_is_null(mvc *sql, sql_rel *rel, sql_exp *or, list *l1, list *l2)
890 : {
891 33024 : if (list_length(l1) == 1) {
892 29653 : bool valid = true, first_is_null_found = false, second_is_null_found = false;
893 29653 : sql_exp *cmp = l1->h->data;
894 29653 : sql_exp *first = cmp->l, *second = cmp->r;
895 :
896 29653 : if (is_compare(cmp->type) && !is_anti(cmp) && !cmp->f && cmp->flag == cmp_equal) {
897 6995 : int fupcast = is_numeric_upcast(first), supcast = is_numeric_upcast(second);
898 14014 : for(node *n = l2->h ; n && valid; n = n->next) {
899 7019 : sql_exp *e = n->data, *l = e->l, *r = e->r;
900 :
901 7019 : if (is_compare(e->type) && e->flag == cmp_equal && !e->f &&
902 4174 : !is_anti(e) && is_semantics(e)) {
903 1374 : int lupcast = is_numeric_upcast(l);
904 1374 : int rupcast = is_numeric_upcast(r);
905 1374 : sql_exp *rr = rupcast ? r->l : r;
906 :
907 1374 : if (rr->type == e_atom && rr->l && atom_null(rr->l)) {
908 1374 : if (exp_match_exp(fupcast?first->l:first, lupcast?l->l:l))
909 : first_is_null_found = true;
910 116 : else if (exp_match_exp(supcast?second->l:second, lupcast?l->l:l))
911 : second_is_null_found = true;
912 : else
913 5737 : valid = false;
914 : } else {
915 : valid = false;
916 : }
917 : } else {
918 : valid = false;
919 : }
920 : }
921 6995 : if (valid && first_is_null_found && second_is_null_found) {
922 21 : sql_subtype super;
923 :
924 21 : cmp_supertype(&super, exp_subtype(first), exp_subtype(second)); /* first and second must have the same type */
925 21 : if (!(first = exp_check_type(sql, &super, rel, first, type_equal)) ||
926 21 : !(second = exp_check_type(sql, &super, rel, second, type_equal))) {
927 0 : sql->session->status = 0;
928 0 : sql->errstr[0] = 0;
929 0 : return or;
930 : }
931 21 : sql_exp *res = exp_compare(sql->sa, first, second, cmp->flag);
932 21 : set_semantics(res);
933 21 : if (exp_name(or))
934 0 : exp_prop_alias(sql->sa, res, or);
935 21 : return res;
936 : }
937 : }
938 : }
939 : return or;
940 : }
941 :
942 : static list *
943 477948 : merge_cmp_or_null(mvc *sql, sql_rel *rel, list *exps, int *changes)
944 : {
945 1020542 : for (node *n = exps->h; n ; n = n->next) {
946 542594 : sql_exp *e = n->data;
947 :
948 542594 : if (is_compare(e->type) && e->flag == cmp_or && !is_anti(e)) {
949 16512 : sql_exp *ne = try_rewrite_equal_or_is_null(sql, rel, e, e->l, e->r);
950 16512 : if (ne != e) {
951 19 : (*changes)++;
952 19 : n->data = ne;
953 : }
954 16512 : ne = try_rewrite_equal_or_is_null(sql, rel, e, e->r, e->l);
955 16512 : if (ne != e) {
956 2 : (*changes)++;
957 2 : n->data = ne;
958 : }
959 : }
960 : }
961 477948 : return exps;
962 : }
963 :
964 : static list *
965 477948 : cleanup_equal_exps(mvc *sql, sql_rel *rel, list *exps, int *changes)
966 : {
967 477948 : if (list_length(exps) <= 1)
968 : return exps;
969 55522 : if (is_join(rel->op)) {
970 93200 : for(node *n = exps->h; n; n = n->next) {
971 63163 : sql_exp *e = n->data;
972 126181 : if (e->type == e_cmp && !e->f && e->flag <= cmp_notequal &&
973 105508 : !rel_find_exp(rel->l, e->l) && !rel_find_exp(rel->r, e->r) &&
974 42325 : !find_prop(e->p, PROP_HASHCOL) && !find_prop(e->p, PROP_JOINIDX)) {
975 21089 : exp_swap(e);
976 : }
977 : }
978 : }
979 55522 : bool needed = false;
980 176137 : for(node *n = exps->h; !needed && n; n = n->next) {
981 313528 : for (node *m = n->next; !needed && m; m = m->next) {
982 192913 : if (exp_match_exp_semantics(n->data, m->data, true))
983 79 : needed = true;
984 : }
985 : }
986 55522 : if (needed) {
987 79 : list *nexps = sa_list(sql->sa);
988 :
989 243 : for(node *n = exps->h; n; n = n->next) {
990 164 : bool done = false;
991 421 : for (node *m = exps->h; m && !done; m = m->next) {
992 257 : if (n != m && exp_match_exp_semantics(n->data, m->data, false)) {
993 159 : sql_exp *e1 = n->data, *e2 = m->data;
994 159 : if ((is_any(e1) || is_semantics(e1)) || (!is_any(e2) && !is_semantics(e2))) {
995 159 : append(nexps, e1);
996 159 : if ((!is_any(e2) && !is_semantics(e2)) && is_left(rel->op) && list_length(rel->attr) == 1) {
997 : /* nil is false */
998 0 : sql_exp *m = rel->attr->h->data;
999 0 : if (exp_is_atom(m))
1000 0 : set_no_nil(m);
1001 : }
1002 : }
1003 : done = true;
1004 : }
1005 : }
1006 164 : if (!done)
1007 5 : append(nexps, n->data);
1008 : }
1009 : return nexps;
1010 : }
1011 : (void)changes;
1012 : return exps;
1013 : }
1014 :
1015 : static inline sql_rel *
1016 477948 : rel_select_cse(visitor *v, sql_rel *rel)
1017 : {
1018 477948 : if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && rel->exps) /* cleanup equal expressions */
1019 477948 : rel->exps = cleanup_equal_exps(v->sql, rel, rel->exps, &v->changes); /* (a = b) and (a += b) */
1020 :
1021 477948 : if (is_select(rel->op) && rel->exps)
1022 211406 : rel->exps = merge_ors(v->sql, rel->exps, &v->changes);
1023 :
1024 477948 : if (is_select(rel->op) && rel->exps)
1025 211406 : rel->exps = merge_notequal(v->sql, rel->exps, &v->changes); /* x <> 1 and x <> 2 => x not in (1, 2)*/
1026 :
1027 477948 : if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && rel->exps)
1028 477948 : rel->exps = merge_cmp_or_null(v->sql, rel, rel->exps, &v->changes); /* (a = b) or (a is null and b is null) -> a = b with null semantics */
1029 :
1030 477948 : if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && rel->exps) {
1031 477948 : node *n;
1032 477948 : list *nexps;
1033 477948 : int needed = 0;
1034 :
1035 1018144 : for (n=rel->exps->h; n && !needed; n = n->next) {
1036 540196 : sql_exp *e = n->data;
1037 :
1038 540196 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e))
1039 540196 : needed = 1;
1040 : }
1041 477948 : if (!needed)
1042 : return rel;
1043 16363 : nexps = new_exp_list(v->sql->sa);
1044 35585 : for (n=rel->exps->h; n; n = n->next) {
1045 19222 : sql_exp *e = n->data;
1046 :
1047 19222 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
1048 : /* split the common expressions */
1049 16491 : v->changes += exps_cse(v->sql, nexps, e->l, e->r);
1050 : } else {
1051 2731 : append(nexps, e);
1052 : }
1053 : }
1054 16363 : rel->exps = nexps;
1055 : }
1056 : return rel;
1057 : }
1058 :
1059 : static list *
1060 10487 : exps_merge_select_rse( mvc *sql, list *l, list *r, bool *merged)
1061 : {
1062 10487 : node *n, *m, *o;
1063 10487 : list *nexps = NULL, *lexps, *rexps;
1064 10487 : bool lmerged = true, rmerged = true;
1065 :
1066 10487 : lexps = new_exp_list(sql->sa);
1067 22354 : for (n = l->h; n; n = n->next) {
1068 11867 : sql_exp *e = n->data;
1069 :
1070 11867 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e) && !is_semantics(e)) {
1071 3326 : lmerged = false;
1072 3326 : list *nexps = exps_merge_select_rse(sql, e->l, e->r, &lmerged);
1073 3728 : for (o = nexps->h; o; o = o->next)
1074 402 : append(lexps, o->data);
1075 : } else {
1076 8541 : append(lexps, e);
1077 : }
1078 : }
1079 10487 : if (lmerged)
1080 7210 : lmerged = (list_length(lexps) == 1);
1081 10487 : rexps = new_exp_list(sql->sa);
1082 23371 : for (n = r->h; n; n = n->next) {
1083 12884 : sql_exp *e = n->data;
1084 :
1085 12884 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e) && !is_semantics(e)) {
1086 699 : rmerged = false;
1087 699 : list *nexps = exps_merge_select_rse(sql, e->l, e->r, &rmerged);
1088 724 : for (o = nexps->h; o; o = o->next)
1089 25 : append(rexps, o->data);
1090 : } else {
1091 12185 : append(rexps, e);
1092 : }
1093 : }
1094 10487 : if (rmerged)
1095 9809 : rmerged = (list_length(r) == 1);
1096 :
1097 10487 : nexps = new_exp_list(sql->sa);
1098 :
1099 : /* merge merged lists first ? */
1100 19430 : for (n = lexps->h; n; n = n->next) {
1101 8943 : sql_exp *le = n->data, *re, *fnd = NULL;
1102 :
1103 8943 : if (le->type != e_cmp || le->flag == cmp_or || is_anti(le) || is_semantics(le) || is_symmetric(le))
1104 622 : continue;
1105 17240 : for (m = rexps->h; !fnd && m; m = m->next) {
1106 8919 : re = m->data;
1107 8919 : if (exps_match_col_exps(le, re))
1108 1551 : fnd = re;
1109 : }
1110 8321 : if (fnd && (is_anti(fnd) || is_semantics(fnd)))
1111 50 : continue;
1112 : /* cases
1113 : * 1) 2 values (cmp_equal)
1114 : * 2) 1 value (cmp_equal), and cmp_in
1115 : * (also cmp_in, cmp_equal)
1116 : * 3) 2 cmp_in
1117 : * 4) ranges
1118 : */
1119 1501 : if (fnd) {
1120 1501 : re = fnd;
1121 1501 : fnd = NULL;
1122 1501 : if (is_anti(le) || is_anti(re) || is_symmetric(re))
1123 0 : continue;
1124 1938 : if (le->flag == cmp_equal && re->flag == cmp_equal) {
1125 437 : list *exps = new_exp_list(sql->sa);
1126 :
1127 437 : append(exps, le->r);
1128 437 : append(exps, re->r);
1129 437 : fnd = exp_in(sql->sa, le->l, exps, cmp_in);
1130 1207 : } else if (le->flag == cmp_equal && re->flag == cmp_in){
1131 143 : list *exps = new_exp_list(sql->sa);
1132 :
1133 143 : append(exps, le->r);
1134 143 : list_merge(exps, re->r, NULL);
1135 143 : fnd = exp_in(sql->sa, le->l, exps, cmp_in);
1136 1216 : } else if (le->flag == cmp_in && re->flag == cmp_equal){
1137 295 : list *exps = new_exp_list(sql->sa);
1138 :
1139 295 : append(exps, re->r);
1140 295 : list_merge(exps, le->r, NULL);
1141 295 : fnd = exp_in(sql->sa, le->l, exps, cmp_in);
1142 741 : } else if (le->flag == cmp_in && re->flag == cmp_in){
1143 115 : list *exps = new_exp_list(sql->sa);
1144 :
1145 115 : list_merge(exps, le->r, NULL);
1146 115 : list_merge(exps, re->r, NULL);
1147 115 : fnd = exp_in(sql->sa, le->l, exps, cmp_in);
1148 511 : } else if (le->f && re->f && /* merge ranges */
1149 20 : le->flag == re->flag && le->flag <= cmp_lt) {
1150 20 : sql_exp *mine = NULL, *maxe = NULL;
1151 :
1152 20 : if (!(mine = rel_binop_(sql, NULL, exp_copy(sql, le->r), exp_copy(sql, re->r), "sys", "sql_min", card_value, true))) {
1153 0 : sql->session->status = 0;
1154 0 : sql->errstr[0] = '\0';
1155 0 : continue;
1156 : }
1157 20 : if (!(maxe = rel_binop_(sql, NULL, exp_copy(sql, le->f), exp_copy(sql, re->f), "sys", "sql_max", card_value, true))) {
1158 0 : sql->session->status = 0;
1159 0 : sql->errstr[0] = '\0';
1160 0 : continue;
1161 : }
1162 20 : fnd = exp_compare2(sql->sa, exp_copy(sql, le->l), mine, maxe, le->flag, 0);
1163 20 : lmerged = false;
1164 : }
1165 1010 : if (fnd) {
1166 1010 : append(nexps, fnd);
1167 1864 : *merged = (fnd && lmerged && rmerged);
1168 : }
1169 : }
1170 : }
1171 10487 : return nexps;
1172 : }
1173 :
1174 : /* merge related sub expressions
1175 : *
1176 : * ie (x = a and y > 1 and y < 5) or
1177 : * (x = c and y > 1 and y < 10) or
1178 : * (x = e and y > 1 and y < 20)
1179 : * ->
1180 : * ((x = a and y > 1 and y < 5) or
1181 : * (x = c and y > 1 and y < 10) or
1182 : * (x = e and y > 1 and y < 20)) and
1183 : * x in (a,c,e) and
1184 : * y > 1 and y < 20
1185 : *
1186 : * for single expression or's we can do better
1187 : * x in (a, b, c) or x in (d, e, f)
1188 : * ->
1189 : * x in (a, b, c, d, e, f)
1190 : * */
1191 : static inline sql_rel *
1192 146491 : rel_merge_select_rse(visitor *v, sql_rel *rel)
1193 : {
1194 : /* only execute once per select */
1195 146491 : if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && rel->exps && !is_rel_merge_select_rse_used(rel->used)) {
1196 146491 : node *n, *o;
1197 146491 : list *nexps = new_exp_list(v->sql->sa);
1198 :
1199 309843 : for (n=rel->exps->h; n; n = n->next) {
1200 163352 : sql_exp *e = n->data;
1201 :
1202 169814 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e) && !is_semantics(e)) {
1203 : /* possibly merge related expressions */
1204 6462 : bool merged = false;
1205 :
1206 6462 : list *ps = exps_merge_select_rse(v->sql, e->l, e->r, &merged);
1207 7045 : for (o = ps->h; o; o = o->next)
1208 583 : append(nexps, o->data);
1209 6462 : if (merged)
1210 87 : v->changes++;
1211 : else
1212 6375 : append(nexps, e);
1213 : } else {
1214 156890 : append(nexps, e);
1215 : }
1216 : }
1217 146491 : rel->exps = nexps;
1218 146491 : rel->used |= rel_merge_select_rse_used;
1219 : }
1220 146491 : return rel;
1221 : }
1222 :
1223 : /* pack optimizers into a single function call to avoid iterations in the AST */
1224 : static sql_rel *
1225 1828021 : rel_optimize_select_and_joins_bottomup_(visitor *v, sql_rel *rel)
1226 : {
1227 1828021 : if (!rel || (!is_join(rel->op) && !is_semi(rel->op) && !is_select(rel->op)) || list_empty(rel->exps))
1228 1350073 : return rel;
1229 477948 : uint8_t cycle = *(uint8_t*) v->data;
1230 :
1231 477948 : rel->exps = exp_merge_range(v, rel, rel->exps);
1232 477948 : rel = rel_select_cse(v, rel);
1233 477948 : if (cycle == 1)
1234 146491 : rel = rel_merge_select_rse(v, rel);
1235 477948 : rel = rewrite_simplify(v, cycle, v->value_based_opt, rel);
1236 477948 : return rel;
1237 : }
1238 :
1239 : static sql_rel *
1240 134798 : rel_optimize_select_and_joins_bottomup(visitor *v, global_props *gp, sql_rel *rel)
1241 : {
1242 134798 : v->data = &gp->opt_cycle;
1243 134798 : rel = rel_visitor_bottomup(v, rel, &rel_optimize_select_and_joins_bottomup_);
1244 134798 : v->data = gp;
1245 134798 : return rel;
1246 : }
1247 :
1248 : run_optimizer
1249 643754 : bind_optimize_select_and_joins_bottomup(visitor *v, global_props *gp)
1250 : {
1251 643754 : int flag = v->sql->sql_optimizer;
1252 643550 : return gp->opt_level == 1 && (gp->cnt[op_join] || gp->cnt[op_left] || gp->cnt[op_right] ||
1253 557442 : gp->cnt[op_full] || gp->cnt[op_semi] || gp->cnt[op_anti] ||
1254 1287304 : gp->cnt[op_select]) && (flag & optimize_select_and_joins_bottomup) ? rel_optimize_select_and_joins_bottomup : NULL;
1255 : }
1256 :
1257 :
1258 : static inline sql_rel *
1259 1603195 : rel_push_join_exps_down(visitor *v, sql_rel *rel)
1260 : {
1261 : /* push select exps part of join expressions down */
1262 : /* TODO CHECK WHY not semi enabled */
1263 1603195 : if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) /*|| is_semi(rel->op)*/) && !list_empty(rel->exps)) {
1264 248110 : int left = is_innerjoin(rel->op) || is_right(rel->op) || rel->op == op_semi;
1265 248110 : int right = is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op);
1266 248110 : sql_rel *jl = rel->l, *ojl = jl, *jr = rel->r, *ojr = jr;
1267 :
1268 248110 : set_processed(jl);
1269 248110 : set_processed(jr);
1270 529285 : for (node *n = rel->exps->h; n;) {
1271 281175 : node *next = n->next;
1272 281175 : sql_exp *e = n->data;
1273 :
1274 281175 : if (left && rel_rebind_exp(v->sql, jl, e) && !is_any(e)) { /* select expressions on left */
1275 1346 : if (!is_select(jl->op) || rel_is_ref(jl))
1276 1335 : rel->l = jl = rel_select(v->sql->sa, jl, NULL);
1277 1346 : rel_select_add_exp(v->sql->sa, jl, e);
1278 1346 : list_remove_node(rel->exps, NULL, n);
1279 1346 : v->changes++;
1280 279829 : } else if (right && rel_rebind_exp(v->sql, jr, e) && !is_any(e)) { /* select expressions on right */
1281 45 : if (!is_select(jr->op) || rel_is_ref(jr))
1282 42 : rel->r = jr = rel_select(v->sql->sa, jr, NULL);
1283 45 : rel_select_add_exp(v->sql->sa, jr, e);
1284 45 : list_remove_node(rel->exps, NULL, n);
1285 45 : v->changes++;
1286 : }
1287 : n = next;
1288 : }
1289 248110 : if (ojl != jl)
1290 1335 : set_processed(jl);
1291 248110 : if (ojr != jr)
1292 42 : set_processed(jr);
1293 : }
1294 1603195 : return rel;
1295 : }
1296 :
1297 : static inline bool
1298 1603195 : is_non_trivial_select_applied_to_outer_join(sql_rel *rel)
1299 : {
1300 1603195 : return is_select(rel->op) && rel->exps && is_outerjoin(((sql_rel*) rel->l)->op);
1301 : }
1302 :
1303 : extern list *list_append_before(list *l, node *n, void *data);
1304 :
1305 : static void
1306 : replace_column_references_with_nulls_2(mvc *sql, sql_rel *inner_join_side, sql_exp* e);
1307 :
1308 : static void
1309 5584 : replace_column_references_with_nulls_1(mvc *sql, sql_rel *inner_join_side, list* exps) {
1310 5584 : if (list_empty(exps))
1311 : return;
1312 14348 : for(node* n = exps->h; n; n=n->next) {
1313 8764 : sql_exp* e = n->data;
1314 8764 : replace_column_references_with_nulls_2(sql, inner_join_side, e);
1315 : }
1316 : }
1317 :
1318 : static void
1319 28181 : replace_column_references_with_nulls_2(mvc *sql, sql_rel *inner_join_side, sql_exp* e) {
1320 35890 : if (e == NULL) {
1321 : return;
1322 : }
1323 :
1324 29726 : switch (e->type) {
1325 8861 : case e_column:
1326 8861 : if (rel_find_exp_and_corresponding_rel(inner_join_side, e, true, NULL, NULL)) {
1327 2556 : e->type = e_atom;
1328 2556 : e->l = atom_general(sql->sa, &e->tpe, NULL, 0);
1329 2556 : e->r = e->f = NULL;
1330 : }
1331 : break;
1332 9178 : case e_cmp:
1333 9178 : switch (e->flag) {
1334 6538 : case cmp_gt:
1335 : case cmp_gte:
1336 : case cmp_lte:
1337 : case cmp_lt:
1338 : case cmp_equal:
1339 : case cmp_notequal:
1340 : {
1341 6538 : sql_exp* l = e->l;
1342 6538 : sql_exp* r = e->r;
1343 6538 : sql_exp* f = e->f;
1344 :
1345 6538 : replace_column_references_with_nulls_2(sql, inner_join_side, l);
1346 6538 : replace_column_references_with_nulls_2(sql, inner_join_side, r);
1347 6538 : replace_column_references_with_nulls_2(sql, inner_join_side, f);
1348 6538 : break;
1349 : }
1350 1876 : case cmp_filter:
1351 : case cmp_or:
1352 : {
1353 1876 : list* l = e->l;
1354 1876 : list* r = e->r;
1355 1876 : replace_column_references_with_nulls_1(sql, inner_join_side, l);
1356 1876 : replace_column_references_with_nulls_1(sql, inner_join_side, r);
1357 1876 : break;
1358 : }
1359 764 : case cmp_in:
1360 : case cmp_notin:
1361 : {
1362 764 : sql_exp* l = e->l;
1363 764 : list* r = e->r;
1364 764 : replace_column_references_with_nulls_2(sql, inner_join_side, l);
1365 764 : replace_column_references_with_nulls_1(sql, inner_join_side, r);
1366 764 : break;
1367 : }
1368 : default:
1369 : break;
1370 : }
1371 : break;
1372 1068 : case e_func:
1373 : {
1374 1068 : list* l = e->l;
1375 1068 : replace_column_references_with_nulls_1(sql, inner_join_side, l);
1376 1068 : break;
1377 : }
1378 1171 : case e_convert:
1379 : {
1380 1171 : sql_exp* l = e->l;
1381 1171 : replace_column_references_with_nulls_2(sql, inner_join_side, l);
1382 1171 : break;
1383 : }
1384 : default:
1385 : break;
1386 : }
1387 : }
1388 :
1389 : static sql_rel *
1390 4940 : out2inner(visitor *v, sql_rel* sel, sql_rel* join, sql_rel* inner_join_side, operator_type new_type) {
1391 :
1392 : /* handle inner_join relations with a simple select */
1393 4940 : if (is_select(inner_join_side->op) && inner_join_side->l)
1394 4940 : inner_join_side = inner_join_side->l;
1395 :
1396 4940 : list* select_predicates = exps_copy(v->sql, sel->exps);
1397 :
1398 10421 : for(node* n = select_predicates->h; n; n=n->next) {
1399 5577 : sql_exp* e = n->data;
1400 5577 : replace_column_references_with_nulls_2(v->sql, inner_join_side, e);
1401 :
1402 5577 : if (exp_is_false(e)) {
1403 96 : join->op = new_type;
1404 96 : v->changes++;
1405 96 : break;
1406 : }
1407 : }
1408 :
1409 4940 : return sel;
1410 : }
1411 :
1412 : static inline sql_rel *
1413 1603195 : rel_out2inner(visitor *v, sql_rel *rel) {
1414 :
1415 1603195 : if (!is_non_trivial_select_applied_to_outer_join(rel)) {
1416 : // Nothing to do here.
1417 : return rel;
1418 : }
1419 :
1420 4917 : sql_rel* join = (sql_rel*) rel->l;
1421 :
1422 4917 : if (rel_is_ref(join)) {
1423 : /* Do not alter a multi-referenced join relation.
1424 : * This is problematic (e.g. in the case of the plan of a merge statement)
1425 : * basically because there are no guarantees on the other container relations.
1426 : * In particular there is no guarantee that the other referencing relations are
1427 : * select relations with null-rejacting predicates on the inner join side.
1428 : */
1429 : return rel;
1430 : }
1431 :
1432 4917 : sql_rel* inner_join_side;
1433 4917 : if (is_left(join->op)) {
1434 4868 : inner_join_side = join->r;
1435 4868 : return out2inner(v, rel, join, inner_join_side, op_join);
1436 : }
1437 49 : else if (is_right(join->op)) {
1438 26 : inner_join_side = join->l;
1439 26 : return out2inner(v, rel, join, inner_join_side, op_join);
1440 : }
1441 : else /*full outer join*/ {
1442 : // First check if left side can degenerate from full outer join to just right outer join.
1443 23 : inner_join_side = join->r;
1444 23 : rel = out2inner(v, rel, join, inner_join_side, op_right);
1445 : /* Now test if the right side can degenerate to
1446 : * a normal inner join or a left outer join
1447 : * depending on the result of previous call to out2inner.
1448 : */
1449 :
1450 23 : inner_join_side = join->l;
1451 37 : return out2inner(v, rel, join, inner_join_side, is_right(join->op)? op_join: op_left);
1452 : }
1453 : }
1454 :
1455 : static bool
1456 3982 : exps_uses_any(list *exps, list *l)
1457 : {
1458 3982 : bool uses_any = false;
1459 :
1460 3982 : if (list_empty(exps) || list_empty(l))
1461 15 : return false;
1462 7940 : for (node *n = l->h; n && !uses_any; n = n->next) {
1463 3973 : sql_exp *e = n->data;
1464 3973 : uses_any |= list_exps_uses_exp(exps, exp_relname(e), exp_name(e)) != NULL;
1465 : }
1466 :
1467 : return uses_any;
1468 : }
1469 :
1470 : /* TODO At the moment I have to disable the new join2semi because the join order optimizer doesn't take semi-joins into account,
1471 : so plans get deteriorated if more joins are optimized into semi-joins. Later I will review the join order with semi-joins and hopefully,
1472 : I will be able to re-enable the new join2semi. */
1473 : #if 0
1474 : #define NO_EXP_FOUND 0
1475 : #define FOUND_WITH_DUPLICATES 1
1476 : #define MAY_HAVE_DUPLICATE_NULLS 2
1477 : #define ALL_VALUES_DISTINCT 3
1478 :
1479 : static int
1480 : find_projection_for_join2semi(sql_rel *rel, sql_exp *jc)
1481 : {
1482 : sql_rel *res = NULL;
1483 : sql_exp *e = NULL;
1484 : bool underjoin = false;
1485 :
1486 : if ((e = rel_find_exp_and_corresponding_rel(rel, jc, &res, &underjoin))) {
1487 : if (underjoin || e->type != e_column)
1488 : return FOUND_WITH_DUPLICATES;
1489 : /* if just one groupby column is projected or the relation needs distinct values and one column is projected or is a primary key, it will be distinct */
1490 : if (is_unique(e) ||
1491 : (is_groupby(res->op) && list_length(res->r) == 1 && exps_find_exp(res->r, e)) ||
1492 : ((is_project(res->op) || is_base(res->op)) && ((need_distinct(res) && list_length(res->exps) == 1) || res->card < CARD_AGGR)))
1493 : return has_nil(e) ? MAY_HAVE_DUPLICATE_NULLS : ALL_VALUES_DISTINCT;
1494 : return FOUND_WITH_DUPLICATES;
1495 : }
1496 : return NO_EXP_FOUND;
1497 : }
1498 :
1499 : static int
1500 : subrel_uses_exp_outside_subrel(visitor *v, sql_rel *rel, list *l, sql_rel *j)
1501 : {
1502 : if (rel == j)
1503 : return 0;
1504 : if (mvc_highwater(v->sql))
1505 : return 1;
1506 : switch(rel->op){
1507 : case op_join:
1508 : case op_left:
1509 : case op_right:
1510 : case op_full:
1511 : return exps_uses_any(rel->exps, l) ||
1512 : subrel_uses_exp_outside_subrel(v, rel->l, l, j) || subrel_uses_exp_outside_subrel(v, rel->r, l, j);
1513 : case op_semi:
1514 : case op_anti:
1515 : case op_select:
1516 : return exps_uses_any(rel->exps, l) ||
1517 : subrel_uses_exp_outside_subrel(v, rel->l, l, j);
1518 : case op_project:
1519 : case op_groupby:
1520 : return exps_uses_any(rel->exps, l) || exps_uses_any(rel->r, l);
1521 : case op_basetable:
1522 : case op_table:
1523 : case op_union:
1524 : case op_except:
1525 : case op_inter:
1526 : return exps_uses_any(rel->exps, l);
1527 : case op_topn:
1528 : case op_sample:
1529 : return subrel_uses_exp_outside_subrel(v, rel->l, l, j);
1530 : default:
1531 : return 1;
1532 : }
1533 : }
1534 :
1535 : static int
1536 : projrel_uses_exp_outside_subrel(visitor *v, sql_rel *rel, list *l, sql_rel *j)
1537 : {
1538 : /* test if projecting relation uses any of the join expressions */
1539 : assert((is_simple_project(rel->op) || is_groupby(rel->op)) && rel->l);
1540 : return exps_uses_any(rel->exps, l) || exps_uses_any(rel->r, l) || subrel_uses_exp_outside_subrel(v, rel->l, l, j);
1541 : }
1542 :
1543 : static sql_rel *
1544 : rewrite_joins2semi(visitor *v, sql_rel *proj, sql_rel *rel)
1545 : {
1546 : /* generalize possibility : we need the visitor 'step' here */
1547 : if (rel_is_ref(rel) || mvc_highwater(v->sql)) /* if the join has multiple references, it's dangerous to convert it into a semijoin */
1548 : return rel;
1549 : if (is_innerjoin(rel->op) && !list_empty(rel->exps)) {
1550 : sql_rel *l = rel->l, *r = rel->r;
1551 : bool left_unique = true, right_unique = true;
1552 :
1553 : /* these relations don't project anything, so skip them */
1554 : while (is_topn(l->op) || is_sample(l->op) || is_select(l->op) || is_semi(l->op))
1555 : l = l->l;
1556 : /* joins will expand values, so don't search on those */
1557 : if (!is_base(l->op) && !is_project(l->op))
1558 : left_unique = false;
1559 : while (is_topn(r->op) || is_sample(r->op) || is_select(r->op) || is_semi(r->op))
1560 : r = r->l;
1561 : if (!is_base(r->op) && !is_project(r->op))
1562 : right_unique = false;
1563 : /* if all columns used in equi-joins from one of the sides are unique, the join can be rewritten into a semijoin */
1564 : for (node *n=rel->exps->h; n && (left_unique || right_unique); n = n->next) {
1565 : sql_exp *e = n->data, *el = e->l, *er = e->r;
1566 :
1567 : if (!is_compare(e->type) || e->flag != cmp_equal || exp_has_func(el) || exp_has_func(er)) {
1568 : left_unique = right_unique = false;
1569 : } else {
1570 : int found = 0;
1571 :
1572 : if (left_unique && (found = find_projection_for_join2semi(l, el)) > NO_EXP_FOUND)
1573 : left_unique &= (found == ALL_VALUES_DISTINCT || (found == MAY_HAVE_DUPLICATE_NULLS && (!is_semantics(e) || !has_nil(er))));
1574 : if (left_unique && (found = find_projection_for_join2semi(l, er)) > NO_EXP_FOUND)
1575 : left_unique &= (found == ALL_VALUES_DISTINCT || (found == MAY_HAVE_DUPLICATE_NULLS && (!is_semantics(e) || !has_nil(el))));
1576 : if (right_unique && (found = find_projection_for_join2semi(r, el)) > NO_EXP_FOUND)
1577 : right_unique &= (found == ALL_VALUES_DISTINCT || (found == MAY_HAVE_DUPLICATE_NULLS && (!is_semantics(e) || !has_nil(er))));
1578 : if (right_unique && (found = find_projection_for_join2semi(r, er)) > NO_EXP_FOUND)
1579 : right_unique &= (found == ALL_VALUES_DISTINCT || (found == MAY_HAVE_DUPLICATE_NULLS && (!is_semantics(e) || !has_nil(el))));
1580 : }
1581 : }
1582 :
1583 : /* now we need to check relation's expressions are not used */
1584 : if (left_unique && !projrel_uses_exp_outside_subrel(v, proj, l->exps, rel)) {
1585 : sql_rel *tmp = rel->r;
1586 : rel->r = rel->l;
1587 : rel->l = tmp;
1588 : rel->op = op_semi;
1589 : v->changes++;
1590 : } else if (right_unique && !projrel_uses_exp_outside_subrel(v, proj, r->exps, rel)) {
1591 : rel->op = op_semi;
1592 : v->changes++;
1593 : }
1594 : }
1595 : if (is_join(rel->op)) {
1596 : rel->l = rewrite_joins2semi(v, proj, rel->l);
1597 : rel->r = rewrite_joins2semi(v, proj, rel->r);
1598 : } else if (is_topn(rel->op) || is_sample(rel->op) || is_select(rel->op) || is_semi(rel->op)) {
1599 : rel->l = rewrite_joins2semi(v, proj, rel->l);
1600 : }
1601 : return rel;
1602 : }
1603 :
1604 : static inline sql_rel *
1605 : rel_join2semijoin(visitor *v, sql_rel *rel)
1606 : {
1607 : if ((is_simple_project(rel->op) || is_groupby(rel->op)) && rel->l)
1608 : rel->l = rewrite_joins2semi(v, rel, rel->l);
1609 : return rel;
1610 : }
1611 : #endif
1612 :
1613 : #define NO_PROJECTION_FOUND 0
1614 : #define MAY_HAVE_DUPLICATE_NULLS 1
1615 : #define ALL_VALUES_DISTINCT 2
1616 :
1617 : static int
1618 370579 : find_projection_for_join2semi(sql_rel *rel)
1619 : {
1620 370579 : if (is_simple_project(rel->op) || is_groupby(rel->op) || is_inter(rel->op) || is_except(rel->op) || is_base(rel->op) || (is_union(rel->op) && need_distinct(rel))) {
1621 234874 : if (rel->card < CARD_AGGR) /* const or groupby without group by exps */
1622 : return ALL_VALUES_DISTINCT;
1623 233747 : if (list_length(rel->exps) == 1) {
1624 6315 : sql_exp *e = rel->exps->h->data;
1625 : /* a single group by column in the projection list from a group by relation is guaranteed to be unique, but not an aggregate */
1626 6315 : if (e->type == e_column) {
1627 6285 : sql_rel *res = NULL;
1628 6285 : sql_exp *found = NULL;
1629 6285 : bool underjoin = false;
1630 :
1631 : /* if just one groupby column is projected or the relation needs distinct values and one column is projected or is a primary key, it will be distinct */
1632 6285 : if ((is_groupby(rel->op) && list_length(rel->r) == 1 && exps_find_exp(rel->r, e)) || (need_distinct(rel) && list_length(rel->exps) == 1))
1633 8832 : return ALL_VALUES_DISTINCT;
1634 2916 : if (is_unique(e))
1635 2093 : return has_nil(e) ? MAY_HAVE_DUPLICATE_NULLS : ALL_VALUES_DISTINCT;
1636 :
1637 823 : if ((is_simple_project(rel->op) || is_groupby(rel->op) || is_inter(rel->op) || is_except(rel->op)) &&
1638 210 : (found = rel_find_exp_and_corresponding_rel(rel->l, e, false, &res, &underjoin)) && !underjoin) { /* grouping column on inner relation */
1639 194 : if (need_distinct(res) && list_length(res->exps) == 1)
1640 : return ALL_VALUES_DISTINCT;
1641 194 : if (is_unique(found))
1642 0 : return has_nil(e) ? MAY_HAVE_DUPLICATE_NULLS : ALL_VALUES_DISTINCT;
1643 194 : if (found->type == e_column && found->card <= CARD_AGGR) {
1644 1 : if (!is_groupby(res->op) && list_length(res->exps) != 1)
1645 : return NO_PROJECTION_FOUND;
1646 0 : for (node *n = res->exps->h ; n ; n = n->next) { /* must be the single column in the group by expression list */
1647 0 : sql_exp *e = n->data;
1648 0 : if (e != found && e->type == e_column)
1649 : return NO_PROJECTION_FOUND;
1650 : }
1651 : return ALL_VALUES_DISTINCT;
1652 : }
1653 : }
1654 : }
1655 : }
1656 : }
1657 : return NO_PROJECTION_FOUND;
1658 : }
1659 :
1660 : static sql_rel *
1661 1152958 : find_candidate_join2semi(visitor *v, sql_rel *rel, bool *swap)
1662 : {
1663 : /* generalize possibility : we need the visitor 'step' here */
1664 1153488 : if (rel_is_ref(rel)) /* if the join has multiple references, it's dangerous to convert it into a semijoin */
1665 : return NULL;
1666 1095930 : if (rel->op == op_join && !list_empty(rel->exps) && list_empty(rel->attr)) {
1667 187295 : sql_rel *l = rel->l, *r = rel->r;
1668 187295 : int foundr = NO_PROJECTION_FOUND, foundl = NO_PROJECTION_FOUND, found = NO_PROJECTION_FOUND;
1669 187295 : bool ok = false;
1670 :
1671 187295 : foundr = find_projection_for_join2semi(r);
1672 187295 : if (foundr < ALL_VALUES_DISTINCT)
1673 183284 : foundl = find_projection_for_join2semi(l);
1674 187295 : if (foundr && foundr > foundl) {
1675 4021 : *swap = false;
1676 4021 : found = foundr;
1677 183274 : } else if (foundl) {
1678 2561 : *swap = true;
1679 2561 : found = foundl;
1680 : }
1681 :
1682 6582 : if (found > NO_PROJECTION_FOUND) {
1683 : /* if all join expressions can be pushed down or have function calls, then it cannot be rewritten into a semijoin */
1684 13168 : for (node *n=rel->exps->h; n && !ok; n = n->next) {
1685 6586 : sql_exp *e = n->data;
1686 :
1687 10159 : ok |= e->type == e_cmp && e->flag == cmp_equal && !exp_has_func(e) && !rel_rebind_exp(v->sql, l, e) && !rel_rebind_exp(v->sql, r, e) &&
1688 11 : (found == ALL_VALUES_DISTINCT || !is_semantics(e) || !has_nil((sql_exp *)e->l) || !has_nil((sql_exp *)e->r));
1689 : }
1690 : }
1691 :
1692 187295 : if (ok)
1693 : return rel;
1694 : }
1695 1092906 : if (is_join(rel->op) || is_semi(rel->op)) {
1696 283586 : sql_rel *c;
1697 :
1698 283586 : if ((c=find_candidate_join2semi(v, rel->l, swap)) != NULL ||
1699 283180 : (c=find_candidate_join2semi(v, rel->r, swap)) != NULL)
1700 428 : if (list_empty(c->attr))
1701 : return c;
1702 : }
1703 1092478 : if (is_topn(rel->op) || is_sample(rel->op))
1704 530 : return find_candidate_join2semi(v, rel->l, swap);
1705 : return NULL;
1706 : }
1707 :
1708 : static int
1709 2560 : subrel_uses_exp_outside_subrel(sql_rel *rel, list *l, sql_rel *c)
1710 : {
1711 2561 : if (rel == c)
1712 : return 0;
1713 : /* for subrel only expect joins (later possibly selects) */
1714 853 : if (is_join(rel->op) || is_semi(rel->op)) {
1715 432 : if (exps_uses_any(rel->exps, l))
1716 : return 1;
1717 846 : if (subrel_uses_exp_outside_subrel(rel->l, l, c) ||
1718 422 : subrel_uses_exp_outside_subrel(rel->r, l, c))
1719 5 : return 1;
1720 : }
1721 840 : if (is_topn(rel->op) || is_sample(rel->op))
1722 1 : return subrel_uses_exp_outside_subrel(rel->l, l, c);
1723 : return 0;
1724 : }
1725 :
1726 : static int
1727 3024 : rel_uses_exp_outside_subrel(sql_rel *rel, list *l, sql_rel *c)
1728 : {
1729 : /* for now we only expect sub relations of type project, selects (rel) or join/semi */
1730 3024 : if (is_simple_project(rel->op) || is_groupby(rel->op) || is_select(rel->op)) {
1731 3024 : if (!list_empty(rel->exps) && exps_uses_any(rel->exps, l))
1732 : return 1;
1733 1714 : if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r) && exps_uses_any(rel->r, l))
1734 : return 1;
1735 1714 : if (rel->l)
1736 1714 : return subrel_uses_exp_outside_subrel(rel->l, l, c);
1737 : }
1738 0 : if (is_topn(rel->op) || is_sample(rel->op))
1739 0 : return subrel_uses_exp_outside_subrel(rel->l, l, c);
1740 : return 1;
1741 : }
1742 :
1743 : static inline sql_rel *
1744 1603195 : rel_join2semijoin(visitor *v, sql_rel *rel)
1745 : {
1746 1603195 : if ((is_simple_project(rel->op) || is_groupby(rel->op)) && rel->l) {
1747 586192 : bool swap = false;
1748 586192 : sql_rel *l = rel->l;
1749 586192 : sql_rel *c = find_candidate_join2semi(v, l, &swap);
1750 :
1751 586192 : if (c) {
1752 : /* 'p' is a project */
1753 3024 : sql_rel *p = swap ? c->l : c->r;
1754 :
1755 : /* now we need to check if ce is only used at the level of c */
1756 3024 : if (!rel_uses_exp_outside_subrel(rel, p->exps, c)) {
1757 1706 : c->op = op_semi;
1758 1706 : if (swap) {
1759 522 : sql_rel *tmp = c->r;
1760 522 : c->r = c->l;
1761 522 : c->l = tmp;
1762 : }
1763 1706 : v->changes++;
1764 : }
1765 : }
1766 : }
1767 1603195 : return rel;
1768 : }
1769 :
1770 : static inline sql_rel *
1771 1603195 : rel_push_join_down_outer(visitor *v, sql_rel *rel)
1772 : {
1773 1603195 : if (is_join(rel->op) && !is_outerjoin(rel->op) && !is_single(rel) && !list_empty(rel->exps) && !rel_is_ref(rel)) {
1774 196186 : sql_rel *l = rel->l, *r = rel->r;
1775 :
1776 196186 : if (is_left(r->op) && (is_select(l->op) || (is_join(l->op) && !is_outerjoin(l->op))) && !rel_is_ref(l) &&
1777 654 : !rel_is_ref(r)) {
1778 654 : sql_rel *rl = r->l;
1779 654 : sql_rel *rr = r->r;
1780 654 : if (rel_is_ref(rl) || rel_is_ref(rr))
1781 : return rel;
1782 : /* join exps should only include l and r.l */
1783 654 : list *njexps = sa_list(v->sql->sa);
1784 1316 : for(node *n = rel->exps->h; n; n = n->next) {
1785 662 : sql_exp *je = n->data;
1786 :
1787 662 : assert(je->type == e_cmp);
1788 662 : if (je->f)
1789 : return rel;
1790 662 : if ((rel_find_exp(l, je->l) && rel_find_exp(rl, je->r)) || (rel_find_exp(l, je->r) && rel_find_exp(rl, je->l))) {
1791 662 : list_append(njexps, je);
1792 : } else {
1793 0 : return rel;
1794 : }
1795 : }
1796 654 : sql_rel *nl = rel_crossproduct(v->sql->sa, rel_dup(l), rl, rel->op);
1797 654 : r->l = nl;
1798 654 : nl->exps = njexps;
1799 654 : nl->attr = rel->attr;
1800 654 : rel->attr = NULL;
1801 654 : set_processed(nl);
1802 654 : rel_dup(r);
1803 654 : rel_destroy(rel);
1804 654 : rel = r;
1805 654 : v->changes++;
1806 : }
1807 : }
1808 : return rel;
1809 : }
1810 :
1811 : static sql_rel *
1812 1603195 : rel_optimize_joins_(visitor *v, sql_rel *rel)
1813 : {
1814 1603195 : rel = rel_push_join_exps_down(v, rel);
1815 1603195 : rel = rel_out2inner(v, rel);
1816 1603195 : rel = rel_join2semijoin(v, rel);
1817 1603195 : rel = rel_push_join_down_outer(v, rel);
1818 1603195 : return rel;
1819 : }
1820 :
1821 : static sql_rel *
1822 91676 : rel_optimize_joins(visitor *v, global_props *gp, sql_rel *rel)
1823 : {
1824 91676 : (void) gp;
1825 91676 : return rel_visitor_topdown(v, rel, &rel_optimize_joins_);
1826 : }
1827 :
1828 : run_optimizer
1829 643945 : bind_optimize_joins(visitor *v, global_props *gp)
1830 : {
1831 643945 : int flag = v->sql->sql_optimizer;
1832 643746 : return gp->opt_level == 1 && (gp->cnt[op_join] || gp->cnt[op_left] || gp->cnt[op_right]
1833 1287691 : || gp->cnt[op_full] || gp->cnt[op_semi] || gp->cnt[op_anti]) && (flag & optimize_joins) ? rel_optimize_joins : NULL;
1834 : }
1835 :
1836 :
1837 : static sql_rel *rel_join_order_(visitor *v, sql_rel *rel);
1838 :
1839 : static void
1840 717654 : get_relations(visitor *v, sql_rel *rel, list *rels)
1841 : {
1842 998153 : if (list_empty(rel->attr) && !rel_is_ref(rel) && rel->op == op_join && rel->exps == NULL) {
1843 280499 : sql_rel *l = rel->l;
1844 280499 : sql_rel *r = rel->r;
1845 :
1846 280499 : get_relations(v, l, rels);
1847 280499 : get_relations(v, r, rels);
1848 280499 : rel->l = NULL;
1849 280499 : rel->r = NULL;
1850 280499 : rel_destroy(rel);
1851 : } else {
1852 437155 : rel = rel_join_order_(v, rel);
1853 437155 : append(rels, rel);
1854 : }
1855 717654 : }
1856 :
1857 : static void
1858 185052 : get_inner_relations(mvc *sql, sql_rel *rel, list *rels)
1859 : {
1860 292801 : if (!rel_is_ref(rel) && is_join(rel->op)) {
1861 107749 : sql_rel *l = rel->l;
1862 107749 : sql_rel *r = rel->r;
1863 :
1864 107749 : get_inner_relations(sql, l, rels);
1865 107749 : get_inner_relations(sql, r, rels);
1866 : } else {
1867 185052 : append(rels, rel);
1868 : }
1869 185052 : }
1870 :
1871 : static int
1872 997128 : exp_count(int *cnt, sql_exp *e)
1873 : {
1874 997128 : int flag;
1875 997128 : if (!e)
1876 : return 0;
1877 997128 : if (find_prop(e->p, PROP_JOINIDX))
1878 3699 : *cnt += 100;
1879 997128 : if (find_prop(e->p, PROP_HASHCOL))
1880 34510 : *cnt += 100;
1881 997128 : if (find_prop(e->p, PROP_HASHIDX))
1882 0 : *cnt += 100;
1883 997128 : switch(e->type) {
1884 356949 : case e_cmp:
1885 356949 : if (!is_complex_exp(e->flag)) {
1886 318772 : exp_count(cnt, e->l);
1887 318772 : exp_count(cnt, e->r);
1888 318772 : if (e->f)
1889 2629 : exp_count(cnt, e->f);
1890 : }
1891 356949 : flag = e->flag;
1892 356949 : switch (flag) {
1893 297287 : case cmp_equal:
1894 297287 : *cnt += 90;
1895 297287 : return 90;
1896 15724 : case cmp_notequal:
1897 15724 : *cnt += 7;
1898 15724 : return 7;
1899 5761 : case cmp_gt:
1900 : case cmp_gte:
1901 : case cmp_lt:
1902 : case cmp_lte:
1903 5761 : *cnt += 6;
1904 5761 : if (e->l) {
1905 5761 : sql_exp *l = e->l;
1906 5761 : sql_subtype *t = exp_subtype(l);
1907 5761 : if (EC_TEMP(t->type->eclass)) /* give preference to temporal ranges */
1908 172 : *cnt += 90;
1909 : }
1910 5761 : if (e->f){ /* range */
1911 2629 : *cnt += 6;
1912 2629 : return 12;
1913 : }
1914 : return 6;
1915 1465 : case cmp_filter:
1916 1465 : if (exps_card(e->r) > CARD_AGGR) {
1917 : /* filters for joins are special */
1918 6 : *cnt += 1000;
1919 6 : return 1000;
1920 : }
1921 1459 : *cnt += 2;
1922 1459 : return 2;
1923 33258 : case cmp_in:
1924 : case cmp_notin: {
1925 33258 : list *l = e->r;
1926 33258 : int c = 9 - 10*list_length(l);
1927 33258 : *cnt += c;
1928 33258 : return c;
1929 : }
1930 3454 : case cmp_or: /* prefer or over functions */
1931 3454 : *cnt += 3;
1932 3454 : return 3;
1933 : default:
1934 : return 0;
1935 : }
1936 515937 : case e_column:
1937 515937 : *cnt += 20;
1938 515937 : return 20;
1939 105569 : case e_atom:
1940 105569 : *cnt += 10;
1941 105569 : return 10;
1942 2866 : case e_func:
1943 : /* functions are more expensive, depending on the number of columns involved. */
1944 2866 : if (e->card == CARD_ATOM)
1945 : return 0;
1946 2581 : *cnt -= 5*list_length(e->l);
1947 2581 : return 5*list_length(e->l);
1948 15807 : case e_convert:
1949 : /* functions are more expensive, depending on the number of columns involved. */
1950 15807 : if (e->card == CARD_ATOM)
1951 : return 0;
1952 : /* fall through */
1953 : default:
1954 15101 : *cnt -= 5;
1955 15101 : return -5;
1956 : }
1957 : }
1958 :
1959 : int
1960 229082 : exp_keyvalue(sql_exp *e)
1961 : {
1962 229082 : int cnt = 0;
1963 18547 : exp_count(&cnt, e);
1964 229082 : return cnt;
1965 : }
1966 :
1967 : static sql_exp *
1968 750462 : joinexp_col(sql_exp *e, sql_rel *r)
1969 : {
1970 750462 : if (e->type == e_cmp) {
1971 750462 : if (rel_has_exp(r, e->l, false) >= 0)
1972 401534 : return e->l;
1973 348928 : return e->r;
1974 : }
1975 0 : assert(0);
1976 : return NULL;
1977 : }
1978 :
1979 : static sql_column *
1980 498590 : table_colexp(sql_exp *e, sql_rel *r)
1981 : {
1982 498590 : sql_table *t = r->l;
1983 :
1984 498590 : if (e->type == e_column) {
1985 484030 : const char *name = exp_name(e);
1986 484030 : node *cn;
1987 :
1988 484030 : if (r->exps) { /* use alias */
1989 896205 : for (cn = r->exps->h; cn; cn = cn->next) {
1990 891836 : sql_exp *ce = cn->data;
1991 891836 : if (strcmp(exp_name(ce), name) == 0) {
1992 479661 : name = ce->r;
1993 479661 : break;
1994 : }
1995 : }
1996 : }
1997 1066539 : for (cn = ol_first_node(t->columns); cn; cn = cn->next) {
1998 1062176 : sql_column *c = cn->data;
1999 1062176 : if (strcmp(c->base.name, name) == 0)
2000 479667 : return c;
2001 : }
2002 : }
2003 : return NULL;
2004 : }
2005 :
2006 : static list *
2007 361730 : matching_joins(allocator *sa, list *rels, list *exps, sql_exp *je)
2008 : {
2009 361730 : sql_rel *l, *r;
2010 :
2011 361730 : assert (je->type == e_cmp);
2012 :
2013 361730 : l = find_rel(rels, je->l);
2014 361730 : r = find_rel(rels, je->r);
2015 361730 : if (l && r) {
2016 361605 : list *res;
2017 361605 : list *n_rels = sa_list(sa);
2018 :
2019 361605 : append(n_rels, l);
2020 361605 : append(n_rels, r);
2021 361605 : res = list_select(exps, n_rels, (fcmp) &exp_joins_rels, (fdup)NULL);
2022 361605 : return res;
2023 : }
2024 125 : return sa_list(sa);
2025 : }
2026 :
2027 : static int
2028 5974 : sql_column_kc_cmp(sql_column *c, sql_kc *kc)
2029 : {
2030 : /* return on equality */
2031 5974 : return (c->colnr - kc->c->colnr);
2032 : }
2033 :
2034 : static sql_idx *
2035 467620 : find_fk_index(mvc *sql, sql_table *l, list *lcols, sql_table *r, list *rcols)
2036 : {
2037 467620 : sql_trans *tr = sql->session->tr;
2038 :
2039 467620 : if (l->idxs) {
2040 467620 : node *in;
2041 555752 : for (in = ol_first_node(l->idxs); in; in = in->next){
2042 88979 : sql_idx *li = in->data;
2043 88979 : if (li->type == join_idx) {
2044 8936 : sql_key *rk = (sql_key*)os_find_id(tr->cat->objects, tr, ((sql_fkey*)li->key)->rkey);
2045 8936 : fcmp cmp = (fcmp)&sql_column_kc_cmp;
2046 :
2047 9893 : if (rk->t == r &&
2048 1804 : list_match(lcols, li->columns, cmp) == 0 &&
2049 847 : list_match(rcols, rk->columns, cmp) == 0) {
2050 847 : return li;
2051 : }
2052 : }
2053 : }
2054 : }
2055 : return NULL;
2056 : }
2057 :
2058 : static sql_rel *
2059 723460 : find_basetable( sql_rel *r)
2060 : {
2061 1069089 : if (!r)
2062 : return NULL;
2063 1062093 : switch(r->op) {
2064 583228 : case op_basetable:
2065 583228 : if (!r->l)
2066 0 : return NULL;
2067 : return r;
2068 345629 : case op_semi:
2069 : case op_anti:
2070 : case op_project:
2071 : case op_select:
2072 : case op_topn:
2073 : case op_sample:
2074 345629 : return find_basetable(r->l);
2075 : default:
2076 : return NULL;
2077 : }
2078 : }
2079 :
2080 : static int
2081 101708 : exps_count(list *exps)
2082 : {
2083 101708 : node *n;
2084 101708 : int cnt = 0;
2085 :
2086 101708 : if (!exps)
2087 : return 0;
2088 229581 : for (n = exps->h; n; n=n->next)
2089 127873 : exp_count(&cnt, n->data);
2090 101708 : return cnt;
2091 : }
2092 :
2093 : static list *
2094 256783 : order_join_expressions(mvc *sql, list *dje, list *rels)
2095 : {
2096 256783 : node *n;
2097 256783 : int cnt = list_length(dje);
2098 :
2099 256783 : if (cnt <= 1)
2100 : return dje;
2101 :
2102 67009 : list *res = sa_list(sql->sa);
2103 67009 : int i, *keys = SA_NEW_ARRAY(sql->ta, int, cnt);
2104 67009 : void **data = SA_NEW_ARRAY(sql->ta, void*, cnt);
2105 :
2106 277544 : for (n = dje->h, i = 0; n; n = n->next, i++) {
2107 210535 : sql_exp *e = n->data;
2108 :
2109 210535 : keys[i] = exp_keyvalue(e);
2110 : /* add some weight for the selections */
2111 210535 : if (e->type == e_cmp && !is_complex_exp(e->flag)) {
2112 210508 : sql_rel *l = find_rel(rels, e->l);
2113 210508 : sql_rel *r = find_rel(rels, e->r);
2114 :
2115 210508 : if (l && is_select(l->op) && l->exps)
2116 54793 : keys[i] += list_length(l->exps)*10 + exps_count(l->exps);
2117 210508 : if (r && is_select(r->op) && r->exps)
2118 46915 : keys[i] += list_length(r->exps)*10 + exps_count(r->exps);
2119 : }
2120 210535 : data[i] = n->data;
2121 : }
2122 : /* sort descending */
2123 67009 : GDKqsort(keys, data, NULL, cnt, sizeof(int), sizeof(void *), TYPE_int, true, true);
2124 344553 : for(i=0; i<cnt; i++) {
2125 210535 : list_append(res, data[i]);
2126 : }
2127 : return res;
2128 : }
2129 :
2130 : static int
2131 240264 : find_join_rels(list **L, list **R, list *exps, list *rels)
2132 : {
2133 240264 : node *n;
2134 :
2135 240264 : *L = sa_list(exps->sa);
2136 240264 : *R = sa_list(exps->sa);
2137 240264 : if (!exps || list_length(exps) <= 1)
2138 : return -1;
2139 294411 : for(n = exps->h; n; n = n->next) {
2140 222197 : sql_exp *e = n->data;
2141 222197 : sql_rel *l = NULL, *r = NULL;
2142 :
2143 222197 : if (!is_complex_exp(e->flag)){
2144 222189 : l = find_rel(rels, e->l);
2145 222189 : r = find_rel(rels, e->r);
2146 : }
2147 222189 : if (l<r) {
2148 143100 : list_append(*L, l);
2149 143100 : list_append(*R, r);
2150 : } else {
2151 79097 : list_append(*L, r);
2152 79097 : list_append(*R, l);
2153 : }
2154 : }
2155 : return 0;
2156 : }
2157 :
2158 : static list *
2159 72214 : distinct_join_exps(list *aje, list *lrels, list *rrels)
2160 : {
2161 72214 : node *n, *m, *o, *p;
2162 72214 : int len = list_length(aje), i, j;
2163 72214 : char *used = SA_ZNEW_ARRAY(aje->sa, char, len);
2164 72214 : list *res = sa_list(aje->sa);
2165 :
2166 72214 : assert(len == list_length(lrels));
2167 294411 : for(n = lrels->h, m = rrels->h, j = 0; n && m;
2168 222197 : n = n->next, m = m->next, j++) {
2169 222197 : if (n->data && m->data)
2170 956269 : for(o = n->next, p = m->next, i = j+1; o && p;
2171 734122 : o = o->next, p = p->next, i++) {
2172 734122 : if (o->data == n->data && p->data == m->data)
2173 28950 : used[i] = 1;
2174 : }
2175 : }
2176 294411 : for (i = 0, n = aje->h; i < len; n = n->next, i++) {
2177 222197 : if (!used[i])
2178 197733 : list_append(res, n->data);
2179 : }
2180 72214 : return res;
2181 : }
2182 :
2183 : static list *
2184 240264 : find_fk( mvc *sql, list *rels, list *exps)
2185 : {
2186 240264 : node *djn;
2187 240264 : list *sdje, *aje, *dje;
2188 240264 : list *lrels, *rrels;
2189 :
2190 : /* first find the distinct join expressions */
2191 240264 : aje = list_select(exps, rels, (fcmp) &exp_is_join, (fdup)NULL);
2192 : /* add left/right relation */
2193 240264 : if (find_join_rels(&lrels, &rrels, aje, rels) < 0)
2194 : dje = aje;
2195 : else
2196 72214 : dje = distinct_join_exps(aje, lrels, rrels);
2197 605706 : for(djn=dje->h; djn; djn = djn->next) {
2198 : /* equal join expressions */
2199 365528 : sql_idx *idx = NULL;
2200 365528 : sql_exp *je = djn->data, *le = je->l, *re = je->r;
2201 :
2202 365528 : if (is_complex_exp(je->flag))
2203 : break;
2204 365442 : if (!find_prop(je->p, PROP_JOINIDX)) {
2205 361730 : int swapped = 0;
2206 361730 : list *aaje = matching_joins(sql->sa, rels, aje, je);
2207 361730 : list *eje = list_select(aaje, (void*)1, (fcmp) &exp_is_eqjoin, (fdup)NULL);
2208 361730 : sql_rel *lr = find_rel(rels, le), *olr = lr;
2209 361730 : sql_rel *rr = find_rel(rels, re), *orr = rr;
2210 361730 : sql_rel *bt = NULL;
2211 361730 : char *iname;
2212 :
2213 361730 : sql_table *l, *r;
2214 361730 : list *lexps = list_map(eje, lr, (fmap) &joinexp_col);
2215 361730 : list *rexps = list_map(eje, rr, (fmap) &joinexp_col);
2216 361730 : list *lcols, *rcols;
2217 :
2218 361730 : lr = find_basetable(lr);
2219 361730 : rr = find_basetable(rr);
2220 361730 : if (!lr || !rr)
2221 236498 : continue;
2222 252815 : l = lr->l;
2223 252815 : r = rr->l;
2224 252815 : lcols = list_map(lexps, lr, (fmap) &table_colexp);
2225 252815 : rcols = list_map(rexps, rr, (fmap) &table_colexp);
2226 252815 : lcols->destroy = NULL;
2227 252815 : rcols->destroy = NULL;
2228 252815 : if (list_length(lcols) != list_length(rcols))
2229 18667 : continue;
2230 :
2231 234148 : idx = find_fk_index(sql, l, lcols, r, rcols);
2232 234148 : if (!idx) {
2233 233472 : idx = find_fk_index(sql, r, rcols, l, lcols);
2234 233472 : swapped = 1;
2235 : }
2236 :
2237 234148 : if (idx && (iname = sa_strconcat( sql->sa, "%", idx->base.name)) != NULL &&
2238 676 : ((!swapped && name_find_column(olr, NULL, iname, -2, &bt) == NULL) ||
2239 171 : ( swapped && name_find_column(orr, NULL, iname, -2, &bt) == NULL)))
2240 : idx = NULL;
2241 :
2242 : if (idx) {
2243 754 : prop *p;
2244 754 : node *n;
2245 754 : sql_exp *t = NULL, *i = NULL;
2246 :
2247 754 : if (list_length(lcols) > 1 || !mvc_debug_on(sql, 512)) {
2248 :
2249 : /* Add join between idx and TID */
2250 754 : if (swapped) {
2251 155 : sql_exp *s = je->l, *l = je->r;
2252 :
2253 155 : t = rel_find_column(sql, olr, s->l, TID);
2254 155 : i = rel_find_column(sql, orr, l->l, iname);
2255 155 : if (!t || !i)
2256 1 : continue;
2257 154 : t->p = NULL;
2258 154 : i->p = NULL;
2259 154 : je = exp_compare(sql->sa, i, t, cmp_equal);
2260 : } else {
2261 599 : sql_exp *s = je->r, *l = je->l;
2262 :
2263 599 : t = rel_find_column(sql, orr, s->l, TID);
2264 599 : i = rel_find_column(sql, olr, l->l, iname);
2265 599 : if (!t || !i)
2266 0 : continue;
2267 599 : t->p = NULL;
2268 599 : i->p = NULL;
2269 599 : je = exp_compare(sql->sa, i, t, cmp_equal);
2270 : }
2271 :
2272 : /* Remove all join expressions */
2273 1508 : for (n = eje->h; n; n = n->next)
2274 755 : list_remove_data(exps, NULL, n->data);
2275 753 : append(exps, je);
2276 753 : djn->data = je;
2277 0 : } else if (swapped) { /* else keep je for single column expressions */
2278 0 : je = exp_compare(sql->sa, je->r, je->l, cmp_equal);
2279 : /* Remove all join expressions */
2280 0 : for (n = eje->h; n; n = n->next)
2281 0 : list_remove_data(exps, NULL, n->data);
2282 0 : append(exps, je);
2283 0 : djn->data = je;
2284 : }
2285 753 : je->p = p = prop_create(sql->sa, PROP_JOINIDX, je->p);
2286 753 : p->value.pval = idx;
2287 : }
2288 : }
2289 : }
2290 :
2291 : /* sort expressions on weighted number of reducing operators */
2292 240264 : sdje = order_join_expressions(sql, dje, rels);
2293 240264 : return sdje;
2294 : }
2295 :
2296 : static int
2297 562765 : exp_find_one_rel( sql_rel **rels, int nr, sql_exp *e)
2298 : {
2299 562765 : int fnd = 0;
2300 :
2301 4472731 : for(int i = 1; i<=nr; i++) {
2302 3910035 : if (rel_has_exp(rels[i], e, false) == 0) {
2303 562749 : if (fnd)
2304 : return 0;
2305 : fnd = i;
2306 : }
2307 : }
2308 : return fnd;
2309 : }
2310 :
2311 : static int
2312 140 : exps_find_one_rel( sql_rel **rels, int nr, list *exps)
2313 : {
2314 140 : int fnd = 0;
2315 :
2316 378 : for(node *n = exps->h; n; n = n->next) {
2317 243 : sql_exp *e = n->data;
2318 243 : if (exp_is_atom(e))
2319 86 : continue;
2320 157 : int nfnd = exp_find_one_rel(rels, nr, n->data);
2321 157 : if (nfnd != fnd && fnd)
2322 : return 0;
2323 154 : fnd = nfnd;
2324 154 : if (!fnd)
2325 : return 0;
2326 : }
2327 : return fnd;
2328 : }
2329 :
2330 : /* TODO move popcount and popcount64 into gdk_*.h, used in gdk_cand, strimps and here */
2331 : static inline int
2332 : popcount64(uint64_t x)
2333 : {
2334 : #ifdef __has_builtin
2335 : #if __has_builtin(__builtin_popcountll)
2336 : return (uint32_t) __builtin_popcountll(x);
2337 : #define BUILTIN_USED
2338 : #endif
2339 : #endif
2340 : #ifndef BUILTIN_USED
2341 : #if defined(_MSC_VER)
2342 : #if SIZEOF_OID == 4
2343 : /* no __popcnt64 on 32 bit Windows */
2344 : return (int) (__popcnt((uint32_t) x) + __popcnt((uint32_t) (x >> 32)));
2345 : #else
2346 : return (uint32_t) __popcnt64(x);
2347 : #endif
2348 : #else
2349 : x = (x & UINT64_C(0x5555555555555555)) + ((x >> 1) & UINT64_C(0x5555555555555555));
2350 : x = (x & UINT64_C(0x3333333333333333)) + ((x >> 2) & UINT64_C(0x3333333333333333));
2351 : x = (x & UINT64_C(0x0F0F0F0F0F0F0F0F)) + ((x >> 4) & UINT64_C(0x0F0F0F0F0F0F0F0F));
2352 : return (x * UINT64_C(0x0101010101010101)) >> 56;
2353 : #endif
2354 : #endif
2355 : #undef BUILTIN_USED
2356 : }
2357 :
2358 : static sql_rel *
2359 156656 : order_joins(visitor *v, list *rels, list *exps)
2360 : {
2361 156656 : sql_rel *top = NULL, *l = NULL, *r = NULL, *f = NULL;
2362 156656 : sql_exp *cje;
2363 156656 : node *djn;
2364 156656 : list *sdje, *n_rels = NULL;
2365 156656 : int fnd = 0;
2366 156656 : unsigned int rsingle;
2367 156656 : int direct = 1;
2368 :
2369 : /* find foreign keys and reorder the expressions on reducing quality */
2370 156656 : sdje = find_fk(v->sql, rels, exps);
2371 :
2372 437964 : for(djn = sdje->h; djn; djn = djn->next ) {
2373 281308 : sql_exp *e = djn->data;
2374 281308 : list_remove_data(exps, NULL, e);
2375 : }
2376 :
2377 156656 : int nr_exps = list_length(sdje), nr_rels = list_length(rels), ci = 1;
2378 156656 : if (nr_rels > 64) {
2379 1 : direct = 0;
2380 1 : n_rels = sa_list(v->sql->ta);
2381 : }
2382 156656 : sql_rel **rels_a = SA_NEW_ARRAY(v->sql->ta, sql_rel*, nr_rels+1); /* don't use slot 0 */
2383 156656 : rels_a[0] = NULL;
2384 593811 : for (node *n = rels->h; n; n = n->next, ci++) {
2385 437155 : rels_a[ci] = n->data;
2386 : }
2387 156656 : ulng *h = SA_NEW_ARRAY(v->sql->ta, ulng, nr_exps), rel_mask = 0; /* bit field (for > 64 its an imprint) */
2388 156656 : uint16_t *r1 = SA_NEW_ARRAY(v->sql->ta, uint16_t, nr_exps);
2389 156656 : uint16_t *r2 = SA_NEW_ARRAY(v->sql->ta, uint16_t, nr_exps);
2390 : /* change r3 into rest list's */
2391 156656 : int *r3 = SA_NEW_ARRAY(v->sql->ta, int, nr_exps);
2392 :
2393 156656 : ci = 0;
2394 437964 : for (node *n = sdje->h; n; n = n->next, ci++) {
2395 281308 : sql_exp *cje = n->data;
2396 :
2397 281308 : h[ci] = r1[ci] = r2[ci] = r3[ci] = 0;
2398 281308 : if (cje->type == e_cmp) {
2399 281308 : cje->tmp = ci;
2400 281308 : r1[ci] = cje->flag == cmp_filter ? exps_find_one_rel(rels_a, nr_rels, cje->l) : exp_find_one_rel(rels_a, nr_rels, cje->l);
2401 281308 : r2[ci] = cje->flag == cmp_filter ? exps_find_one_rel(rels_a, nr_rels, cje->r) : exp_find_one_rel(rels_a, nr_rels, cje->r);
2402 281308 : if (r1[ci])
2403 281245 : h[ci] |= ((ulng)1)<<((r1[ci]-1)%64);
2404 281308 : if (r2[ci])
2405 281225 : h[ci] |= ((ulng)1)<<((r2[ci]-1)%64);
2406 281308 : if (cje->f && cje->flag != cmp_filter) {
2407 132 : r3[ci] = exp_find_one_rel(rels_a, nr_rels, cje->f);
2408 132 : if (r3[ci] == r2[ci] || r3[ci] == r1[ci])
2409 114 : r3[ci] = 0;
2410 132 : if (r3[ci])
2411 8 : h[ci] |= ((ulng)1)<<((r3[ci]-1)%64);
2412 : }
2413 : }
2414 : }
2415 : /* open problem, some expressions use more than 2 relations */
2416 : /* For example a.x = b.y * c.z; */
2417 156656 : if (list_length(rels) >= 2 && sdje->h) {
2418 313271 : for (node *n = sdje->h; n && (!l || !r); n = n->next, ci++) {
2419 156639 : cje = n->data;
2420 :
2421 156639 : if (n->next && r3[cje->tmp])
2422 0 : continue;
2423 :
2424 : /* complex expressions may touch multiple base tables
2425 : * Should be pushed up to extra selection.
2426 : * */
2427 156639 : if (0 && popcount64(h[cje->tmp]) > 2)
2428 : assert(0);
2429 : /* find the involved relations */
2430 156639 : if (cje->type == e_cmp) {
2431 156639 : l = rels_a[r1[cje->tmp]];
2432 156639 : r = rels_a[r2[cje->tmp]];
2433 156639 : if (l && r)
2434 156574 : rel_mask |= h[cje->tmp];
2435 : }
2436 : }
2437 156632 : cje->tmp = 0;
2438 :
2439 156632 : if (l && r && l != r)
2440 156574 : list_remove_data(sdje, NULL, cje);
2441 : }
2442 :
2443 156656 : if (l && r && l != r) {
2444 156574 : list_remove_data(rels, NULL, l);
2445 156574 : list_remove_data(rels, NULL, r);
2446 156574 : if (!direct) {
2447 1 : list_append(n_rels, l);
2448 1 : list_append(n_rels, r);
2449 : }
2450 :
2451 : /* Create a relation between l and r. Since the calling
2452 : functions rewrote the join tree, into a list of expressions
2453 : and a list of (simple) relations, there are no outer joins
2454 : involved, we can simply do a crossproduct here.
2455 : */
2456 156574 : rsingle = is_single(r);
2457 156574 : reset_single(r);
2458 156574 : top = rel_crossproduct(v->sql->sa, l, r, op_join);
2459 156574 : if (rsingle)
2460 5168 : set_single(r);
2461 156574 : rel_join_add_exp(v->sql->sa, top, cje);
2462 :
2463 : /* all other join expressions on these 2 relations */
2464 160426 : for (node *en = exps->h; en; ) {
2465 3852 : node *next = en->next;
2466 3852 : sql_exp *e = en->data;
2467 3852 : if (rel_rebind_exp(v->sql, top, e)) {
2468 3680 : rel_join_add_exp(v->sql->sa, top, e);
2469 3680 : list_remove_data(exps, NULL, e);
2470 : }
2471 : en = next;
2472 : }
2473 : fnd = 1;
2474 : }
2475 : /* build join tree using the ordered list */
2476 277989 : while(list_length(sdje) && fnd) {
2477 121333 : fnd = 0;
2478 : /* find the first expression which could be added */
2479 883853 : for(djn = sdje->h; djn && !fnd && rels->h; djn = (!fnd)?djn->next:NULL) {
2480 381260 : node *en;
2481 381260 : l = r = f = NULL;
2482 381260 : int needs3 = 0;
2483 :
2484 381260 : cje = djn->data;
2485 381260 : if ((h[cje->tmp] & rel_mask) > 0) {
2486 121248 : if (rel_mask & (((ulng)1)<<((r1[cje->tmp]-1)%64)))
2487 68824 : l = rels_a[r1[cje->tmp]];
2488 121248 : if (rel_mask & (((ulng)1)<<((r2[cje->tmp]-1)%64)))
2489 52425 : r = rels_a[r2[cje->tmp]];
2490 121248 : if (cje->f && r3[cje->tmp]) {
2491 0 : needs3 = 1;
2492 0 : if (rel_mask & (((ulng)1)<<((r3[cje->tmp]-1)%64)))
2493 0 : f = rels_a[r3[cje->tmp]];
2494 : }
2495 : }
2496 381260 : if (!direct) { /* check if at least one side in n_rels */
2497 1040 : if (l && !list_find(n_rels, l, NULL))
2498 1007 : l = NULL;
2499 1040 : if (r && !list_find(n_rels, r, NULL))
2500 1010 : r = NULL;
2501 1040 : if (f && !list_find(n_rels, f, NULL))
2502 0 : f = NULL;
2503 : }
2504 :
2505 381260 : if ((!needs3 && l && r) || (needs3 && l && r && f)) {
2506 0 : assert(0);
2507 : /* create a selection on the current */
2508 : rel_join_add_exp(v->sql->sa, top, cje);
2509 : fnd = 1;
2510 381260 : } else if ((!needs3 && (l || r)) || (needs3 && (l || r || f))) {
2511 121248 : sql_rel *nr[2]= {NULL, NULL};
2512 121248 : rel_mask |= h[cje->tmp];
2513 121248 : int i = 0;
2514 121248 : if (!l)
2515 52424 : nr[i++] = rels_a[r1[cje->tmp]];
2516 121248 : if (!r)
2517 68824 : nr[i++] = rels_a[r2[cje->tmp]];
2518 121248 : if (needs3 && !f)
2519 0 : nr[i++] = rels_a[r3[cje->tmp]];
2520 121248 : if (!nr[0]) {
2521 0 : fnd = 1; /* not really, but this bails out */
2522 0 : list_remove_data(sdje, NULL, cje); /* handle later as select */
2523 0 : if (!list_find(exps, cje, NULL))
2524 0 : append(exps, cje);
2525 0 : continue;
2526 : }
2527 : /* remove the expression from the lists */
2528 121248 : list_remove_data(sdje, NULL, cje);
2529 :
2530 121248 : list_remove_data(rels, NULL, nr[0]);
2531 121248 : if (!direct)
2532 63 : append(n_rels, nr[0]);
2533 121248 : if (i > 1 && nr[1]) {
2534 0 : list_remove_data(rels, NULL, nr[1]);
2535 0 : if (!direct)
2536 0 : append(n_rels, nr[1]);
2537 : }
2538 :
2539 : /* create a join using the current expression */
2540 121248 : rsingle = is_single(nr[0]);
2541 121248 : reset_single(nr[0]);
2542 121248 : top = rel_crossproduct(v->sql->sa, top, nr[0], op_join);
2543 121248 : if (rsingle)
2544 0 : set_single(nr[0]);
2545 121248 : if (i > 1 && nr[1]) {
2546 0 : rsingle = is_single(nr[1]);
2547 0 : reset_single(nr[1]);
2548 0 : top = rel_crossproduct(v->sql->sa, top, nr[1], op_join);
2549 0 : if (rsingle)
2550 0 : set_single(nr[1]);
2551 : }
2552 121248 : rel_join_add_exp(v->sql->sa, top, cje);
2553 :
2554 : /* all join expressions on these tables */
2555 121636 : for (en = exps->h; en; ) {
2556 388 : node *next = en->next;
2557 388 : sql_exp *e = en->data;
2558 388 : if (rel_rebind_exp(v->sql, top, e)) {
2559 170 : rel_join_add_exp(v->sql->sa, top, e);
2560 170 : list_remove_data(exps, NULL, e);
2561 : }
2562 : en = next;
2563 : }
2564 : /* Remove other joins on the current 'n_rels'
2565 : set in the distinct list too */
2566 697646 : for (en = sdje->h; en; ) {
2567 576398 : node *next = en->next;
2568 576398 : sql_exp *e = en->data;
2569 576398 : if ((direct && ((e->flag <= cmp_notequal && (h[e->tmp] & rel_mask) == h[e->tmp] && h[e->tmp]) || (e->flag > cmp_notequal && rel_rebind_exp(v->sql, top, e)))) ||
2570 1953 : (!direct && rel_rebind_exp(v->sql, top, e))) {
2571 3338 : rel_join_add_exp(v->sql->sa, top, e);
2572 3338 : list_remove_data(sdje, NULL, en->data);
2573 : }
2574 : en = next;
2575 : }
2576 121248 : fnd = 1;
2577 : }
2578 : }
2579 : }
2580 156656 : if (list_length(rels)) { /* more relations */
2581 1130 : node *n;
2582 3889 : for(n=rels->h; n; n = n->next) {
2583 2759 : sql_rel *nr = n->data;
2584 :
2585 2759 : if (top) {
2586 2677 : rsingle = is_single(nr);
2587 2677 : reset_single(nr);
2588 2677 : top = rel_crossproduct(v->sql->sa, top, nr, op_join);
2589 2677 : if (rsingle)
2590 0 : set_single(nr);
2591 : } else
2592 : top = nr;
2593 : }
2594 : }
2595 156656 : if (list_length(sdje)) {
2596 143 : if (list_empty(exps))
2597 : exps = sdje;
2598 : else
2599 0 : exps = list_merge(exps, sdje, (fdup)NULL);
2600 : }
2601 156656 : if (list_length(exps)) { /* more expressions (add selects) */
2602 169 : top = rel_select(v->sql->sa, top, NULL);
2603 343 : for(node *n=exps->h; n; n = n->next) {
2604 174 : sql_exp *e = n->data;
2605 :
2606 174 : if (exp_is_join_exp(e) == 0) {
2607 160 : sql_rel *nr = NULL;
2608 160 : if (is_theta_exp(e->flag)) {
2609 143 : nr = rel_push_join(v->sql, top->l, e->l, e->r, e->f, e, 0);
2610 17 : } else if (e->flag == cmp_filter || e->flag == cmp_or) {
2611 17 : sql_exp *l = exps_find_one_multi_exp(e->l), *r = exps_find_one_multi_exp(e->r);
2612 17 : if (l && r)
2613 10 : nr = rel_push_join(v->sql, top->l, l, r, NULL, e, 0);
2614 : }
2615 153 : if (!nr)
2616 7 : rel_join_add_exp(v->sql->sa, top->l, e);
2617 : } else
2618 14 : rel_select_add_exp(v->sql->sa, top, e);
2619 : }
2620 169 : if (list_empty(top->exps)) { /* empty select */
2621 155 : sql_rel *l = top->l;
2622 155 : top->l = NULL;
2623 155 : rel_destroy(top);
2624 155 : top = l;
2625 : }
2626 : }
2627 156656 : return top;
2628 : }
2629 :
2630 : static int
2631 437155 : rel_neg_in_size(sql_rel *r)
2632 : {
2633 437155 : if ((is_union(r->op) /*|| is_munion(r->op)*/) && r->nrcols == 0)
2634 0 : return -1 + rel_neg_in_size(r->l);
2635 437155 : if (is_project(r->op) && r->nrcols == 0)
2636 0 : return -1;
2637 : return 0;
2638 : }
2639 :
2640 437155 : static void _rel_destroy(void *dummy, sql_rel *rel)
2641 : {
2642 437155 : (void)dummy;
2643 437155 : rel_destroy(rel);
2644 437155 : }
2645 :
2646 : static list *
2647 156656 : push_in_join_down(mvc *sql, list *rels, list *exps)
2648 : {
2649 156656 : node *n;
2650 156656 : int restart = 1;
2651 156656 : list *nrels;
2652 :
2653 : /* we should sort these first, ie small in's before large one's */
2654 156656 : nrels = list_sort(rels, (fkeyvalue)&rel_neg_in_size, (fdup)&rel_dup);
2655 :
2656 : /* we need to cleanup, the new refs ! */
2657 156656 : rels->destroy = (fdestroy)_rel_destroy;
2658 156656 : list_destroy(rels);
2659 156656 : rels = nrels;
2660 :
2661 : /* one of the rels should be a op_union with nrcols == 0 */
2662 469968 : while (restart) {
2663 593811 : for (n = rels->h; n; n = n->next) {
2664 437155 : sql_rel *r = n->data;
2665 :
2666 437155 : restart = 0;
2667 437155 : if (is_project(r->op) && r->nrcols == 0) {
2668 : /* next step find expression on this relation */
2669 0 : node *m;
2670 0 : sql_rel *l = NULL;
2671 0 : sql_exp *je = NULL;
2672 :
2673 0 : for(m = exps->h; !je && m; m = m->next) {
2674 0 : sql_exp *e = m->data;
2675 :
2676 0 : if (e->type == e_cmp && e->flag == cmp_equal) {
2677 : /* in values are on
2678 : the right of the join */
2679 0 : if (rel_has_exp(r, e->r, false) >= 0)
2680 0 : je = e;
2681 : }
2682 : }
2683 : /* with this expression find other relation */
2684 0 : if (je && (l = find_rel(rels, je->l)) != NULL) {
2685 0 : unsigned int rsingle = is_single(r);
2686 0 : reset_single(r);
2687 0 : sql_rel *nr = rel_crossproduct(sql->sa, l, r, op_join);
2688 0 : if (rsingle)
2689 0 : set_single(r);
2690 0 : rel_join_add_exp(sql->sa, nr, je);
2691 0 : list_append(rels, nr);
2692 0 : list_remove_data(rels, NULL, l);
2693 0 : list_remove_data(rels, NULL, r);
2694 0 : list_remove_data(exps, NULL, je);
2695 0 : restart = 1;
2696 0 : break;
2697 : }
2698 :
2699 : }
2700 : }
2701 : }
2702 156656 : return rels;
2703 : }
2704 :
2705 : static list *
2706 745761 : push_up_join_exps( mvc *sql, sql_rel *rel)
2707 : {
2708 745761 : if (rel_is_ref(rel))
2709 : return NULL;
2710 :
2711 704418 : switch(rel->op) {
2712 292577 : case op_join: {
2713 292577 : sql_rel *rl = rel->l;
2714 292577 : sql_rel *rr = rel->r;
2715 292577 : list *l, *r;
2716 :
2717 292577 : if (rel_is_ref(rl) && rel_is_ref(rr)) {
2718 16 : l = rel->exps;
2719 16 : rel->exps = NULL;
2720 16 : return l;
2721 : }
2722 292561 : l = push_up_join_exps(sql, rl);
2723 292561 : r = push_up_join_exps(sql, rr);
2724 292561 : if (l && r) {
2725 34 : l = list_merge(l, r, (fdup)NULL);
2726 34 : r = NULL;
2727 292527 : } else if (!l) {
2728 177975 : l = r;
2729 177975 : r = NULL;
2730 : }
2731 292561 : if (rel->exps) {
2732 260906 : if (l && !r)
2733 104188 : r = l;
2734 260906 : l = list_merge(rel->exps, r, (fdup)NULL);
2735 : }
2736 292561 : rel->exps = NULL;
2737 292561 : return l;
2738 : }
2739 : default:
2740 : return NULL;
2741 : }
2742 : }
2743 :
2744 : static sql_rel *
2745 288092 : reorder_join(visitor *v, sql_rel *rel)
2746 : {
2747 288092 : list *exps, *rels;
2748 :
2749 288092 : if (is_innerjoin(rel->op) && !is_single(rel) && !rel_is_ref(rel) && list_empty(rel->attr)) {
2750 178000 : if (list_empty(rel->exps)) {
2751 22150 : sql_rel *l = rel->l, *r = rel->r;
2752 22150 : if (!is_innerjoin(l->op) && !is_innerjoin(r->op))
2753 : return rel;
2754 : }
2755 160639 : rel->exps = push_up_join_exps(v->sql, rel);
2756 : }
2757 :
2758 270731 : if (!is_innerjoin(rel->op) || is_single(rel) || rel_is_ref(rel) || list_empty(rel->exps) || !list_empty(rel->attr)) {
2759 114075 : if (!list_empty(rel->exps)) { /* cannot add join idxs to cross products */
2760 77303 : exps = rel->exps;
2761 77303 : rel->exps = NULL; /* should be all crosstables by now */
2762 77303 : rels = sa_list(v->sql->ta);
2763 : /* try to use an join index also for outer joins */
2764 77303 : get_inner_relations(v->sql, rel, rels);
2765 77303 : int cnt = list_length(exps);
2766 77303 : rel->exps = find_fk(v->sql, rels, exps);
2767 77303 : if (list_length(rel->exps) != cnt)
2768 16519 : rel->exps = order_join_expressions(v->sql, exps, rels);
2769 : }
2770 114075 : rel->l = rel_join_order_(v, rel->l);
2771 114075 : rel->r = rel_join_order_(v, rel->r);
2772 : } else {
2773 156656 : exps = rel->exps;
2774 156656 : rel->exps = NULL; /* should be all crosstables by now */
2775 156656 : rels = sa_list(v->sql->ta);
2776 156656 : get_relations(v, rel, rels);
2777 156656 : if (list_length(rels) > 1) {
2778 156656 : rels = push_in_join_down(v->sql, rels, exps);
2779 156656 : rel = order_joins(v, rels, exps);
2780 : } else {
2781 0 : rel->exps = exps;
2782 : }
2783 : }
2784 : return rel;
2785 : }
2786 :
2787 : static sql_rel *
2788 2479266 : rel_join_order_(visitor *v, sql_rel *rel)
2789 : {
2790 2479266 : if (!rel)
2791 : return rel;
2792 :
2793 2463891 : switch (rel->op) {
2794 : case op_basetable:
2795 : break;
2796 3914 : case op_table:
2797 3914 : if (IS_TABLE_PROD_FUNC(rel->flag) || rel->flag == TABLE_FROM_RELATION)
2798 3914 : rel->l = rel_join_order_(v, rel->l);
2799 : break;
2800 : case op_join:
2801 : case op_left:
2802 : case op_right:
2803 : case op_full:
2804 : break;
2805 :
2806 12512 : case op_semi:
2807 : case op_anti:
2808 :
2809 : case op_union:
2810 : case op_inter:
2811 : case op_except:
2812 : case op_merge:
2813 12512 : rel->l = rel_join_order_(v, rel->l);
2814 12512 : rel->r = rel_join_order_(v, rel->r);
2815 12512 : break;
2816 168707 : case op_munion:
2817 168707 : assert(rel->l);
2818 576650 : for (node *n = ((list*)rel->l)->h; n; n = n->next)
2819 407943 : n->data = rel_join_order_(v, n->data);
2820 : break;
2821 1280476 : case op_project:
2822 : case op_select:
2823 : case op_groupby:
2824 : case op_topn:
2825 : case op_sample:
2826 1280476 : rel->l = rel_join_order_(v, rel->l);
2827 1280476 : break;
2828 52 : case op_ddl:
2829 52 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
2830 0 : rel->l = rel_join_order_(v, rel->l);
2831 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
2832 52 : rel->l = rel_join_order_(v, rel->l);
2833 52 : rel->r = rel_join_order_(v, rel->r);
2834 : }
2835 : break;
2836 12011 : case op_insert:
2837 : case op_update:
2838 : case op_delete:
2839 12011 : rel->r = rel_join_order_(v, rel->r);
2840 12011 : break;
2841 : case op_truncate:
2842 : break;
2843 : }
2844 2463891 : if (is_join(rel->op))
2845 288092 : rel = reorder_join(v, rel);
2846 : return rel;
2847 : }
2848 :
2849 : static sql_rel *
2850 84489 : rel_join_order(visitor *v, global_props *gp, sql_rel *rel)
2851 : {
2852 84489 : (void) gp;
2853 84489 : sql_rel *r = rel_join_order_(v, rel);
2854 84489 : sa_reset(v->sql->ta);
2855 84489 : return r;
2856 : }
2857 :
2858 : run_optimizer
2859 643968 : bind_join_order(visitor *v, global_props *gp)
2860 : {
2861 643968 : int flag = v->sql->sql_optimizer;
2862 643772 : return gp->opt_level == 1 && gp->opt_cycle < 10 && !gp->cnt[op_update] && (gp->cnt[op_join] || gp->cnt[op_left] ||
2863 1281408 : gp->cnt[op_right] || gp->cnt[op_full]) && (flag & join_order) ? rel_join_order : NULL;
2864 : }
2865 :
2866 : /* this join order is to be done once after statistics are gathered */
2867 : run_optimizer
2868 732635 : bind_join_order2(visitor *v, global_props *gp)
2869 : {
2870 : /*int flag = v->sql->sql_optimizer;
2871 : return gp->opt_level == 1 && !gp->has_special_modify && !gp->cnt[op_update] && (gp->cnt[op_join] || gp->cnt[op_left] ||
2872 : gp->cnt[op_right] || gp->cnt[op_full]) && (flag & join_order) ? rel_join_order : NULL;*/
2873 : /* TODO we have to propagate count statistics here */
2874 732635 : (void) v;
2875 732635 : (void) gp;
2876 732635 : return NULL;
2877 : }
2878 :
2879 :
2880 : static int
2881 13915 : is_identity_of(sql_exp *e, sql_rel *l)
2882 : {
2883 13915 : if (e->type != e_cmp)
2884 : return 0;
2885 13900 : if (!is_identity(e->l, l) || !is_identity(e->r, l) || (e->f && !is_identity(e->f, l)))
2886 13900 : return 0;
2887 : return 1;
2888 : }
2889 :
2890 : static inline sql_rel *
2891 18140 : rel_rewrite_semijoin(visitor *v, sql_rel *rel)
2892 : {
2893 18140 : assert(is_semi(rel->op));
2894 : {
2895 18140 : sql_rel *l = rel->l;
2896 18140 : sql_rel *r = rel->r;
2897 18140 : sql_rel *rl = (r->l)?r->l:NULL;
2898 18140 : int on_identity = 1;
2899 :
2900 18140 : if (!rel->exps || list_length(rel->exps) != 1 || !is_identity_of(rel->exps->h->data, l))
2901 : on_identity = 0;
2902 :
2903 : /* rewrite {semi,anti}join (A, join(A,B)) into {semi,anti}join (A,B)
2904 : * and {semi,anti}join (A, join(B,A)) into {semi,anti}join (A,B)
2905 : * Where the semi/anti join is done using the identity */
2906 0 : if (on_identity && l->ref.refcnt == 2 && ((is_join(r->op) && (l == r->l || l == r->r)) ||
2907 0 : (is_project(r->op) && rl && is_join(rl->op) && (l == rl->l || l == rl->r)))){
2908 0 : sql_rel *or = r;
2909 :
2910 0 : if (is_project(r->op))
2911 0 : r = rl;
2912 :
2913 0 : if (l == r->r)
2914 0 : rel->r = rel_dup(r->l);
2915 : else
2916 0 : rel->r = rel_dup(r->r);
2917 :
2918 0 : rel->exps = r->exps;
2919 0 : r->exps = NULL;
2920 0 : rel->attr = r->attr;
2921 0 : r->attr = NULL;
2922 0 : rel_destroy(or);
2923 0 : v->changes++;
2924 : }
2925 : }
2926 : {
2927 18140 : sql_rel *l = rel->l, *rl = NULL;
2928 18140 : sql_rel *r = rel->r, *or = r;
2929 :
2930 18140 : if (r)
2931 18140 : rl = r->l;
2932 18140 : if (r && is_project(r->op)) {
2933 15011 : r = rl;
2934 15011 : if (r)
2935 14804 : rl = r->l;
2936 : }
2937 :
2938 : /* More general case is (join reduction)
2939 : {semi,anti}join (A, join(A,B) [A.c1 == B.c1]) [ A.c1 == B.c1 ]
2940 : into {semi,anti}join (A,B) [ A.c1 == B.c1 ]
2941 :
2942 : for semijoin also A.c1 == B.k1 ] [ A.c1 == B.k2 ] could be rewritten
2943 : */
2944 18140 : if (l && r && rl &&
2945 16863 : is_basetable(l->op) && is_basetable(rl->op) &&
2946 3884 : is_join(r->op) && l->l == rl->l)
2947 : {
2948 7 : node *n, *m;
2949 7 : list *exps;
2950 :
2951 13 : if (!rel->exps || !r->exps ||
2952 6 : list_length(rel->exps) != list_length(r->exps))
2953 1 : return rel;
2954 6 : exps = new_exp_list(v->sql->sa);
2955 :
2956 : /* are the join conditions equal */
2957 6 : for (n = rel->exps->h, m = r->exps->h;
2958 6 : n && m; n = n->next, m = m->next)
2959 : {
2960 6 : sql_exp *le = NULL, *oe = n->data;
2961 6 : sql_exp *re = NULL, *ne = m->data;
2962 6 : sql_column *cl;
2963 6 : int equal = 0;
2964 :
2965 6 : if (oe->type != e_cmp || ne->type != e_cmp ||
2966 6 : oe->flag != cmp_equal ||
2967 6 : ne->flag != cmp_equal || is_anti(oe) || is_anti(ne))
2968 : return rel;
2969 :
2970 6 : if ((cl = exp_find_column(rel->l, oe->l, -2)) != NULL) {
2971 6 : le = oe->l;
2972 6 : re = oe->r;
2973 0 : } else if ((cl = exp_find_column(rel->l, oe->r, -2)) != NULL) {
2974 0 : le = oe->r;
2975 0 : re = oe->l;
2976 : } else
2977 : return rel;
2978 :
2979 6 : if (exp_find_column(rl, ne->l, -2) == cl) {
2980 6 : sql_exp *e = (or != r)?rel_find_exp(or, re):re;
2981 :
2982 6 : if (e)
2983 6 : equal = exp_match_exp(ne->r, e);
2984 6 : if (!e || !equal)
2985 : return rel;
2986 0 : re = ne->r;
2987 0 : } else if (exp_find_column(rl, ne->r, -2) == cl) {
2988 0 : sql_exp *e = (or != r)?rel_find_exp(or, re):re;
2989 :
2990 0 : if (e)
2991 0 : equal = exp_match_exp(ne->l, e);
2992 0 : if (!e || !equal)
2993 : return rel;
2994 0 : re = ne->l;
2995 : } else
2996 : return rel;
2997 :
2998 0 : ne = exp_compare(v->sql->sa, le, re, cmp_equal);
2999 0 : append(exps, ne);
3000 : }
3001 :
3002 0 : rel->r = rel_dup(r->r);
3003 0 : rel->exps = exps;
3004 0 : rel_destroy(or);
3005 0 : v->changes++;
3006 : }
3007 : }
3008 : return rel;
3009 : }
3010 :
3011 : /*
3012 : * Push semijoins down, pushes the semijoin through a join.
3013 : *
3014 : * semijoin( join(A, B) [ A.x == B.y ], C ) [ A.z == C.c ]
3015 : * ->
3016 : * join( semijoin(A, C) [ A.z == C.c ], B ) [ A.x == B.y ]
3017 : *
3018 : * also push simple expressions of a semijoin down if they only
3019 : * involve the left sided of the semijoin.
3020 : *
3021 : * in some cases the other way is useful, ie push join down
3022 : * semijoin. When the join reduces (ie when there are selects on it).
3023 : *
3024 : * At the moment, we only flag changes by this optimizer on the first level of optimization
3025 : */
3026 : static inline sql_rel *
3027 251792 : rel_push_semijoin_down_or_up(visitor *v, sql_rel *rel)
3028 : {
3029 251792 : uint8_t cycle = *(uint8_t*) v->data;
3030 :
3031 251792 : if (rel->op == op_join && rel->exps && rel->l) {
3032 216784 : sql_rel *l = rel->l, *r = rel->r;
3033 :
3034 216784 : if (is_semi(l->op) && !rel_is_ref(l) && is_select(r->op) && !rel_is_ref(r)) {
3035 16 : rel->l = l->l;
3036 16 : l->l = rel;
3037 16 : if (cycle <= 0)
3038 0 : v->changes++;
3039 16 : return l;
3040 : }
3041 : }
3042 : /* also case with 2 joins */
3043 : /* join ( join ( semijoin(), table), select (table)); */
3044 251776 : if (rel->op == op_join && rel->exps && rel->l) {
3045 216768 : sql_rel *l = rel->l, *r = rel->r;
3046 216768 : sql_rel *ll;
3047 :
3048 216768 : if (is_join(l->op) && !rel_is_ref(l) && is_select(r->op) && !rel_is_ref(r)) {
3049 8044 : ll = l->l;
3050 8044 : if (is_semi(ll->op) && !rel_is_ref(ll)) {
3051 27 : l->l = ll->l;
3052 27 : ll->l = rel;
3053 27 : if (cycle <= 0)
3054 0 : v->changes++;
3055 27 : return ll;
3056 : }
3057 : }
3058 : }
3059 : /* first push down the expressions involving only A */
3060 251749 : if (rel->op == op_semi && rel->exps && rel->l) {
3061 11573 : sql_rel *jl = rel->l, *ojl = jl;
3062 :
3063 11573 : set_processed(jl);
3064 26730 : for (node *n = rel->exps->h; n;) {
3065 15157 : node *next = n->next;
3066 15157 : sql_exp *e = n->data;
3067 :
3068 15157 : if (n != rel->exps->h && e->type == e_cmp && rel_rebind_exp(v->sql, jl, e)) {
3069 16 : if (!is_select(jl->op) || rel_is_ref(jl))
3070 14 : rel->l = jl = rel_select(v->sql->sa, jl, NULL);
3071 16 : rel_select_add_exp(v->sql->sa, jl, e);
3072 16 : list_remove_node(rel->exps, NULL, n);
3073 16 : if (cycle <= 0)
3074 16 : v->changes++;
3075 : }
3076 : n = next;
3077 : }
3078 11573 : if (ojl != jl)
3079 14 : set_processed(jl);
3080 : }
3081 251749 : if (rel->op == op_semi && rel->exps && rel->l) {
3082 11573 : operator_type op = rel->op, lop;
3083 11573 : node *n;
3084 11573 : sql_rel *l = rel->l, *ll = NULL, *lr = NULL;
3085 11573 : sql_rel *r = rel->r;
3086 11573 : list *exps = rel->exps, *nsexps, *njexps, *nsattr, *njattr;
3087 11573 : int left = 1, right = 1;
3088 :
3089 : /* handle project
3090 : if (l->op == op_project && !need_distinct(l))
3091 : l = l->l;
3092 : */
3093 :
3094 11573 : if (!is_join(l->op) || is_full(l->op) || rel_is_ref(l) || is_single(l))
3095 : return rel;
3096 :
3097 1123 : lop = l->op;
3098 1123 : ll = l->l;
3099 1123 : lr = l->r;
3100 :
3101 : /* check which side is used and other exps are atoms or from right of semijoin */
3102 2203 : for(n = exps->h; n; n = n->next) {
3103 1247 : sql_exp *sje = n->data;
3104 :
3105 1247 : if (sje->type != e_cmp || is_complex_exp(sje->flag))
3106 : return rel;
3107 : /* sje->l from ll and sje->r/f from semijoin r ||
3108 : * sje->l from semijoin r and sje->r/f from ll ||
3109 : * sje->l from lr and sje->r/f from semijoin r ||
3110 : * sje->l from semijoin r and sje->r/f from lr */
3111 2462 : if (left &&
3112 2462 : ((rel_rebind_exp(v->sql, ll, sje->l) && rel_rebind_exp(v->sql, rel->r, sje->r) && (!sje->f || rel_rebind_exp(v->sql, rel->r, sje->f))) ||
3113 1253 : (rel_rebind_exp(v->sql, rel->r, sje->l) && rel_rebind_exp(v->sql, ll, sje->r) && (!sje->f || rel_rebind_exp(v->sql, ll, sje->f)))))
3114 : right = 0;
3115 : else
3116 926 : left = 0;
3117 1730 : if (right &&
3118 1608 : ((rel_rebind_exp(v->sql, lr, sje->l) && rel_rebind_exp(v->sql, rel->r, sje->r) && (!sje->f || rel_rebind_exp(v->sql, rel->r, sje->f))) ||
3119 535 : (rel_rebind_exp(v->sql, rel->r, sje->l) && rel_rebind_exp(v->sql, lr, sje->r) && (!sje->f || rel_rebind_exp(v->sql, lr, sje->f)))))
3120 : left = 0;
3121 : else
3122 : right = 0;
3123 1231 : if (!right && !left)
3124 : return rel;
3125 : }
3126 956 : if (left && is_right(lop))
3127 : return rel;
3128 955 : if (right && is_left(lop))
3129 : return rel;
3130 954 : nsexps = exps_copy(v->sql, rel->exps);
3131 954 : nsattr = exps_copy(v->sql, rel->attr);
3132 954 : njexps = exps_copy(v->sql, l->exps);
3133 954 : njattr = exps_copy(v->sql, l->attr);
3134 954 : if (left)
3135 180 : l = rel_crossproduct(v->sql->sa, rel_dup(ll), rel_dup(r), op);
3136 : else
3137 774 : l = rel_crossproduct(v->sql->sa, rel_dup(lr), rel_dup(r), op);
3138 954 : l->exps = nsexps;
3139 954 : l->attr = nsattr;
3140 954 : set_processed(l);
3141 954 : if (left)
3142 180 : l = rel_crossproduct(v->sql->sa, l, rel_dup(lr), lop);
3143 : else
3144 774 : l = rel_crossproduct(v->sql->sa, rel_dup(ll), l, lop);
3145 954 : l->exps = njexps;
3146 954 : l->attr = njattr;
3147 954 : set_processed(l);
3148 954 : rel_destroy(rel);
3149 954 : rel = l;
3150 954 : if (cycle <= 0)
3151 566 : v->changes++;
3152 : }
3153 : return rel;
3154 : }
3155 :
3156 : /* antijoin(a, union(b,c)) -> antijoin(antijoin(a,b), c) */
3157 : static inline sql_rel *
3158 6511 : rel_rewrite_antijoin(visitor *v, sql_rel *rel)
3159 : {
3160 6511 : sql_rel *l = rel->l;
3161 6511 : sql_rel *r = rel->r;
3162 :
3163 6511 : assert(rel->op == op_anti);
3164 6511 : if (l && !rel_is_ref(l) && r && !rel_is_ref(r) && is_union(r->op) && !is_single(r)) {
3165 0 : sql_rel *rl = rel_dup(r->l), *nl;
3166 0 : sql_rel *rr = rel_dup(r->r);
3167 :
3168 0 : if (!is_project(rl->op))
3169 0 : rl = rel_project(v->sql->sa, rl,
3170 : rel_projections(v->sql, rl, NULL, 1, 1));
3171 0 : if (!is_project(rr->op))
3172 0 : rr = rel_project(v->sql->sa, rr,
3173 : rel_projections(v->sql, rr, NULL, 1, 1));
3174 0 : rel_rename_exps(v->sql, r->exps, rl->exps);
3175 0 : rel_rename_exps(v->sql, r->exps, rr->exps);
3176 :
3177 0 : nl = rel_crossproduct(v->sql->sa, rel->l, rl, op_anti);
3178 0 : nl->exps = exps_copy(v->sql, rel->exps);
3179 0 : nl->attr = exps_copy(v->sql, rel->attr);
3180 0 : set_processed(nl);
3181 0 : rel->l = nl;
3182 0 : rel->r = rr;
3183 0 : rel_destroy(r);
3184 0 : v->changes++;
3185 0 : return rel;
3186 : }
3187 : return rel;
3188 : }
3189 :
3190 : static sql_rel *
3191 1603209 : rel_optimize_semi_and_anti_(visitor *v, sql_rel *rel)
3192 : {
3193 : /* rewrite semijoin (A, join(A,B)) into semijoin (A,B) */
3194 1603209 : if (rel && is_semi(rel->op))
3195 18140 : rel = rel_rewrite_semijoin(v, rel);
3196 : /* push semijoin through join */
3197 1603209 : if (rel && (is_semi(rel->op) || is_innerjoin(rel->op)))
3198 251792 : rel = rel_push_semijoin_down_or_up(v, rel);
3199 : /* antijoin(a, union(b,c)) -> antijoin(antijoin(a,b), c) */
3200 1603209 : if (rel && rel->op == op_anti)
3201 6511 : rel = rel_rewrite_antijoin(v, rel);
3202 1603209 : return rel;
3203 : }
3204 :
3205 : static sql_rel *
3206 91676 : rel_optimize_semi_and_anti(visitor *v, global_props *gp, sql_rel *rel)
3207 : {
3208 91676 : v->data = &gp->opt_cycle;
3209 91676 : rel = rel_visitor_bottomup(v, rel, &rel_optimize_semi_and_anti_);
3210 91676 : v->data = gp;
3211 91676 : return rel;
3212 : }
3213 :
3214 : run_optimizer
3215 643783 : bind_optimize_semi_and_anti(visitor *v, global_props *gp)
3216 : {
3217 : /* Important -> Re-write semijoins after rel_join_order */
3218 643783 : int flag = v->sql->sql_optimizer;
3219 643610 : return gp->opt_level == 1 && (gp->cnt[op_join] || gp->cnt[op_left] || gp->cnt[op_right]
3220 1287393 : || gp->cnt[op_full] || gp->cnt[op_semi] || gp->cnt[op_anti]) && (flag & optimize_semi_and_anti) ? rel_optimize_semi_and_anti : NULL;
3221 : }
3222 :
3223 :
3224 : static sql_rel *
3225 778590 : rel_semijoin_use_fk(visitor *v, sql_rel *rel)
3226 : {
3227 778590 : if (is_semi(rel->op) && rel->exps) {
3228 6305 : list *exps = rel->exps;
3229 6305 : list *rels = sa_list(v->sql->sa);
3230 :
3231 6305 : rel->exps = NULL;
3232 6305 : append(rels, rel->l);
3233 6305 : append(rels, rel->r);
3234 6305 : (void) find_fk( v->sql, rels, exps);
3235 :
3236 6305 : rel->exps = exps;
3237 : }
3238 778590 : return rel;
3239 : }
3240 :
3241 : /*
3242 : * Push {semi}joins down, pushes the joins through group by expressions.
3243 : * When the join is on the group by columns, we can push the joins left
3244 : * under the group by. This should only be done, iff the new semijoin would
3245 : * reduce the input table to the groupby. So there should be a reduction
3246 : * (selection) on the table A and this should be propagated to the groupby via
3247 : * for example a primary key.
3248 : *
3249 : * {semi}join( A, groupby( B ) [gbe][aggrs] ) [ gbe == A.x ]
3250 : * ->
3251 : * {semi}join( A, groupby( semijoin(B,A) [gbe == A.x] ) [gbe][aggrs] ) [ gbe == A.x ]
3252 : */
3253 : static inline sql_rel *
3254 778590 : rel_push_join_down(visitor *v, sql_rel *rel)
3255 : {
3256 778590 : if (!rel_is_ref(rel) && ((is_left(rel->op) || rel->op == op_join || is_semi(rel->op)) && rel->l && rel->exps)) {
3257 114496 : sql_rel *gb = rel->r, *ogb = gb, *l = NULL, *rell = rel->l;
3258 :
3259 114496 : if (is_simple_project(gb->op) && !rel_is_ref(gb))
3260 16673 : gb = gb->l;
3261 :
3262 114496 : if (rel_is_ref(rell) || !gb || rel_is_ref(gb))
3263 : return rel;
3264 :
3265 105341 : if (is_groupby(gb->op) && gb->r && list_length(gb->r)) {
3266 140 : list *exps = rel->exps, *jes = new_exp_list(v->sql->sa), *gbes = gb->r;
3267 140 : node *n, *m;
3268 : /* find out if all group by expressions are used in the join */
3269 145 : for(n = gbes->h; n; n = n->next) {
3270 140 : sql_exp *gbe = n->data;
3271 140 : int fnd = 0;
3272 140 : sql_alias *rname = NULL;
3273 140 : const char *name = NULL;
3274 :
3275 : /* project in between, ie find alias */
3276 : /* first find expression in expression list */
3277 140 : gbe = exps_uses_exp( gb->exps, gbe);
3278 140 : if (!gbe)
3279 0 : continue;
3280 140 : if (ogb != gb)
3281 117 : gbe = exps_uses_exp( ogb->exps, gbe);
3282 140 : if (gbe) {
3283 137 : rname = exp_find_rel_name(gbe);
3284 137 : name = exp_name(gbe);
3285 : }
3286 :
3287 137 : if (!name)
3288 3 : return rel;
3289 :
3290 341 : for (m = exps->h; m && !fnd; m = m->next) {
3291 204 : sql_exp *je = m->data;
3292 :
3293 204 : if (je->card >= CARD_ATOM && je->type == e_cmp &&
3294 204 : !is_complex_exp(je->flag)) {
3295 : /* expect right expression to match */
3296 204 : sql_exp *r = je->r;
3297 :
3298 204 : if (r == 0 || r->type != e_column)
3299 11 : continue;
3300 193 : if (r->l && rname && a_match(r->l, rname) && strcmp(r->r, name)==0) {
3301 : fnd = 1;
3302 71 : } else if (!r->l && !rname && strcmp(r->r, name)==0) {
3303 : fnd = 1;
3304 : }
3305 : if (fnd) {
3306 122 : sql_exp *le = je->l;
3307 122 : sql_exp *re = exp_push_down_prj(v->sql, r, gb, gb->l);
3308 122 : if (!re || (list_length(jes) == 0 && !find_prop(le->p, PROP_HASHCOL))) {
3309 : fnd = 0;
3310 : } else {
3311 5 : int anti = is_anti(je), semantics = is_semantics(je);
3312 :
3313 5 : je = exp_compare(v->sql->sa, le, re, je->flag);
3314 5 : if (anti) set_anti(je);
3315 5 : if (semantics) set_semantics(je);
3316 5 : list_append(jes, je);
3317 : }
3318 : }
3319 : }
3320 : }
3321 137 : if (!fnd)
3322 : return rel;
3323 : }
3324 5 : l = rel_dup(rel->l);
3325 :
3326 : /* push join's left side (as semijoin) down group by */
3327 5 : l = gb->l = rel_crossproduct(v->sql->sa, gb->l, l, op_semi);
3328 5 : l->exps = jes;
3329 5 : set_processed(l);
3330 5 : v->changes++;
3331 5 : return rel;
3332 : }
3333 : }
3334 : return rel;
3335 : }
3336 :
3337 : static bool
3338 364 : check_projection_on_foreignside(sql_rel *r, list *pexps, int fk_left)
3339 : {
3340 : /* projection columns from the foreign side */
3341 364 : if (list_empty(pexps))
3342 : return true;
3343 1284 : for (node *n = pexps->h; n; n = n->next) {
3344 1219 : sql_exp *pe = n->data;
3345 :
3346 1219 : if (pe && is_atom(pe->type))
3347 7 : continue;
3348 1212 : if (pe && !is_alias(pe->type))
3349 : return false;
3350 : /* check for columns from the pk side, then keep the join with the pk */
3351 1191 : if ((fk_left && rel_find_exp(r->r, pe)) || (!fk_left && rel_find_exp(r->l, pe)))
3352 230 : return false;
3353 : }
3354 : return true;
3355 : }
3356 :
3357 : static sql_rel *
3358 129411 : rel_simplify_project_fk_join(mvc *sql, sql_rel *r, list *pexps, list *orderexps, int *changes)
3359 : {
3360 129411 : sql_rel *rl = r->l, *rr = r->r, *nr = NULL;
3361 129411 : sql_exp *je, *le, *nje, *re;
3362 129411 : int fk_left = 1;
3363 :
3364 : /* check for foreign key join */
3365 129411 : if (list_length(r->exps) != 1 || !list_empty(r->attr))
3366 7566 : return r;
3367 121845 : if (!(je = exps_find_prop(r->exps, PROP_JOINIDX)) || je->flag != cmp_equal)
3368 : return r;
3369 : /* je->l == foreign expression, je->r == primary expression */
3370 420 : if (rel_find_exp(r->l, je->l)) {
3371 : fk_left = 1;
3372 34 : } else if (rel_find_exp(r->r, je->l)) {
3373 : fk_left = 0;
3374 : } else { /* not found */
3375 : return r;
3376 : }
3377 :
3378 : /* primary side must be a full table */
3379 386 : if ((fk_left && (!is_left(r->op) && !is_full(r->op)) && !is_basetable(rr->op)) ||
3380 34 : (!fk_left && (!is_right(r->op) && !is_full(r->op)) && !is_basetable(rl->op)))
3381 134 : return r;
3382 :
3383 286 : if (!check_projection_on_foreignside(r, pexps, fk_left) || !check_projection_on_foreignside(r, orderexps, fk_left))
3384 249 : return r;
3385 :
3386 : /* rewrite, ie remove pkey side if possible */
3387 37 : le = (sql_exp*)je->l, re = (sql_exp*)je->l;
3388 :
3389 : /* both have NULL and there are semantics, the join cannot be removed */
3390 37 : if (is_semantics(je) && has_nil(le) && has_nil(re))
3391 : return r;
3392 :
3393 37 : (*changes)++;
3394 : /* if the foreign key column doesn't have NULL values, then return it */
3395 37 : if (!has_nil(le) || is_full(r->op) || (fk_left && is_left(r->op)) || (!fk_left && is_right(r->op))) {
3396 : /* if ->attr, introduce group by on index */
3397 31 : if (fk_left) {
3398 23 : nr = rel_dup(r->l);
3399 : } else {
3400 8 : nr = rel_dup(r->r);
3401 : }
3402 31 : if (!list_empty(r->attr)) {
3403 0 : nr = rel_groupby(sql, nr, NULL);
3404 0 : if (nr) {
3405 : // printf("# introduced groupby \n");
3406 0 : nr->r = append(sa_list(sql->sa), le);
3407 0 : nr->exps = r->attr;
3408 : }
3409 : }
3410 31 : return nr;
3411 : }
3412 :
3413 : /* remove NULL values, ie generate a select not null */
3414 6 : nje = exp_compare(sql->sa, exp_ref(sql, le), exp_atom(sql->sa, atom_general(sql->sa, exp_subtype(le), NULL, 0)), cmp_equal);
3415 6 : set_anti(nje);
3416 6 : set_has_no_nil(nje);
3417 6 : set_semantics(nje);
3418 6 : if (fk_left) {
3419 6 : nr = rel_dup(r->l);
3420 : } else {
3421 0 : nr = rel_dup(r->r);
3422 : }
3423 6 : nr = rel_select(sql->sa, nr, nje);
3424 6 : set_processed(nr);
3425 6 : return nr;
3426 : }
3427 :
3428 : static sql_rel *
3429 1853 : rel_simplify_count_fk_join(mvc *sql, sql_rel *r, list *gexps, list *gcols, int *changes)
3430 : {
3431 1853 : sql_rel *rl = r->l, *rr = r->r, *nr = NULL;
3432 1853 : sql_exp *je, *le, *nje, *re, *oce;
3433 1853 : int fk_left = 1;
3434 :
3435 : /* check for foreign key join */
3436 1853 : if (list_length(r->exps) != 1)
3437 : return r;
3438 1852 : if (!(je = exps_find_prop(r->exps, PROP_JOINIDX)) || je->flag != cmp_equal)
3439 : return r;
3440 : /* je->l == foreign expression, je->r == primary expression */
3441 63 : if (rel_find_exp(r->l, je->l)) {
3442 : fk_left = 1;
3443 7 : } else if (rel_find_exp(r->r, je->l)) {
3444 : fk_left = 0;
3445 : } else { /* not found */
3446 : return r;
3447 : }
3448 :
3449 63 : oce = gexps->h->data;
3450 63 : if (oce->l) /* we only handle COUNT(*) */
3451 : return r;
3452 :
3453 : /* primary side must be a full table */
3454 62 : if ((fk_left && (!is_left(r->op) && !is_full(r->op)) && !is_basetable(rr->op)) ||
3455 6 : (!fk_left && (!is_right(r->op) && !is_full(r->op)) && !is_basetable(rl->op)))
3456 : return r;
3457 :
3458 33 : if (fk_left && is_join(rl->op) && !rel_is_ref(rl)) {
3459 12 : r->l = rel_simplify_count_fk_join(sql, rl, gexps, gcols, changes);
3460 12 : if (rl != r->l)
3461 9 : rel_destroy(rl);
3462 : }
3463 39 : if (!fk_left && is_join(rr->op) && !rel_is_ref(rr)) {
3464 2 : r->r = rel_simplify_count_fk_join(sql, rr, gexps, gcols, changes);
3465 2 : if (rr != r->r)
3466 2 : rel_destroy(rr);
3467 : }
3468 :
3469 39 : if (!check_projection_on_foreignside(r, gcols, fk_left))
3470 : return r;
3471 :
3472 : /* rewrite, ie remove pkey side if possible */
3473 37 : le = (sql_exp*)je->l, re = (sql_exp*)je->l;
3474 :
3475 : /* both have NULL and there are semantics, the join cannot be removed */
3476 37 : if (is_semantics(je) && has_nil(le) && has_nil(re))
3477 : return r;
3478 :
3479 37 : (*changes)++;
3480 : /* if the foreign key column doesn't have NULL values, then return it */
3481 37 : if (!has_nil(le) || is_full(r->op) || (fk_left && is_left(r->op)) || (!fk_left && is_right(r->op))) {
3482 31 : if (fk_left) {
3483 25 : nr = rel_dup(r->l);
3484 : } else {
3485 6 : nr = rel_dup(r->r);
3486 : }
3487 31 : return nr;
3488 : }
3489 :
3490 : /* remove NULL values, ie generate a select not null */
3491 6 : nje = exp_compare(sql->sa, exp_ref(sql, le), exp_atom(sql->sa, atom_general(sql->sa, exp_subtype(le), NULL, 0)), cmp_equal);
3492 6 : set_anti(nje);
3493 6 : set_has_no_nil(nje);
3494 6 : set_semantics(nje);
3495 6 : if (fk_left) {
3496 6 : nr = rel_dup(r->l);
3497 : } else {
3498 0 : nr = rel_dup(r->r);
3499 : }
3500 6 : nr = rel_select(sql->sa, nr, nje);
3501 6 : set_processed(nr);
3502 6 : return nr;
3503 : }
3504 :
3505 : /*
3506 : * Handle (left/right/outer/natural) join fk-pk rewrites
3507 : * 1 group by ( fk-pk-join () ) [ count(*) ] -> group by ( fk )
3508 : * 2 project ( fk-pk-join () ) [ fk-column ] -> project (fk table)[ fk-column ]
3509 : * 3 project ( fk1-pk1-join( fk2-pk2-join()) [ fk-column, pk1 column ] -> project (fk1-pk1-join)[ fk-column, pk1 column ]
3510 : */
3511 : static inline sql_rel *
3512 1840006 : rel_simplify_fk_joins(visitor *v, sql_rel *rel)
3513 : {
3514 1840006 : sql_rel *r = NULL;
3515 :
3516 1840006 : if (is_simple_project(rel->op))
3517 623993 : r = rel->l;
3518 :
3519 1840043 : while (is_simple_project(rel->op) && r && list_length(r->exps) == 1 && (is_join(r->op) || r->op == op_semi) && !(rel_is_ref(r))) {
3520 129411 : sql_rel *or = r;
3521 :
3522 129411 : r = rel_simplify_project_fk_join(v->sql, r, rel->exps, rel->r, &v->changes);
3523 129411 : if (r == or)
3524 : return rel;
3525 37 : rel_destroy(rel->l);
3526 37 : rel->l = r;
3527 : }
3528 :
3529 1710632 : if (!is_groupby(rel->op))
3530 : return rel;
3531 :
3532 49008 : r = rel->l;
3533 67773 : while(r && is_simple_project(r->op))
3534 18765 : r = r->l;
3535 :
3536 61566 : while (is_groupby(rel->op) && !rel_is_ref(rel) && r && (is_join(r->op) || r->op == op_semi) && list_length(r->exps) == 1 && !(rel_is_ref(r)) &&
3537 : /* currently only single count aggregation is handled, no other projects or aggregation */
3538 65074 : list_length(rel->exps) == 1 && exp_aggr_is_count(rel->exps->h->data)) {
3539 1839 : sql_rel *or = r;
3540 :
3541 1839 : r = rel_simplify_count_fk_join(v->sql, r, rel->exps, rel->r, &v->changes);
3542 1839 : if (r == or)
3543 : return rel;
3544 26 : rel_destroy(rel->l);
3545 26 : rel->l = r;
3546 : }
3547 : return rel;
3548 : }
3549 :
3550 : /*
3551 : * Gets the column expressions of a diff function and adds them to "columns".
3552 : * The diff function has two possible argument types: either a sql_exp representing a column
3553 : * or a sql_exp representing another diff function, therefore this function is recursive.
3554 : */
3555 : static void
3556 116 : get_diff_function_columns(sql_exp *diffExp, list *columns) {
3557 116 : list *args = diffExp->l;
3558 :
3559 252 : for (node *arg = args->h; arg; arg = arg->next) {
3560 136 : sql_exp *exp = arg->data;
3561 :
3562 : // diff function
3563 136 : if (exp->type == e_func) {
3564 20 : get_diff_function_columns(exp, columns);
3565 : }
3566 : // column
3567 : else {
3568 116 : list_append(columns, exp);
3569 : }
3570 : }
3571 116 : }
3572 :
3573 : /*
3574 : * Builds a list of aggregation key columns to be used by the select push down algorithm, namely for
3575 : * window functions. Returns NULL if the window function does not partition by any column
3576 : */
3577 : static list *
3578 1615 : get_partition_by_key_columns(allocator *sa, sql_rel *r) {
3579 18050 : for (node* n = r->exps->h; n; n = n->next) {
3580 16621 : sql_exp *e = n->data;
3581 :
3582 16621 : if (e->type == e_func) {
3583 5915 : sql_subfunc *f = e->f;
3584 :
3585 : // aggregation function
3586 5915 : if (!strcmp(f->func->base.name, "rank")) {
3587 186 : list* rankArguments = e->l;
3588 : // the partition key is the second argument
3589 186 : sql_exp *partitionExp = rankArguments->h->next->data;
3590 :
3591 : // check if the key contains any columns, i.e., is a diff function
3592 186 : if (partitionExp->type == e_func) {
3593 : // get columns to list
3594 96 : list *aggColumns = sa_list(sa);
3595 96 : get_diff_function_columns(partitionExp, aggColumns);
3596 96 : return aggColumns;
3597 : }
3598 : // the function has no aggregation columns (e_atom of boolean)
3599 : else {
3600 : return NULL;
3601 : }
3602 :
3603 : }
3604 : }
3605 : }
3606 : return NULL;
3607 : }
3608 :
3609 : /*
3610 : static bool
3611 : rank_exp_has_partition_key(sql_exp *e)
3612 : {
3613 : if (e->type == e_func) {
3614 : sql_subfunc *f = e->f;
3615 :
3616 : if (f->func->type == F_ANALYTIC) {
3617 : list *args = e->l;
3618 :
3619 : if (list_length(args) >= 2) { // the partition key is the second argument
3620 : return true;
3621 : }
3622 : }
3623 : }
3624 : return false;
3625 : }
3626 : */
3627 :
3628 : /*
3629 : * Checks if a filter column is also used as an aggregation key, so it can be later safely pushed down.
3630 : */
3631 : static int
3632 149 : filter_column_in_partition_by_columns(sql_exp *column, list *keyColumns)
3633 : {
3634 : /* check if it is a column or an e_convert, and get the actual column if it is the latter */
3635 149 : if (column->type == e_convert) {
3636 1 : column = column->l;
3637 : }
3638 :
3639 149 : char *tableName = column->l;
3640 149 : char *columnName = column->r;
3641 :
3642 314 : for (node *n = keyColumns->h; n; n = n->next) {
3643 174 : sql_exp *keyCol = n->data;
3644 174 : char *keyColTableName = keyCol->l;
3645 174 : char *keyColColumnName = keyCol->r;
3646 :
3647 174 : if (!strcmp(tableName, keyColTableName) && !strcmp(columnName, keyColColumnName)) {
3648 : /* match */
3649 : return 1;
3650 : }
3651 : }
3652 :
3653 : /* no matches found */
3654 : return 0;
3655 : }
3656 :
3657 : /*
3658 : * Push select down, pushes the selects through (simple) projections. Also
3659 : * it cleans up the projections which become useless.
3660 : *
3661 : * WARNING - Make sure to call try_remove_empty_select macro before returning so we ensure
3662 : * possible generated empty selects won't never be generated
3663 : */
3664 : static sql_rel *
3665 3694008 : rel_push_select_down(visitor *v, sql_rel *rel)
3666 : {
3667 3694008 : list *exps = NULL;
3668 3694008 : sql_rel *r = NULL;
3669 3694008 : node *n;
3670 :
3671 3694008 : if (rel_is_ref(rel)) {
3672 72686 : if (is_select(rel->op) && !list_empty(rel->exps)) {
3673 : /* add inplace empty select */
3674 1744 : sql_rel *l = rel_select(v->sql->sa, rel->l, NULL);
3675 :
3676 1744 : l->exps = rel->exps;
3677 1744 : set_processed(l);
3678 1744 : rel->exps = NULL;
3679 1744 : rel->l = l;
3680 1744 : v->changes++;
3681 : }
3682 72686 : return rel;
3683 : }
3684 :
3685 : /* don't make changes for empty selects */
3686 3621322 : if (is_select(rel->op) && list_empty(rel->exps))
3687 10 : return try_remove_empty_select(v, rel);
3688 :
3689 : /* merge 2 selects */
3690 3621312 : r = rel->l;
3691 3621312 : if (is_select(rel->op) && r && r->exps && is_select(r->op) && !(rel_is_ref(r)) && !exps_have_func(rel->exps)) {
3692 28 : (void)list_merge(r->exps, rel->exps, (fdup)NULL);
3693 28 : rel->l = NULL;
3694 28 : rel_destroy(rel);
3695 28 : v->changes++;
3696 28 : return try_remove_empty_select(v, r);
3697 : }
3698 : /*
3699 : * Push select through semi/anti join
3700 : * select (semi(A,B)) == semi(select(A), B)
3701 : */
3702 3621284 : if (is_select(rel->op) && r && is_semi(r->op) && !(rel_is_ref(r))) {
3703 331 : rel->l = r->l;
3704 331 : r->l = rel;
3705 331 : v->changes++;
3706 : /*
3707 : * if A has 2 references (ie used on both sides of
3708 : * the semi join), we also push the select into A.
3709 : */
3710 331 : if (rel_is_ref(rel->l) && rel->l == rel_find_ref(r->r)){
3711 0 : sql_rel *lx = rel->l;
3712 0 : sql_rel *rx = r->r;
3713 0 : if (lx->ref.refcnt == 2 && !rel_is_ref(rx)) {
3714 0 : while (rx->l && !rel_is_ref(rx->l) &&
3715 0 : (is_project(rx->op) ||
3716 : is_select(rx->op) ||
3717 : is_join(rx->op)))
3718 : rx = rx->l;
3719 : /* probably we need to introduce a project */
3720 0 : rel_destroy(rel->l);
3721 0 : lx = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
3722 0 : r->l = lx;
3723 0 : rx->l = rel_dup(lx);
3724 : }
3725 : }
3726 331 : return r;
3727 : }
3728 3620953 : exps = rel->exps;
3729 :
3730 : /* push select through join */
3731 3620953 : if (is_select(rel->op) && r && is_join(r->op) && !rel_is_ref(r) && !is_single(r)){
3732 22137 : sql_rel *jl = r->l, *ojl = jl, *jr = r->r, *ojr = jr;
3733 22137 : int left = r->op == op_join || r->op == op_left;
3734 22137 : int right = r->op == op_join || r->op == op_right;
3735 :
3736 22137 : if (r->op == op_full)
3737 : return rel;
3738 :
3739 : /* introduce selects under the join (if needed) */
3740 22113 : set_processed(jl);
3741 22113 : set_processed(jr);
3742 48748 : for (n = exps->h; n;) {
3743 26635 : node *next = n->next;
3744 26635 : sql_exp *e = n->data;
3745 :
3746 26635 : if (!exp_unsafe(e, false, true)) {
3747 26448 : if (left && rel_rebind_exp(v->sql, jl, e)) {
3748 13724 : if (!is_select(jl->op) || rel_is_ref(jl))
3749 11439 : r->l = jl = rel_select(v->sql->sa, jl, NULL);
3750 13724 : rel_select_add_exp(v->sql->sa, jl, e);
3751 13724 : list_remove_node(exps, NULL, n);
3752 13724 : v->changes++;
3753 12724 : } else if (right && rel_rebind_exp(v->sql, jr, e)) {
3754 5297 : if (!is_select(jr->op) || rel_is_ref(jr))
3755 4324 : r->r = jr = rel_select(v->sql->sa, jr, NULL);
3756 5297 : rel_select_add_exp(v->sql->sa, jr, e);
3757 5297 : list_remove_node(exps, NULL, n);
3758 5297 : v->changes++;
3759 : }
3760 : }
3761 : n = next;
3762 : }
3763 22113 : if (ojl != jl)
3764 11439 : set_processed(jl);
3765 22113 : if (ojr != jr)
3766 4324 : set_processed(jr);
3767 : }
3768 :
3769 : /* merge select and cross product ? */
3770 3620929 : if (is_select(rel->op) && r && r->op == op_join && !rel_is_ref(r) && !is_single(r) && !exps_have_unsafe(exps, false, true)) {
3771 14637 : for (n = exps->h; n;) {
3772 1851 : node *next = n->next;
3773 1851 : sql_exp *e = n->data;
3774 :
3775 1851 : if (exp_is_join(e, NULL) == 0) {
3776 508 : if (!r->exps)
3777 158 : r->exps = sa_list(v->sql->sa);
3778 508 : append(r->exps, e);
3779 508 : list_remove_node(exps, NULL, n);
3780 508 : v->changes++;
3781 : }
3782 : n = next;
3783 : }
3784 : }
3785 :
3786 3620929 : if (is_select(rel->op) && r && (is_simple_project(r->op) || (is_groupby(r->op) && !list_empty(r->r))) && !rel_is_ref(r) && !is_single(r)){
3787 74509 : sql_rel *pl = r->l, *opl = pl;
3788 : /* we cannot push through window functions (for safety I disabled projects over DDL too) */
3789 74509 : if (pl && pl->op != op_ddl && !exps_have_unsafe(r->exps, false, false)) {
3790 : /* introduce selects under the project (if needed) */
3791 54528 : set_processed(pl);
3792 54528 : if (!pl->exps)
3793 425 : pl->exps = sa_list(v->sql->sa);
3794 116336 : for (n = exps->h; n;) {
3795 61808 : node *next = n->next;
3796 61808 : sql_exp *e = n->data, *ne = NULL;
3797 :
3798 : /* can we move it down */
3799 61808 : if (e->type == e_cmp && (ne = exp_push_down_prj(v->sql, e, r, pl)) && ne != e) {
3800 37372 : if (!(is_select(pl->op) && is_join(pl->op) && is_semi(pl->op)) || rel_is_ref(pl))
3801 37372 : r->l = pl = rel_select(v->sql->sa, pl, NULL);
3802 37372 : rel_select_add_exp(v->sql->sa, pl, ne);
3803 37372 : list_remove_node(exps, NULL, n);
3804 37372 : v->changes++;
3805 : }
3806 : n = next;
3807 : }
3808 54528 : if (opl != pl)
3809 28694 : set_processed(pl);
3810 : }
3811 :
3812 : /* push filters if they match the partition by key on a window function */
3813 1615 : else if (pl && pl->op != op_ddl && exps_have_unsafe(r->exps, false, false)) {
3814 1615 : set_processed(pl);
3815 : /* list of partition by key columns */
3816 1615 : list *keyColumns = get_partition_by_key_columns(v->sql->sa, r);
3817 :
3818 : /* partition by keys found, check if any filter matches them */
3819 1615 : if (keyColumns) {
3820 249 : for (n = exps->h; n;) {
3821 153 : node *next = n->next;
3822 153 : sql_exp *e = n->data, *ne = NULL;
3823 :
3824 153 : if (e->type == e_cmp) {
3825 : /* simple comparison filter */
3826 153 : if (e->flag == cmp_gt || e->flag == cmp_gte || e->flag == cmp_lte || e->flag == cmp_lt
3827 153 : || e->flag == cmp_equal || e->flag == cmp_notequal || e->flag == cmp_in || e->flag == cmp_notin
3828 5 : || (e->flag == cmp_filter && ((list*)e->l)->cnt == 1)) {
3829 149 : sql_exp* column;
3830 : /* the column in 'like' filters is stored inside a list */
3831 149 : if (e->flag == cmp_filter) {
3832 1 : column = ((list*)e->l)->h->data;
3833 : } else {
3834 148 : column = e->l;
3835 : }
3836 :
3837 : /* check if the expression matches any partition by key, meaning we can
3838 : try to safely push it down */
3839 149 : if (filter_column_in_partition_by_columns(column, keyColumns)) {
3840 9 : ne = exp_push_down_prj(v->sql, e, r, pl);
3841 :
3842 : /* can we move it down */
3843 9 : if (ne && ne != e && pl->exps) {
3844 9 : if (!is_select(pl->op) || rel_is_ref(pl))
3845 8 : r->l = pl = rel_select(v->sql->sa, pl, NULL);
3846 9 : rel_select_add_exp(v->sql->sa, pl, ne);
3847 9 : list_remove_node(exps, NULL, n);
3848 9 : v->changes++;
3849 : }
3850 : }
3851 : }
3852 : }
3853 : n = next;
3854 : }
3855 :
3856 : /* cleanup list */
3857 96 : list_destroy(keyColumns);
3858 : }
3859 : /* also push (rewrite) limits on output of row_number/(*)rank like window functions */
3860 1615 : if (is_simple_project(r->op) /*&& is_simple_project(pl->op)*/) { /* possible window functions */
3861 3331 : for (n = exps->h; n; n = n->next) {
3862 1724 : sql_exp *e = n->data;
3863 :
3864 1724 : if (e->type == e_cmp && (e->flag == cmp_lt || e->flag == cmp_lte) && exp_is_atom(e->r)) { /* simple limit */
3865 56 : sql_exp *ranke = rel_find_exp(r, e->l);
3866 :
3867 56 : if (ranke && ranke->type == e_func) {
3868 51 : sql_subfunc *rankf = ranke->f;
3869 51 : if (rankf->func->type == F_ANALYTIC) { /* rank functions cannot have a frame */
3870 : // For now only for rank/row_number without partition by
3871 50 : sql_rel *tn = NULL;
3872 50 : if (strcmp(rankf->func->base.name, "rank") == 0 && is_simple_project(pl->op) && pl->r /* &&
3873 : !rank_exp_has_partition_key(ranke)*/) {
3874 6 : tn = r->l = rel_topn(v->sql->sa, r->l, append(sa_list(v->sql->sa), e->r));
3875 6 : tn->grouped = 1;
3876 6 : v->changes++;
3877 6 : break;
3878 : }
3879 44 : if (strcmp(rankf->func->base.name, "row_number") == 0 && list_empty(r->r) && !is_topn(pl->op) /*&&
3880 : !rank_exp_has_partition_key(ranke)*/) {
3881 2 : tn = r->l = rel_topn(v->sql->sa, r->l, append(sa_list(v->sql->sa), e->r));
3882 2 : tn->grouped = 1;
3883 2 : v->changes++;
3884 2 : break;
3885 : }
3886 : }
3887 : }
3888 : }
3889 : }
3890 : }
3891 : }
3892 : }
3893 :
3894 : /* try push select under set relation */
3895 3620929 : if (is_select(rel->op) && r && is_set(r->op) && !list_empty(r->exps) && !rel_is_ref(r) && !is_single(r) && !list_empty(exps)) {
3896 20 : sql_rel *u = r, *ul = u->l, *ur = u->r;
3897 :
3898 20 : ul = rel_dup(ul);
3899 20 : ur = rel_dup(ur);
3900 20 : if (!is_project(ul->op))
3901 0 : ul = rel_project(v->sql->sa, ul,
3902 : rel_projections(v->sql, ul, NULL, 1, 1));
3903 20 : if (!is_project(ur->op))
3904 0 : ur = rel_project(v->sql->sa, ur,
3905 : rel_projections(v->sql, ur, NULL, 1, 1));
3906 20 : rel_rename_exps(v->sql, u->exps, ul->exps);
3907 20 : rel_rename_exps(v->sql, u->exps, ur->exps);
3908 :
3909 : /* introduce selects under the set */
3910 20 : ul = rel_select(v->sql->sa, ul, NULL);
3911 20 : ul->exps = exps_copy(v->sql, exps);
3912 20 : set_processed(ul);
3913 20 : ur = rel_select(v->sql->sa, ur, NULL);
3914 20 : ur->exps = exps_copy(v->sql, exps);
3915 20 : set_processed(ur);
3916 :
3917 20 : rel = rel_inplace_setop(v->sql, rel, ul, ur, u->op, rel_projections(v->sql, rel, NULL, 1, 1));
3918 20 : if (need_distinct(u))
3919 0 : set_distinct(rel);
3920 20 : v->changes++;
3921 : }
3922 3620929 : if (is_select(rel->op) && r && is_munion(r->op) && !is_recursive(r) && !list_empty(r->exps) && !rel_is_ref(r) && !is_single(r) && !list_empty(exps)) {
3923 4990 : sql_rel *u = r;
3924 4990 : list *rels = u->l, *nrels = sa_list(v->sql->sa);
3925 14978 : for(node *n = rels->h; n; n = n->next) {
3926 9988 : sql_rel *ul = n->data;
3927 9988 : ul = rel_dup(ul);
3928 9988 : if (!is_project(ul->op) || rel_is_ref(ul))
3929 9988 : ul = rel_project(v->sql->sa, ul,
3930 : rel_projections(v->sql, ul, NULL, 1, 1));
3931 9988 : rel_rename_exps(v->sql, u->exps, ul->exps);
3932 :
3933 : /* introduce selects under the set */
3934 9988 : ul = rel_select(v->sql->sa, ul, NULL);
3935 9988 : ul->exps = exps_copy(v->sql, exps);
3936 9988 : set_processed(ul);
3937 9988 : nrels = append(nrels, ul);
3938 : }
3939 :
3940 4990 : rel = rel_inplace_setop_n_ary(v->sql, rel, nrels, u->op, rel_projections(v->sql, rel, NULL, 1, 1));
3941 4990 : if (need_distinct(u))
3942 9 : set_distinct(rel);
3943 4990 : if (is_recursive(u))
3944 0 : set_recursive(rel);
3945 4990 : v->changes++;
3946 : }
3947 :
3948 3620929 : return try_remove_empty_select(v, rel);
3949 : }
3950 :
3951 : static int
3952 9008 : index_exp(sql_exp *e, sql_idx *i)
3953 : {
3954 9008 : if (e->type == e_cmp && !is_complex_exp(e->flag)) {
3955 5353 : switch(i->type) {
3956 5353 : case hash_idx:
3957 : case oph_idx:
3958 5353 : if (e->flag == cmp_equal)
3959 : return 0;
3960 : /* fall through */
3961 : case join_idx:
3962 : default:
3963 144 : return -1;
3964 : }
3965 : }
3966 : return -1;
3967 : }
3968 :
3969 : /* find column for the select/join expression */
3970 : static sql_column *
3971 5209 : sjexp_col(sql_exp *e, sql_rel *r)
3972 : {
3973 5209 : sql_column *res = NULL;
3974 :
3975 5209 : if (e->type == e_cmp && !is_complex_exp(e->flag)) {
3976 5209 : res = exp_find_column(r, e->l, -2);
3977 5209 : if (!res)
3978 1420 : res = exp_find_column(r, e->r, -2);
3979 : }
3980 5209 : return res;
3981 : }
3982 :
3983 : static sql_idx *
3984 815247 : find_index(allocator *sa, sql_rel *rel, sql_rel *sub, list **EXPS)
3985 : {
3986 815247 : node *n;
3987 :
3988 : /* any (partial) match of the expressions with the index columns */
3989 : /* Depending on the index type we may need full matches and only
3990 : limited number of cmp types (hash only equality etc) */
3991 : /* Depending on the index type we should (in the rel_bin) generate
3992 : more code, ie for spatial index add post filter etc, for hash
3993 : compute hash value and use index */
3994 :
3995 815247 : if (sub->exps && rel->exps)
3996 3716002 : for(n = sub->exps->h; n; n = n->next) {
3997 2978390 : prop *p;
3998 2978390 : sql_exp *e = n->data;
3999 :
4000 2978390 : if ((p = find_prop(e->p, PROP_HASHIDX)) != NULL) {
4001 8050 : list *exps, *cols;
4002 8050 : sql_idx *i = p->value.pval;
4003 8050 : fcmp cmp = (fcmp)&sql_column_kc_cmp;
4004 :
4005 : /* join indices are only interesting for joins */
4006 8050 : if (i->type == join_idx || list_length(i->columns) <= 1)
4007 0 : continue;
4008 : /* based on the index type, find qualifying exps */
4009 8050 : exps = list_select(rel->exps, i, (fcmp) &index_exp, (fdup)NULL);
4010 8050 : if (list_empty(exps))
4011 3735 : continue;
4012 : /* now we obtain the columns, move into sql_column_kc_cmp! */
4013 4315 : cols = list_map(exps, sub, (fmap) &sjexp_col);
4014 :
4015 : /* TODO check that at most 2 relations are involved */
4016 :
4017 : /* Match the index columns with the expression columns.
4018 : TODO, Allow partial matches ! */
4019 4315 : if (list_match(cols, i->columns, cmp) == 0) {
4020 : /* re-order exps in index order */
4021 217 : node *n, *m;
4022 217 : list *es = sa_list(sa);
4023 :
4024 751 : for(n = i->columns->h; n; n = n->next) {
4025 534 : int i = 0;
4026 1945 : for(m = cols->h; m; m = m->next, i++) {
4027 1945 : if (cmp(m->data, n->data) == 0){
4028 534 : sql_exp *e = list_fetch(exps, i);
4029 534 : list_append(es, e);
4030 534 : break;
4031 : }
4032 : }
4033 : }
4034 : /* fix the destroy function */
4035 217 : cols->destroy = NULL;
4036 217 : *EXPS = es;
4037 217 : e->used = 1;
4038 217 : return i;
4039 : }
4040 4098 : cols->destroy = NULL;
4041 : }
4042 : }
4043 : return NULL;
4044 : }
4045 :
4046 : static inline sql_rel *
4047 516550 : rel_use_index(visitor *v, sql_rel *rel)
4048 : {
4049 516550 : list *exps = NULL;
4050 516550 : sql_idx *i = find_index(v->sql->sa, rel, rel->l, &exps);
4051 516550 : int left = 1;
4052 :
4053 516550 : assert(is_select(rel->op) || is_join(rel->op));
4054 516550 : if (!i && is_join(rel->op)) {
4055 298697 : left = 0;
4056 298697 : i = find_index(v->sql->sa, rel, rel->r, &exps);
4057 : }
4058 :
4059 516431 : if (i) {
4060 217 : prop *p;
4061 217 : node *n;
4062 217 : int single_table = 1;
4063 217 : sql_exp *re = NULL;
4064 :
4065 749 : for( n = exps->h; n && single_table; n = n->next) {
4066 532 : sql_exp *e = n->data, *nre = e->l;
4067 :
4068 532 : if (!is_compare(e->type) || is_anti(e) || e->flag != cmp_equal)
4069 : return rel;
4070 532 : if (is_join(rel->op) && ((left && !rel_find_exp(rel->l, nre)) || (!left && rel_find_exp(rel->r, nre))))
4071 95 : nre = e->r;
4072 532 : single_table = (!re || (exp_relname(nre) && exp_relname(re) && a_match(exp_relname(nre), exp_relname(re))));
4073 532 : re = nre;
4074 : }
4075 217 : if (single_table) { /* add PROP_HASHCOL to all column exps */
4076 734 : for( n = exps->h; n; n = n->next) {
4077 522 : sql_exp *e = n->data;
4078 :
4079 : /* swapped ? */
4080 522 : if (is_join(rel->op) && ((left && !rel_find_exp(rel->l, e->l)) || (!left && !rel_find_exp(rel->r, e->l)))) {
4081 165 : exp_swap(e);
4082 : }
4083 522 : p = find_prop(e->p, PROP_HASHCOL);
4084 522 : if (!p)
4085 285 : e->p = p = prop_create(v->sql->sa, PROP_HASHCOL, e->p);
4086 522 : p->value.pval = i;
4087 : }
4088 : }
4089 : /* add the remaining exps to the new exp list */
4090 217 : if (list_length(rel->exps) > list_length(exps)) {
4091 96 : for( n = rel->exps->h; n; n = n->next) {
4092 72 : sql_exp *e = n->data;
4093 72 : if (!list_find(exps, e, (fcmp)&exp_cmp))
4094 24 : list_append(exps, e);
4095 : }
4096 : }
4097 217 : rel->exps = exps;
4098 : }
4099 : return rel;
4100 : }
4101 :
4102 : static sql_rel *
4103 1840006 : rel_select_leftgroup_2_semi(visitor *v, sql_rel *rel)
4104 : {
4105 1840006 : if (rel_is_ref(rel) || !is_select(rel->op) || list_empty(rel->exps))
4106 1626428 : return rel;
4107 213578 : sql_rel *l = rel->l;
4108 :
4109 213578 : if (!l || rel_is_ref(l) || !is_left(l->op) || list_empty(l->attr))
4110 212755 : return rel;
4111 :
4112 1638 : for(node *n = rel->exps->h; n; n = n->next) {
4113 823 : sql_exp *e = n->data;
4114 :
4115 823 : if (e->type == e_cmp && !is_semantics(e) && !e->f) {
4116 810 : list *attrs = l->attr;
4117 810 : sql_exp *a = attrs->h->data;
4118 :
4119 810 : if (exps_find_exp(l->attr, e->l) && exp_is_true(e->r) && e->flag == cmp_equal /*&& exp_is_true(a)*/) {
4120 : // printf("# optimize select leftgroup -> semi\n");
4121 8 : if (!list_empty(l->exps)) {
4122 13 : for(node *m = l->exps->h; m; m = m->next) {
4123 7 : sql_exp *j = m->data;
4124 7 : reset_any(j);
4125 : }
4126 : }
4127 8 : l->attr = NULL;
4128 8 : l->op = exp_is_true(a)?op_semi:op_anti;
4129 8 : list_remove_node(rel->exps, NULL, n);
4130 8 : rel = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
4131 8 : list_append(rel->exps, attrs->h->data);
4132 8 : v->changes++;
4133 8 : return rel;
4134 : }
4135 : }
4136 : }
4137 : return rel;
4138 : }
4139 :
4140 : static sql_rel *
4141 1840006 : rel_optimize_select_and_joins_topdown_(visitor *v, sql_rel *rel)
4142 : {
4143 : /* push_join_down introduces semijoins */
4144 1840006 : uint8_t cycle = *(uint8_t*) v->data;
4145 1840006 : if (cycle <= 0) {
4146 778590 : rel = rel_semijoin_use_fk(v, rel);
4147 778590 : rel = rel_push_join_down(v, rel);
4148 : }
4149 :
4150 1840006 : rel = rel_simplify_fk_joins(v, rel);
4151 1840006 : rel = rel_push_select_down(v, rel);
4152 1840006 : rel = rel_select_leftgroup_2_semi(v, rel);
4153 1840006 : if (rel && rel->l && (is_select(rel->op) || is_join(rel->op)))
4154 516550 : rel = rel_use_index(v, rel);
4155 1840006 : return rel;
4156 : }
4157 :
4158 : static sql_rel *
4159 134798 : rel_optimize_select_and_joins_topdown(visitor *v, global_props *gp, sql_rel *rel)
4160 : {
4161 134798 : v->data = &gp->opt_cycle;
4162 134798 : rel = rel_visitor_topdown(v, rel, &rel_optimize_select_and_joins_topdown_);
4163 134798 : v->data = gp;
4164 134798 : return rel;
4165 : }
4166 :
4167 : run_optimizer
4168 643851 : bind_optimize_select_and_joins_topdown(visitor *v, global_props *gp)
4169 : {
4170 643851 : int flag = v->sql->sql_optimizer;
4171 643653 : return gp->opt_level == 1 && (gp->cnt[op_join] || gp->cnt[op_left] || gp->cnt[op_right]
4172 557429 : || gp->cnt[op_full] || gp->cnt[op_semi] || gp->cnt[op_anti] ||
4173 1287504 : gp->cnt[op_select]) && (flag & optimize_select_and_joins_topdown) ? rel_optimize_select_and_joins_topdown : NULL;
4174 : }
4175 :
4176 :
4177 : static int
4178 642364 : can_push_func(sql_exp *e, sql_rel *rel, int *must, int depth)
4179 : {
4180 686398 : switch(e->type) {
4181 534519 : case e_cmp: {
4182 534519 : sql_exp *l = e->l, *r = e->r, *f = e->f;
4183 :
4184 : /* don't push down functions inside attribute joins */
4185 534519 : if (e->flag == cmp_or || e->flag == cmp_in || e->flag == cmp_notin || e->flag == cmp_filter || (is_join(rel->op) && is_any(e)))
4186 : return 0;
4187 527121 : if (depth > 0) { /* for comparisons under the top ones, they become functions */
4188 60 : int lmust = 0;
4189 60 : int res = can_push_func(l, rel, &lmust, depth + 1) && can_push_func(r, rel, &lmust, depth + 1) &&
4190 24 : (!f || can_push_func(f, rel, &lmust, depth + 1));
4191 14 : if (res && !lmust)
4192 : return 1;
4193 58 : (*must) |= lmust;
4194 58 : return res;
4195 : } else {
4196 527061 : int mustl = 0, mustr = 0, mustf = 0;
4197 527061 : return ((l->type == e_column || can_push_func(l, rel, &mustl, depth + 1)) && (*must = mustl)) ||
4198 1047425 : ((r->type == e_column || can_push_func(r, rel, &mustr, depth + 1)) && (*must = mustr)) ||
4199 2884 : ((f && (f->type == e_column || can_push_func(f, rel, &mustf, depth + 1)) && (*must = mustf)));
4200 : }
4201 : }
4202 44034 : case e_convert:
4203 44034 : return can_push_func(e->l, rel, must, depth + 1);
4204 9333 : case e_aggr:
4205 : case e_func: {
4206 9333 : list *l = e->l;
4207 9333 : int res = 1, lmust = 0;
4208 :
4209 9333 : if (exp_unsafe(e, false, false))
4210 : return 0;
4211 26633 : if (l) for (node *n = l->h; n && res; n = n->next)
4212 17308 : res &= can_push_func(n->data, rel, &lmust, depth + 1);
4213 9325 : if (res && !lmust)
4214 : return 1;
4215 8547 : (*must) |= lmust;
4216 8547 : return res;
4217 : }
4218 50410 : case e_column:
4219 50410 : if (rel && !rel_find_exp(rel, e))
4220 : return 0;
4221 29487 : (*must) = 1;
4222 : /* fall through */
4223 : default:
4224 : return 1;
4225 : }
4226 : }
4227 :
4228 : static int
4229 440432 : exps_can_push_func(list *exps, sql_rel *rel)
4230 : {
4231 2138794 : for(node *n = exps->h; n; n = n->next) {
4232 1721978 : sql_exp *e = n->data;
4233 1721978 : int mustl = 0, mustr = 0;
4234 :
4235 1721978 : if ((is_joinop(rel->op) || is_select(rel->op)) && ((can_push_func(e, rel->l, &mustl, 0) && mustl)))
4236 23616 : return 1;
4237 1718196 : if (is_joinop(rel->op) && can_push_func(e, rel->r, &mustr, 0) && mustr)
4238 : return 1;
4239 : }
4240 : return 0;
4241 : }
4242 :
4243 : static int
4244 80360 : exp_needs_push_down(sql_rel *rel, sql_exp *e)
4245 : {
4246 104348 : switch(e->type) {
4247 26931 : case e_cmp:
4248 : /* don't push down functions inside attribute joins */
4249 26931 : if (e->flag == cmp_or || e->flag == cmp_in || e->flag == cmp_notin || e->flag == cmp_filter || (is_join(rel->op) && is_any(e)))
4250 : return 0;
4251 26282 : return exp_needs_push_down(rel, e->l) || exp_needs_push_down(rel, e->r) || (e->f && exp_needs_push_down(rel, e->f));
4252 23988 : case e_convert:
4253 23988 : return exp_needs_push_down(rel, e->l);
4254 1924 : case e_aggr:
4255 : case e_func:
4256 1924 : if (!e->l || exps_are_atoms(e->l))
4257 18 : return 0;
4258 : return 1;
4259 1295 : case e_atom:
4260 1295 : if (!e->f || exps_are_atoms(e->f))
4261 1295 : return 0;
4262 : return 1;
4263 : case e_column:
4264 : default:
4265 : return 0;
4266 : }
4267 : }
4268 :
4269 : static int
4270 23616 : exps_need_push_down(sql_rel *rel, list *exps )
4271 : {
4272 48629 : for(node *n = exps->h; n; n = n->next)
4273 26919 : if (exp_needs_push_down(rel, n->data))
4274 : return 1;
4275 : return 0;
4276 : }
4277 :
4278 : static sql_exp *exp_push_single_func_down(visitor *v, sql_rel *rel, sql_rel *ol, sql_rel *or, sql_exp *e, int depth);
4279 :
4280 : static list *
4281 2416 : exps_push_single_func_down(visitor *v, sql_rel *rel, sql_rel *ol, sql_rel *or, list *exps, int depth)
4282 : {
4283 4832 : if (mvc_highwater(v->sql))
4284 : return exps;
4285 :
4286 6724 : for (node *n = exps->h; n; n = n->next)
4287 4308 : if ((n->data = exp_push_single_func_down(v, rel, ol, or, n->data, depth)) == NULL)
4288 : return NULL;
4289 : return exps;
4290 : }
4291 :
4292 : static sql_exp *
4293 15738 : exp_push_single_func_down(visitor *v, sql_rel *rel, sql_rel *ol, sql_rel *or, sql_exp *e, int depth)
4294 : {
4295 27509 : if (mvc_highwater(v->sql))
4296 : return e;
4297 :
4298 15738 : switch(e->type) {
4299 4250 : case e_cmp: {
4300 4250 : if (e->flag == cmp_or || e->flag == cmp_filter) {
4301 246 : if ((e->l = exps_push_single_func_down(v, rel, ol, or, e->l, depth + 1)) == NULL)
4302 : return NULL;
4303 246 : if ((e->r = exps_push_single_func_down(v, rel, ol, or, e->r, depth + 1)) == NULL)
4304 : return NULL;
4305 4004 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
4306 18 : if ((e->l = exp_push_single_func_down(v, rel, ol, or, e->l, depth + 1)) == NULL)
4307 : return NULL;
4308 18 : if ((e->r = exps_push_single_func_down(v, rel, ol, or, e->r, depth + 1)) == NULL)
4309 : return NULL;
4310 : } else {
4311 3986 : if ((e->l = exp_push_single_func_down(v, rel, ol, or, e->l, depth + 1)) == NULL)
4312 : return NULL;
4313 3986 : if ((e->r = exp_push_single_func_down(v, rel, ol, or, e->r, depth + 1)) == NULL)
4314 : return NULL;
4315 3986 : if (e->f && (e->f = exp_push_single_func_down(v, rel, ol, or, e->f, depth + 1)) == NULL)
4316 : return NULL;
4317 : }
4318 : } break;
4319 2008 : case e_convert:
4320 2008 : if ((e->l = exp_push_single_func_down(v, rel, ol, or, e->l, depth + 1)) == NULL)
4321 : return NULL;
4322 : break;
4323 3990 : case e_aggr:
4324 : case e_func: {
4325 3990 : sql_rel *l = rel->l, *r = rel->r;
4326 3990 : int must = 0, mustl = 0, mustr = 0;
4327 :
4328 3990 : if (exp_unsafe(e, false, false))
4329 23 : return e;
4330 3990 : if (!e->l || exps_are_atoms(e->l))
4331 23 : return e;
4332 3967 : if ((is_joinop(rel->op) && ((can_push_func(e, l, &mustl, depth + 1) && mustl) || (can_push_func(e, r, &mustr, depth + 1) && mustr))) ||
4333 2973 : (is_select(rel->op) && can_push_func(e, l, &must, depth + 1) && must)) {
4334 3954 : exp_label(v->sql->sa, e, ++v->sql->label);
4335 : /* we need a full projection, group by's and unions cannot be extended with more expressions */
4336 3954 : if (mustr) {
4337 356 : if (r == or) /* don't project twice */
4338 333 : rel->r = r = rel_project(v->sql->sa, r, rel_projections(v->sql, r, NULL, 1, 1));
4339 356 : list_append(r->exps, e);
4340 : } else {
4341 3598 : if (l == ol) /* don't project twice */
4342 1842 : rel->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
4343 3598 : list_append(l->exps, e);
4344 : }
4345 3954 : e = exp_ref(v->sql, e);
4346 3954 : v->changes++;
4347 : }
4348 3967 : } break;
4349 1156 : case e_atom: {
4350 1156 : if (e->f && (e->f = exps_push_single_func_down(v, rel, ol, or, e->f, depth + 1)) == NULL)
4351 : return NULL;
4352 : } break;
4353 : case e_column:
4354 : case e_psm:
4355 : break;
4356 : }
4357 : return e;
4358 : }
4359 :
4360 : static inline sql_rel *
4361 1854002 : rel_push_func_down(visitor *v, sql_rel *rel)
4362 : {
4363 1854002 : if ((is_select(rel->op) || is_joinop(rel->op)) && rel->l && rel->exps && !(rel_is_ref(rel))) {
4364 507227 : int changes = v->changes;
4365 507227 : sql_rel *l = rel->l, *r = rel->r;
4366 :
4367 : /* only push down when is useful */
4368 507227 : if ((is_select(rel->op) && list_length(rel->exps) <= 1) || rel_is_ref(l) || (is_joinop(rel->op) && rel_is_ref(r)))
4369 250024 : return rel;
4370 257203 : if (exps_can_push_func(rel->exps, rel) && exps_need_push_down(rel, rel->exps) && !exps_push_single_func_down(v, rel, l, r, rel->exps, 0))
4371 : return NULL;
4372 257203 : if (v->changes > changes) /* once we get a better join order, we can try to remove this projection */
4373 1906 : return rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
4374 : }
4375 1602072 : if (is_simple_project(rel->op) && rel->l && rel->exps) {
4376 644583 : sql_rel *pl = rel->l;
4377 :
4378 644583 : if (is_joinop(pl->op) && exps_can_push_func(rel->exps, rel)) {
4379 0 : sql_rel *l = pl->l, *r = pl->r, *ol = l, *or = r;
4380 :
4381 0 : for (node *n = rel->exps->h; n; ) {
4382 0 : node *next = n->next;
4383 0 : sql_exp *e = n->data;
4384 0 : int mustl = 0, mustr = 0;
4385 :
4386 0 : if ((can_push_func(e, l, &mustl, 0) && mustl) || (can_push_func(e, r, &mustr, 0) && mustr)) {
4387 0 : if (mustl) {
4388 0 : if (l == ol) /* don't project twice */
4389 0 : pl->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
4390 0 : list_append(l->exps, e);
4391 0 : list_remove_node(rel->exps, NULL, n);
4392 0 : v->changes++;
4393 : } else {
4394 0 : if (r == or) /* don't project twice */
4395 0 : pl->r = r = rel_project(v->sql->sa, r, rel_projections(v->sql, r, NULL, 1, 1));
4396 0 : list_append(r->exps, e);
4397 0 : list_remove_node(rel->exps, NULL, n);
4398 0 : v->changes++;
4399 : }
4400 : }
4401 0 : n = next;
4402 : }
4403 : }
4404 : }
4405 : return rel;
4406 : }
4407 :
4408 : static sql_rel *
4409 1854002 : rel_push_func_and_select_down_(visitor *v, sql_rel *rel)
4410 : {
4411 1854002 : if (rel)
4412 1854002 : rel = rel_push_func_down(v, rel);
4413 1854002 : if (rel)
4414 1854002 : rel = rel_push_select_down(v, rel);
4415 1854002 : return rel;
4416 : }
4417 :
4418 : static sql_rel *
4419 134798 : rel_push_func_and_select_down(visitor *v, global_props *gp, sql_rel *rel)
4420 : {
4421 134798 : (void) gp;
4422 134798 : return rel_visitor_topdown(v, rel, &rel_push_func_and_select_down_);
4423 : }
4424 :
4425 : run_optimizer
4426 643607 : bind_push_func_and_select_down(visitor *v, global_props *gp)
4427 : {
4428 643607 : int flag = v->sql->sql_optimizer;
4429 643411 : return gp->opt_level == 1 && (gp->cnt[op_join] || gp->cnt[op_left] || gp->cnt[op_right]
4430 557343 : || gp->cnt[op_full] || gp->cnt[op_semi] || gp->cnt[op_anti] || gp->cnt[op_select])
4431 778066 : && (flag & push_func_and_select_down) ? rel_push_func_and_select_down : NULL;
4432 : }
|