Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024, 2025 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer_private.h"
15 : #include "rel_exp.h"
16 : #include "rel_select.h"
17 : #include "rel_rewriter.h"
18 :
19 : /* Split_select optimizer splits case statements in select expressions. This is a step needed for cse */
20 : static void select_split_exps(mvc *sql, list *exps, sql_rel *rel);
21 :
22 : static sql_exp *
23 154449 : select_split_exp(mvc *sql, sql_exp *e, sql_rel *rel)
24 : {
25 154449 : switch(e->type) {
26 : case e_column:
27 : return e;
28 5173 : case e_convert:
29 5173 : e->l = select_split_exp(sql, e->l, rel);
30 5173 : return e;
31 14329 : case e_aggr:
32 : case e_func:
33 14329 : if (!is_analytic(e) && !exp_has_sideeffect(e)) {
34 14324 : sql_subfunc *f = e->f;
35 14324 : if (e->type == e_func && !f->func->s && is_caselike_func(f) /*is_ifthenelse_func(f)*/)
36 283 : return add_exp_too_project(sql, e, rel);
37 : }
38 : return e;
39 52209 : case e_cmp:
40 52209 : if (e->flag == cmp_or || e->flag == cmp_filter) {
41 13808 : select_split_exps(sql, e->l, rel);
42 13808 : select_split_exps(sql, e->r, rel);
43 38401 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
44 2802 : e->l = select_split_exp(sql, e->l, rel);
45 2802 : select_split_exps(sql, e->r, rel);
46 : } else {
47 35599 : e->l = select_split_exp(sql, e->l, rel);
48 35599 : e->r = select_split_exp(sql, e->r, rel);
49 35599 : if (e->f)
50 2438 : e->f = select_split_exp(sql, e->f, rel);
51 : }
52 : return e;
53 : case e_atom:
54 : case e_psm:
55 : return e;
56 : }
57 : return e;
58 : }
59 :
60 : static void
61 45554 : select_split_exps(mvc *sql, list *exps, sql_rel *rel)
62 : {
63 45554 : node *n;
64 :
65 45554 : if (!exps)
66 : return;
67 118392 : for(n=exps->h; n; n = n->next){
68 72838 : sql_exp *e = n->data;
69 :
70 72838 : e = select_split_exp(sql, e, rel);
71 72838 : n->data = e;
72 : }
73 : }
74 :
75 : static sql_rel *
76 683324 : rel_split_select_(visitor *v, sql_rel *rel)
77 : {
78 683324 : if (!rel || !is_select(rel->op) || list_empty(rel->exps) || !rel->l || mvc_highwater(v->sql))
79 579729 : return rel;
80 :
81 103595 : bool funcs = false;
82 :
83 : /* are there functions */
84 217198 : for (node *n = rel->exps->h; n && !funcs; n = n->next)
85 113603 : funcs = exp_has_func(n->data);
86 :
87 : /* introduce extra project */
88 103595 : if (funcs) {
89 15136 : sql_rel *nrel = rel_project(v->sql->sa, rel->l,
90 15136 : rel_projections(v->sql, rel->l, NULL, 1, 1));
91 15136 : if (!nrel || !nrel->exps)
92 : return NULL;
93 15136 : rel->l = nrel;
94 : /* recursively split all functions and add those to the projection list */
95 15136 : select_split_exps(v->sql, rel->exps, nrel);
96 15136 : return rel;
97 : }
98 : return rel;
99 : }
100 :
101 : static sql_rel *
102 59831 : rel_split_select(visitor *v, global_props *gp, sql_rel *rel)
103 : {
104 59831 : (void) gp;
105 59831 : return rel_visitor_bottomup(v, rel, &rel_split_select_);
106 : }
107 :
108 : run_optimizer
109 645121 : bind_split_select(visitor *v, global_props *gp)
110 : {
111 645121 : int flag = v->sql->sql_optimizer;
112 565611 : return gp->opt_cycle == 0 && gp->opt_level == 1 && (flag & split_select)
113 1210724 : && gp->cnt[op_select] ? rel_split_select : NULL;
114 : }
115 :
116 :
117 : /*
118 : * Remove a redundant join
119 : *
120 : * join (L, Distinct Project(join(L,P) [ p.key == l.lkey]) [p.key]) [ p.key == l.lkey]
121 : * =>
122 : * join(L, P) [p.key==l.lkey]
123 : */
124 : static sql_rel *
125 652463 : rel_remove_redundant_join_(visitor *v, sql_rel *rel)
126 : {
127 652463 : if ((is_join(rel->op) || is_semi(rel->op)) && !list_empty(rel->exps)) {
128 111573 : sql_rel *l = rel->l, *r = rel->r, *b, *p = NULL, *j;
129 :
130 111573 : if (is_basetable(l->op) && is_simple_project(r->op) && need_distinct(r)) {
131 8 : b = l;
132 8 : p = r;
133 8 : j = p->l;
134 111565 : } else if (is_basetable(r->op) && is_simple_project(l->op) && need_distinct(l)) {
135 2142 : b = r;
136 2142 : p = l;
137 2142 : j = p->l;
138 : }
139 111573 : if (!p || !j || j->op != rel->op)
140 : return rel;
141 : /* j must have b->l (ie table) */
142 9 : sql_rel *jl = j->l, *jr = j->r;
143 9 : if ((is_basetable(jl->op) && jl->l == b->l) ||
144 5 : (is_basetable(jr->op) && jr->l == b->l)) {
145 8 : int left = 0;
146 8 : if (is_basetable(jl->op) && jl->l == b->l)
147 8 : left = 1;
148 8 : if (!list_empty(p->exps)) {
149 13 : for (node *n=p->exps->h; n; n = n->next) { /* all exps of 'p' must be bound to the opposite side */
150 8 : sql_exp *e = n->data;
151 :
152 12 : if (!rel_rebind_exp(v->sql, left ? jr : jl, e))
153 : return rel;
154 : }
155 : }
156 5 : if (exp_match_list(j->exps, rel->exps)) {
157 0 : p->l = (left)?rel_dup(jr):rel_dup(jl);
158 0 : rel_destroy(j);
159 0 : set_nodistinct(p);
160 0 : v->changes++;
161 0 : return rel;
162 : }
163 : }
164 : }
165 : return rel;
166 : }
167 :
168 : static sql_rel *
169 49492 : rel_remove_redundant_join(visitor *v, global_props *gp, sql_rel *rel)
170 : {
171 49492 : (void) gp;
172 49492 : return rel_visitor_bottomup(v, rel, &rel_remove_redundant_join_); /* this optimizer has to run before rel_first_level_optimizations */
173 : }
174 :
175 : run_optimizer
176 645063 : bind_remove_redundant_join(visitor *v, global_props *gp)
177 : {
178 645063 : int flag = v->sql->sql_optimizer;
179 565605 : return gp->opt_cycle == 0 && gp->opt_level == 1 && (gp->cnt[op_left] || gp->cnt[op_right]
180 546895 : || gp->cnt[op_full] || gp->cnt[op_join] || gp->cnt[op_semi] || gp->cnt[op_anti]) &&
181 694352 : (flag & remove_redundant_join) ? rel_remove_redundant_join : NULL;
182 : }
183 :
184 :
185 : static list *
186 541796 : exp_merge_range(visitor *v, sql_rel *rel, list *exps)
187 : {
188 541796 : node *n, *m;
189 1159454 : for (n=exps->h; n; n = n->next) {
190 619554 : sql_exp *e = n->data;
191 619554 : sql_exp *le = e->l;
192 619554 : sql_exp *re = e->r;
193 :
194 : /* handle the and's in the or lists */
195 619554 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
196 30540 : e->l = exp_merge_range(v, rel, e->l);
197 30540 : e->r = exp_merge_range(v, rel, e->r);
198 : /* only look for gt, gte, lte, lt */
199 589014 : } else if (n->next &&
200 76651 : e->type == e_cmp && e->flag < cmp_equal && !e->f &&
201 6092 : re->card == CARD_ATOM && !is_anti(e)) {
202 1176 : for (m=n->next; m; m = m->next) {
203 716 : sql_exp *f = m->data;
204 716 : sql_exp *lf = f->l;
205 716 : sql_exp *rf = f->r;
206 716 : int c_le = is_numeric_upcast(le), c_lf = is_numeric_upcast(lf);
207 :
208 716 : if (f->type == e_cmp && f->flag < cmp_equal && !f->f &&
209 645 : rf->card == CARD_ATOM && !is_anti(f) &&
210 322 : exp_match_exp(c_le?le->l:le, c_lf?lf->l:lf)) {
211 173 : sql_exp *ne;
212 173 : int swap = 0, lt = 0, gt = 0;
213 173 : sql_subtype super;
214 : /* for now only c1 <[=] x <[=] c2 */
215 :
216 173 : swap = lt = (e->flag == cmp_lt || e->flag == cmp_lte);
217 173 : gt = !lt;
218 :
219 173 : if (gt &&
220 149 : (f->flag == cmp_gt ||
221 : f->flag == cmp_gte))
222 30 : continue;
223 24 : if (lt &&
224 24 : (f->flag == cmp_lt ||
225 : f->flag == cmp_lte))
226 0 : continue;
227 :
228 143 : cmp_supertype(&super, exp_subtype(le), exp_subtype(lf));
229 143 : if (!(rf = exp_check_type(v->sql, &super, rel, rf, type_equal)) ||
230 143 : !(le = exp_check_type(v->sql, &super, rel, le, type_equal)) ||
231 143 : !(re = exp_check_type(v->sql, &super, rel, re, type_equal))) {
232 0 : v->sql->session->status = 0;
233 0 : v->sql->errstr[0] = 0;
234 0 : continue;
235 : }
236 143 : if (!swap)
237 119 : ne = exp_compare2(v->sql->sa, le, re, rf, compare2range(e->flag, f->flag), 0);
238 : else
239 24 : ne = exp_compare2(v->sql->sa, le, rf, re, compare2range(f->flag, e->flag), 0);
240 :
241 143 : list_remove_data(exps, NULL, e);
242 143 : list_remove_data(exps, NULL, f);
243 143 : list_append(exps, ne);
244 143 : v->changes++;
245 143 : return exp_merge_range(v, rel, exps);
246 : }
247 : }
248 588411 : } else if (n->next &&
249 76048 : e->type == e_cmp && e->flag < cmp_equal && !e->f &&
250 5489 : re->card > CARD_ATOM && !is_anti(e)) {
251 9718 : for (m=n->next; m; m = m->next) {
252 5982 : sql_exp *f = m->data;
253 5982 : sql_exp *lf = f->l;
254 5982 : sql_exp *rf = f->r;
255 :
256 5982 : if (f->type == e_cmp && f->flag < cmp_equal && !f->f &&
257 4689 : rf->card > CARD_ATOM && !is_anti(f)) {
258 4688 : sql_exp *ne, *t;
259 4688 : int swap = 0, lt = 0, gt = 0;
260 4688 : comp_type ef = (comp_type) e->flag, ff = (comp_type) f->flag;
261 4688 : int c_re = is_numeric_upcast(re), c_rf = is_numeric_upcast(rf);
262 4688 : int c_le = is_numeric_upcast(le), c_lf = is_numeric_upcast(lf), c;
263 4688 : sql_subtype super;
264 :
265 : /* both swapped ? */
266 4688 : if (exp_match_exp(c_re?re->l:re, c_rf?rf->l:rf)) {
267 19 : t = re;
268 19 : re = le;
269 19 : le = t;
270 19 : c = c_re; c_re = c_le; c_le = c;
271 19 : ef = swap_compare(ef);
272 19 : t = rf;
273 19 : rf = lf;
274 19 : lf = t;
275 19 : c = c_rf; c_rf = c_lf; c_lf = c;
276 19 : ff = swap_compare(ff);
277 : }
278 :
279 : /* is left swapped ? */
280 4688 : if (exp_match_exp(c_re?re->l:re, c_lf?lf->l:lf)) {
281 104 : t = re;
282 104 : re = le;
283 104 : le = t;
284 104 : c = c_re; c_re = c_le; c_le = c;
285 104 : ef = swap_compare(ef);
286 : }
287 :
288 : /* is right swapped ? */
289 4688 : if (exp_match_exp(c_le?le->l:le, c_rf?rf->l:rf)) {
290 177 : t = rf;
291 177 : rf = lf;
292 177 : lf = t;
293 177 : c = c_rf; c_rf = c_lf; c_lf = c;
294 177 : ff = swap_compare(ff);
295 : }
296 :
297 4688 : if (!exp_match_exp(c_le?le->l:le, c_lf?lf->l:lf))
298 2935 : continue;
299 :
300 : /* for now only c1 <[=] x <[=] c2 */
301 1830 : swap = lt = (ef == cmp_lt || ef == cmp_lte);
302 1830 : gt = !lt;
303 :
304 1830 : if (gt && (ff == cmp_gt || ff == cmp_gte))
305 73 : continue;
306 1757 : if (lt && (ff == cmp_lt || ff == cmp_lte))
307 4 : continue;
308 :
309 1753 : cmp_supertype(&super, exp_subtype(le), exp_subtype(lf));
310 1753 : if (!(rf = exp_check_type(v->sql, &super, rel, rf, type_equal)) ||
311 1753 : !(le = exp_check_type(v->sql, &super, rel, le, type_equal)) ||
312 1753 : !(re = exp_check_type(v->sql, &super, rel, re, type_equal))) {
313 0 : v->sql->session->status = 0;
314 0 : v->sql->errstr[0] = 0;
315 0 : continue;
316 : }
317 1753 : if (!swap)
318 1653 : ne = exp_compare2(v->sql->sa, le, re, rf, compare2range(ef, ff), 0);
319 : else
320 100 : ne = exp_compare2(v->sql->sa, le, rf, re, compare2range(ff, ef), 0);
321 :
322 1753 : list_remove_data(exps, NULL, e);
323 1753 : list_remove_data(exps, NULL, f);
324 1753 : list_append(exps, ne);
325 1753 : v->changes++;
326 1753 : return exp_merge_range(v, rel, exps);
327 : }
328 : }
329 : }
330 : }
331 : return exps;
332 : }
333 :
334 : static int
335 26954 : exps_cse( mvc *sql, list *oexps, list *l, list *r )
336 : {
337 26954 : list *nexps;
338 26954 : node *n, *m;
339 26954 : char *lu, *ru;
340 26954 : int lc = 0, rc = 0, match = 0, res = 0;
341 :
342 26954 : if (list_length(l) == 0 || list_length(r) == 0)
343 0 : return 0;
344 :
345 : /* first recursive exps_cse */
346 26954 : nexps = new_exp_list(sql->sa);
347 57712 : for (n = l->h; n; n = n->next) {
348 30758 : sql_exp *e = n->data;
349 :
350 30758 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
351 8727 : res = exps_cse(sql, nexps, e->l, e->r);
352 : } else {
353 22031 : append(nexps, e);
354 : }
355 : }
356 26954 : l = nexps;
357 :
358 26954 : nexps = new_exp_list(sql->sa);
359 60890 : for (n = r->h; n; n = n->next) {
360 33936 : sql_exp *e = n->data;
361 :
362 33936 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
363 1732 : res = exps_cse(sql, nexps, e->l, e->r);
364 : } else {
365 32204 : append(nexps, e);
366 : }
367 : }
368 26954 : r = nexps;
369 :
370 : /* simplify true or .. and .. or true */
371 26954 : if (list_length(l) == list_length(r) && list_length(l) == 1) {
372 22416 : sql_exp *le = l->h->data, *re = r->h->data;
373 :
374 22416 : if (exp_is_true(le)) {
375 14 : append(oexps, le);
376 14 : return 1;
377 : }
378 22402 : if (exp_is_true(re)) {
379 5 : append(oexps, re);
380 5 : return 1;
381 : }
382 : }
383 :
384 26935 : lu = SA_ZNEW_ARRAY(sql->ta, char, list_length(l));
385 26935 : ru = SA_ZNEW_ARRAY(sql->ta, char, list_length(r));
386 57691 : for (n = l->h, lc = 0; n; n = n->next, lc++) {
387 30756 : sql_exp *le = n->data;
388 :
389 70487 : for ( m = r->h, rc = 0; m; m = m->next, rc++) {
390 39731 : sql_exp *re = m->data;
391 :
392 39731 : if (!ru[rc] && exp_match_exp(le,re)) {
393 68 : lu[lc] = 1;
394 68 : ru[rc] = 1;
395 68 : match = 1;
396 : }
397 : }
398 : }
399 26935 : if (match) {
400 49 : list *nl = new_exp_list(sql->sa);
401 49 : list *nr = new_exp_list(sql->sa);
402 :
403 162 : for (n = l->h, lc = 0; n; n = n->next, lc++)
404 113 : if (!lu[lc])
405 49 : append(nl, n->data);
406 176 : for (n = r->h, rc = 0; n; n = n->next, rc++)
407 127 : if (!ru[rc])
408 59 : append(nr, n->data);
409 :
410 49 : if (list_length(nl) && list_length(nr))
411 23 : append(oexps, exp_or(sql->sa, nl, nr, 0));
412 :
413 162 : for (n = l->h, lc = 0; n; n = n->next, lc++) {
414 113 : if (lu[lc])
415 64 : append(oexps, n->data);
416 : }
417 : res = 1;
418 : } else {
419 26886 : append(oexps, exp_or(sql->sa, list_dup(l, (fdup)NULL),
420 : list_dup(r, (fdup)NULL), 0));
421 : }
422 : return res;
423 : }
424 :
425 : static inline int
426 23340 : exp_col_key(sql_exp *e)
427 : {
428 17584 : return e->nid ? e->nid : e->alias.label;
429 : }
430 :
431 : static inline int
432 5780 : exp_cmp_eq_unique_id(sql_exp *e)
433 : {
434 5780 : return exp_col_key(e->l);
435 : }
436 :
437 : static inline int
438 1773 : exp_multi_col_key(list *l)
439 : {
440 1773 : int k = exp_col_key(l->h->data);
441 5780 : for (node *n = l->h->next; n; n = n->next) {
442 4007 : k <<= 4;
443 8014 : k ^= exp_col_key(n->data);
444 : }
445 1773 : return k;
446 : }
447 :
448 : typedef struct exp_eq_col_values {
449 : /* we need ->first in order to remove it from the list of cmp_eq exps
450 : * in case that we find another occurrence (with a different value)
451 : */
452 : sql_exp* first;
453 : sql_exp* col; /* column */
454 : list *vs; /* list of values */
455 : } eq_cv;
456 :
457 : typedef struct exp_eq_multi_col_values {
458 : /* we need ->first in order to remove it from the list of multi col
459 : * cmp_eq exps in case that we find another occurrence (with different values)
460 : */
461 : list *first;
462 : list *cols; /* list of col exps */
463 : list *lvs; /* list of lists of values */
464 : } eq_mcv;
465 :
466 : static bool
467 4271 : detect_col_cmp_eqs(mvc *sql, list *eqs, sql_hash *eqh)
468 : {
469 4271 : bool col_multivalue_cmp_eq = false;
470 16051 : for (node *n = eqs->h; n; n = n->next ) {
471 11780 : sql_exp *e = n->data;
472 11780 : sql_exp *le = e->l, *re = e->r;
473 :
474 : /* find the le in the hash and append the re in the hash value (ea->list) */
475 11780 : bool found = false;
476 :
477 11780 : int key = eqh->key(le);
478 11780 : sql_hash_e *he = eqh->buckets[key&(eqh->size-1)];
479 :
480 14676 : for (;he && !found; he = he->chain) {
481 2896 : eq_cv *cv = he->value;
482 2896 : if (!exp_equal(le, cv->col)) {
483 2896 : cv->vs = append(cv->vs, re);
484 2896 : found = col_multivalue_cmp_eq = true;
485 : /* remove this and the previous (->first) occurrence (if exists) from eqs */
486 2896 : if (cv->first) {
487 1793 : list_remove_data(eqs, NULL, cv->first);
488 1793 : cv->first = NULL;
489 : }
490 2896 : list_remove_node(eqs, NULL, n);
491 : }
492 : }
493 :
494 11780 : if (!found) {
495 8884 : eq_cv *cv = SA_NEW(sql->sa, eq_cv);
496 8884 : cv->first = e;
497 8884 : cv->vs = sa_list(sql->sa);
498 8884 : cv->vs = append(cv->vs, re);
499 8884 : cv->col = le;
500 :
501 8884 : hash_add(eqh, key, cv);
502 : }
503 : }
504 4271 : return col_multivalue_cmp_eq;
505 : }
506 :
507 : static bool
508 693 : detect_multicol_cmp_eqs(mvc *sql, list *mce_ands, sql_hash *meqh)
509 : {
510 : /* we get as input a list of AND associated expressions (hence the entries are lists themselves)
511 : * we need to detect cmp_eq-only AND-associated expressions with the same columns so we can
512 : * group together their values
513 : * e.g. [[n = 1, m = 10], [m = 20, k = 100, l = 3000], [m = 20, n = 2]] has
514 : * - (m,k,l) group with a single value (20, 100, 3000)
515 : * - (n,k) group with two values (1, 10) and (2, 20)
516 : * at the end we return true only if we have at least a group of columns with more than a single value
517 : * e.g. in this example (n,k)
518 : */
519 693 : bool multi_multivalue_cmp_eq = false;
520 2466 : for (node *n = mce_ands->h; n; n = n->next) {
521 1773 : list *l = n->data;
522 :
523 : /* sort the list of the cmp_eq expressions based on the col exp
524 : * NOTE: from now on we only work with the sorted list, sl */
525 1773 : list *sl = list_sort(l, (fkeyvalue)&exp_cmp_eq_unique_id, NULL);
526 1773 : list_append_before(mce_ands, n, sl);
527 1773 : list_remove_node(mce_ands, NULL, n);
528 :
529 : /* find the eq exp in the hash and append the values */
530 1773 : bool found = false;
531 :
532 1773 : int key = meqh->key(sl);
533 1773 : sql_hash_e *he = meqh->buckets[key&(meqh->size-1)];
534 :
535 2875 : for (;he && !found; he = he->chain) {
536 : /* compare the values of the hash_entry with the cols under cmp_eq from the list */
537 1102 : bool same_cols = true;
538 1102 : eq_mcv *mcv = he->value;
539 3495 : for (node *m = sl->h, *k = mcv->cols->h; m && k && same_cols; m = m->next, k = k->next) {
540 2393 : sql_exp *col_exp = ((sql_exp*)m->data)->l;
541 2393 : if (exp_equal(col_exp, k->data))
542 663 : same_cols = false;
543 : }
544 1102 : if (same_cols) {
545 : /* we found the same multi cmp_eq exp in mce_ands list multiple times! */
546 439 : found = multi_multivalue_cmp_eq = true;
547 : /* gather all the values of the list and add them to the hash entry */
548 439 : list *atms = sa_list(sql->sa);
549 1799 : for (node *m = sl->h; m; m = m->next)
550 1360 : atms = append(atms, ((sql_exp*)m->data)->r);
551 439 : mcv->lvs = append(mcv->lvs, atms);
552 : /* remove this and the previous occurrence (which means that's the first time
553 : * that we found the *same* multi cmp_eq exp)
554 : */
555 439 : if (mcv->first) {
556 81 : list_remove_data(mce_ands, NULL, mcv->first);
557 81 : mcv->first = NULL;
558 : }
559 439 : list_remove_data(mce_ands, NULL, sl);
560 : }
561 : }
562 :
563 1773 : if (!found) {
564 1334 : eq_mcv *mcv = SA_NEW(sql->sa, eq_mcv);
565 1334 : mcv->first = sl;
566 1334 : mcv->cols = sa_list(sql->sa);
567 5754 : for (node *m = sl->h; m; m = m->next)
568 4420 : mcv->cols = append(mcv->cols, ((sql_exp*)m->data)->l);
569 : /* for the list of values (atoms) create a list and append it to the lvs list */
570 1334 : list *atms = sa_list(sql->sa);
571 5754 : for (node *m = sl->h; m; m = m->next)
572 4420 : atms = append(atms, ((sql_exp*)m->data)->r);
573 1334 : mcv->lvs = sa_list(sql->sa);
574 1334 : mcv->lvs = append(mcv->lvs, atms);
575 :
576 1334 : hash_add(meqh, key, mcv);
577 : }
578 : }
579 693 : return multi_multivalue_cmp_eq;
580 : }
581 :
582 : static void
583 47480 : exp_or_chain_groups(mvc *sql, list *exps, list **gen_ands, list **mce_ands, list **eqs, list **noneq)
584 : {
585 : /* identify three different groups
586 : * 1. gen_ands: lists of generic expressions (their inner association is AND)
587 : * 2. mce_ands: lists of multi_colum cmp_eq ONLY expressions (same^^^)
588 : * 3. eqs: equality expressions
589 : * 4. neq: non equality col expressions
590 : *
591 : * return true if there is an exp with more than one cmp_eq
592 : */
593 60086 : bool eq_only = true;
594 131693 : for (node *n = exps->h; n && eq_only; n = n->next) {
595 71607 : sql_exp *e = n->data;
596 71607 : sql_exp *le = e->l, *re = e->r;
597 142961 : eq_only &= (e->type == e_cmp && e->flag == cmp_equal &&
598 34128 : le->card != CARD_ATOM && is_column(le->type) &&
599 104659 : re->card == CARD_ATOM && !is_semantics(e));
600 : }
601 :
602 60086 : if (list_length(exps) > 1) {
603 5411 : if (eq_only)
604 4522 : *mce_ands = append(*mce_ands, exps);
605 : else
606 889 : *gen_ands = append(*gen_ands, exps);
607 54675 : } else if (list_length(exps) == 1) {
608 54675 : sql_exp *se = exps->h->data;
609 54675 : sql_exp *le = se->l, *re = se->r;
610 :
611 54675 : if (se->type == e_cmp && se->flag == cmp_or && !is_anti(se)) {
612 : /* for a cmp_or expression go down the tree */
613 12606 : exp_or_chain_groups(sql, (list*)le, gen_ands, mce_ands, eqs, noneq);
614 12606 : exp_or_chain_groups(sql, (list*)re, gen_ands, mce_ands, eqs, noneq);
615 :
616 42069 : } else if (eq_only) {
617 15312 : *eqs = append(*eqs, se);
618 : } else {
619 26757 : *noneq = append(*noneq, se);
620 : }
621 : }
622 47480 : }
623 :
624 : static list *
625 1686 : generate_single_col_cmp_in(mvc *sql, sql_hash *eqh)
626 : {
627 : /* from single col cmp_eq with multiple atoms in the hash generate
628 : * "e_col in (val0, val1, ...)" (see detect_col_cmp_eqs())
629 : */
630 1686 : list *ins = new_exp_list(sql->sa);
631 55638 : for (int i = 0; i < eqh->size; i++) {
632 53952 : sql_hash_e *he = eqh->buckets[i];
633 :
634 56592 : while (he) {
635 2640 : eq_cv *cv = he->value;
636 : /* NOTE: cmp_eq expressions with a single entry are still in eqs */
637 2640 : if (list_length(cv->vs) > 1)
638 1793 : ins = append(ins, exp_in(sql->sa, cv->col, cv->vs, cmp_in));
639 2640 : he = he->chain;
640 : }
641 : }
642 1686 : return ins;
643 : }
644 :
645 : static list *
646 81 : generate_multi_col_cmp_in(mvc *sql, sql_hash *meqh)
647 : {
648 : /* from multivalue cmp_eq with multiple lists of atoms in the hash generate
649 : * "(col1, col2, ...) in [(val10, val20, ...), (val11, val21, ...), ... ]"
650 : * (see detect_multicol_cmp_eqs())
651 : */
652 81 : list *ins = new_exp_list(sql->sa);
653 2673 : for (int i = 0; i < meqh->size; i++) {
654 2592 : sql_hash_e *he = meqh->buckets[i];
655 2687 : while (he) {
656 95 : eq_mcv *mcv = he->value;
657 : /* NOTE: multivalue cmp_eq expressions with a single entry are still in mce_ands */
658 95 : if (list_length(mcv->lvs) > 1) {
659 81 : sql_exp *mc = exp_label(sql->sa, exp_values(sql->sa, mcv->cols), ++sql->label);
660 601 : for (node *a = mcv->lvs->h; a; a = a->next)
661 520 : a->data = exp_values(sql->sa, a->data);
662 81 : ins = append(ins, exp_in(sql->sa, mc, mcv->lvs, cmp_in));
663 : }
664 95 : he = he->chain;
665 : }
666 : }
667 81 : return ins;
668 : }
669 :
670 : static list *
671 211662 : merge_ors(mvc *sql, list *exps, int *changes)
672 : {
673 211662 : sql_hash *eqh = NULL, *meqh = NULL;
674 211662 : list *eqs = NULL, *neq = NULL, *gen_ands = NULL, *mce_ands = NULL, *ins = NULL, *mins = NULL;
675 450496 : for (node *n = exps->h; n; n = n->next) {
676 238834 : sql_exp *e = n->data;
677 :
678 238834 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
679 : /* NOTE: gen_ands and mce_ands are both a list of lists since the AND association
680 : * between expressions is expressed with a list
681 : * e.g. [[e1, e2], [e3, e4, e5]] semantically translates
682 : * to [(e1 AND e2), (e3 AND e4 AND e5)]
683 : * those (internal) AND list can be then used to
684 : * reconstructed an OR tree [[e1, e2], [e3, e4, e5]] =>
685 : * (([e1, e2] OR [e3, e4, e5]) OR <whatever-else> )
686 : * gen_ands includes general expressions associated with AND
687 : * mce_ands includes only cmp_eq expressions associated with AND
688 : */
689 17437 : gen_ands = new_exp_list(sql->sa);
690 17437 : mce_ands = new_exp_list(sql->sa);
691 17437 : eqs = new_exp_list(sql->sa);
692 17437 : neq = new_exp_list(sql->sa);
693 :
694 : /* walk the OR tree */
695 17437 : exp_or_chain_groups(sql, e->l, &gen_ands, &mce_ands, &eqs, &neq);
696 17437 : exp_or_chain_groups(sql, e->r, &gen_ands, &mce_ands, &eqs, &neq);
697 :
698 : /* detect col cmp_eq exps with multiple values */
699 17437 : bool col_multival = false;
700 17437 : if (list_length(eqs) > 1) {
701 4271 : eqh = hash_new(sql->sa, 32, (fkeyvalue)&exp_col_key);
702 4271 : col_multival = detect_col_cmp_eqs(sql, eqs, eqh);
703 : }
704 :
705 : /* detect mutli-col cmp_eq exps with multiple (lists of) values */
706 17437 : bool multicol_multival = false;
707 17437 : if (list_length(mce_ands) > 1) {
708 693 : meqh = hash_new(sql->sa, 32, (fkeyvalue)&exp_multi_col_key);
709 693 : multicol_multival = detect_multicol_cmp_eqs(sql, mce_ands, meqh);
710 : }
711 :
712 17437 : if (!col_multival && !multicol_multival)
713 15679 : continue;
714 :
715 1758 : if (col_multival)
716 1686 : ins = generate_single_col_cmp_in(sql, eqh);
717 :
718 1758 : if (multicol_multival)
719 81 : mins = generate_multi_col_cmp_in(sql, meqh);
720 :
721 : /* create the new OR tree */
722 1758 : sql_exp *new = (ins) ? ins->h->data : mins->h->data;
723 :
724 72 : if (ins) {
725 1793 : for (node *i = ins->h->next; i; i = i->next) {
726 107 : list *l = new_exp_list(sql->sa);
727 107 : list *r = new_exp_list(sql->sa);
728 107 : l = append(l, new);
729 107 : r = append(r, (sql_exp*)i->data);
730 107 : new = exp_or(sql->sa, l, r, 0);
731 :
732 107 : (*changes)++;
733 : }
734 : }
735 :
736 1758 : if (list_length(eqs)) {
737 1448 : for (node *i = eqs->h; i; i = i->next) {
738 872 : list *l = new_exp_list(sql->sa);
739 872 : list *r = new_exp_list(sql->sa);
740 872 : l = append(l, new);
741 872 : r = append(r, (sql_exp*)i->data);
742 872 : new = exp_or(sql->sa, l, r, 0);
743 : }
744 : }
745 :
746 1758 : if (mins) {
747 171 : for (node *i = ((ins) ? mins->h : mins->h->next); i; i = i->next) {
748 9 : list *l = new_exp_list(sql->sa);
749 9 : list *r = new_exp_list(sql->sa);
750 9 : l = append(l, new);
751 9 : r = append(r, (sql_exp*)i->data);
752 9 : new = exp_or(sql->sa, l, r, 0);
753 :
754 9 : (*changes)++;
755 : }
756 : }
757 :
758 1758 : if (list_length(mce_ands)) {
759 523 : for (node *i = mce_ands->h; i; i = i->next) {
760 273 : list *l = new_exp_list(sql->sa);
761 273 : l = append(l, new);
762 273 : new = exp_or(sql->sa, l, i->data, 0);
763 : }
764 : }
765 :
766 1761 : for (node *a = gen_ands->h; a; a = a->next){
767 3 : list *l = new_exp_list(sql->sa);
768 3 : l = append(l, new);
769 3 : new = exp_or(sql->sa, l, a->data, 0);
770 : }
771 :
772 2076 : for (node *o = neq->h; o; o = o->next){
773 318 : list *l = new_exp_list(sql->sa);
774 318 : list *r = new_exp_list(sql->sa);
775 318 : l = append(l, new);
776 318 : r = append(r, (sql_exp*)o->data);
777 318 : new = exp_or(sql->sa, l, r, 0);
778 : }
779 :
780 1758 : list_remove_node(exps, NULL, n);
781 1758 : exps = append(exps, new);
782 : }
783 : }
784 211662 : return exps;
785 : }
786 :
787 : #define TRIVIAL_NOT_EQUAL_CMP(e) \
788 : ((e)->type == e_cmp && (e)->flag == cmp_notequal && !is_anti((e)) && !is_semantics((e)) && ((sql_exp*)(e)->l)->card != CARD_ATOM && ((sql_exp*)(e)->r)->card == CARD_ATOM)
789 :
790 : static list *
791 265568 : merge_notequal(mvc *sql, list *exps, int *changes)
792 : {
793 265568 : list *inequality_groups = NULL, *nexps = NULL;
794 265568 : int needed = 0;
795 :
796 568662 : for (node *n = exps->h; n; n = n->next) {
797 303094 : sql_exp *e = n->data;
798 :
799 303094 : if (TRIVIAL_NOT_EQUAL_CMP(e)) {
800 21437 : bool appended = false;
801 :
802 21437 : if (inequality_groups) {
803 1085 : for (node *m = inequality_groups->h; m && !appended; m = m->next) {
804 543 : list *next = m->data;
805 543 : sql_exp *first = (sql_exp*) next->h->data;
806 :
807 543 : if (exp_match(first->l, e->l)) {
808 540 : list_append(next, e);
809 540 : appended = true;
810 : }
811 : }
812 : }
813 542 : if (!appended) {
814 20897 : if (!inequality_groups)
815 20895 : inequality_groups = new_exp_list(sql->sa);
816 20897 : list_append(inequality_groups, list_append(new_exp_list(sql->sa), e));
817 : }
818 : }
819 : }
820 :
821 265568 : if (inequality_groups) { /* if one list of inequalities has more than one entry, then the re-write is needed */
822 41792 : for (node *n = inequality_groups->h; n; n = n->next) {
823 20897 : list *next = n->data;
824 :
825 20897 : if (list_length(next) > 1)
826 537 : needed = 1;
827 : }
828 : }
829 :
830 20895 : if (needed) {
831 536 : nexps = new_exp_list(sql->sa);
832 1074 : for (node *n = inequality_groups->h; n; n = n->next) {
833 538 : list *next = n->data;
834 538 : sql_exp *first = (sql_exp*) next->h->data;
835 :
836 538 : if (list_length(next) > 1) {
837 537 : list *notin = new_exp_list(sql->sa);
838 :
839 1614 : for (node *m = next->h; m; m = m->next) {
840 1077 : sql_exp *e = m->data;
841 1077 : list_append(notin, e->r);
842 : }
843 537 : list_append(nexps, exp_in(sql->sa, first->l, notin, cmp_notin));
844 : } else {
845 1 : list_append(nexps, first);
846 : }
847 : }
848 :
849 1616 : for (node *n = exps->h; n; n = n->next) {
850 1080 : sql_exp *e = n->data;
851 :
852 1080 : if (!TRIVIAL_NOT_EQUAL_CMP(e))
853 2 : list_append(nexps, e);
854 : }
855 536 : (*changes)++;
856 : } else {
857 : nexps = exps;
858 : }
859 :
860 568122 : for (node *n = nexps->h; n ; n = n->next) {
861 302554 : sql_exp *e = n->data;
862 :
863 302554 : if (e->type == e_cmp && e->flag == cmp_or) {
864 26953 : e->l = merge_notequal(sql, e->l, changes);
865 26953 : e->r = merge_notequal(sql, e->r, changes);
866 : }
867 : }
868 :
869 265568 : return nexps;
870 : }
871 :
872 : int
873 113501 : is_numeric_upcast(sql_exp *e)
874 : {
875 113501 : if (is_convert(e->type)) {
876 4548 : sql_subtype *f = exp_fromtype(e);
877 4548 : sql_subtype *t = exp_totype(e);
878 :
879 4548 : if (f->type->eclass == t->type->eclass && EC_COMPUTE(f->type->eclass)) {
880 1587 : if (f->type->localtype < t->type->localtype)
881 1567 : return 1;
882 : }
883 : }
884 : return 0;
885 : }
886 :
887 : /* optimize (a = b) or (a is null and b is null) -> a = b with null semantics */
888 : static sql_exp *
889 33032 : try_rewrite_equal_or_is_null(mvc *sql, sql_rel *rel, sql_exp *or, list *l1, list *l2)
890 : {
891 33032 : if (list_length(l1) == 1) {
892 29659 : bool valid = true, first_is_null_found = false, second_is_null_found = false;
893 29659 : sql_exp *cmp = l1->h->data;
894 29659 : sql_exp *first = cmp->l, *second = cmp->r;
895 :
896 29659 : if (is_compare(cmp->type) && !is_anti(cmp) && !cmp->f && cmp->flag == cmp_equal) {
897 6995 : int fupcast = is_numeric_upcast(first), supcast = is_numeric_upcast(second);
898 14014 : for(node *n = l2->h ; n && valid; n = n->next) {
899 7019 : sql_exp *e = n->data, *l = e->l, *r = e->r;
900 :
901 7019 : if (is_compare(e->type) && e->flag == cmp_equal && !e->f &&
902 4174 : !is_anti(e) && is_semantics(e)) {
903 1374 : int lupcast = is_numeric_upcast(l);
904 1374 : int rupcast = is_numeric_upcast(r);
905 1374 : sql_exp *rr = rupcast ? r->l : r;
906 :
907 1374 : if (rr->type == e_atom && rr->l && atom_null(rr->l)) {
908 1374 : if (exp_match_exp(fupcast?first->l:first, lupcast?l->l:l))
909 : first_is_null_found = true;
910 116 : else if (exp_match_exp(supcast?second->l:second, lupcast?l->l:l))
911 : second_is_null_found = true;
912 : else
913 5737 : valid = false;
914 : } else {
915 : valid = false;
916 : }
917 : } else {
918 : valid = false;
919 : }
920 : }
921 6995 : if (valid && first_is_null_found && second_is_null_found) {
922 21 : sql_subtype super;
923 :
924 21 : cmp_supertype(&super, exp_subtype(first), exp_subtype(second)); /* first and second must have the same type */
925 21 : if (!(first = exp_check_type(sql, &super, rel, first, type_equal)) ||
926 21 : !(second = exp_check_type(sql, &super, rel, second, type_equal))) {
927 0 : sql->session->status = 0;
928 0 : sql->errstr[0] = 0;
929 0 : return or;
930 : }
931 21 : sql_exp *res = exp_compare(sql->sa, first, second, cmp->flag);
932 21 : set_semantics(res);
933 21 : if (exp_name(or))
934 0 : exp_prop_alias(sql->sa, res, or);
935 21 : return res;
936 : }
937 : }
938 : }
939 : return or;
940 : }
941 :
942 : static list *
943 478820 : merge_cmp_or_null(mvc *sql, sql_rel *rel, list *exps, int *changes)
944 : {
945 1022340 : for (node *n = exps->h; n ; n = n->next) {
946 543520 : sql_exp *e = n->data;
947 :
948 543520 : if (is_compare(e->type) && e->flag == cmp_or && !is_anti(e)) {
949 16516 : sql_exp *ne = try_rewrite_equal_or_is_null(sql, rel, e, e->l, e->r);
950 16516 : if (ne != e) {
951 19 : (*changes)++;
952 19 : n->data = ne;
953 : }
954 16516 : ne = try_rewrite_equal_or_is_null(sql, rel, e, e->r, e->l);
955 16516 : if (ne != e) {
956 2 : (*changes)++;
957 2 : n->data = ne;
958 : }
959 : }
960 : }
961 478820 : return exps;
962 : }
963 :
964 : static list *
965 478820 : cleanup_equal_exps(mvc *sql, sql_rel *rel, list *exps, int *changes)
966 : {
967 478820 : if (list_length(exps) <= 1)
968 : return exps;
969 55582 : if (is_join(rel->op)) {
970 93358 : for(node *n = exps->h; n; n = n->next) {
971 63271 : sql_exp *e = n->data;
972 126397 : if (e->type == e_cmp && !e->f && e->flag <= cmp_notequal &&
973 105676 : !rel_find_exp(rel->l, e->l) && !rel_find_exp(rel->r, e->r) &&
974 42385 : !find_prop(e->p, PROP_HASHCOL) && !find_prop(e->p, PROP_JOINIDX)) {
975 21119 : exp_swap(e);
976 : }
977 : }
978 : }
979 55582 : bool needed = false;
980 176315 : for(node *n = exps->h; !needed && n; n = n->next) {
981 313705 : for (node *m = n->next; !needed && m; m = m->next) {
982 192972 : if (exp_match_exp_semantics(n->data, m->data, true))
983 79 : needed = true;
984 : }
985 : }
986 55582 : if (needed) {
987 79 : list *nexps = sa_list(sql->sa);
988 :
989 243 : for(node *n = exps->h; n; n = n->next) {
990 164 : bool done = false;
991 421 : for (node *m = exps->h; m && !done; m = m->next) {
992 257 : if (n != m && exp_match_exp_semantics(n->data, m->data, false)) {
993 159 : sql_exp *e1 = n->data, *e2 = m->data;
994 159 : if ((is_any(e1) || is_semantics(e1)) || (!is_any(e2) && !is_semantics(e2))) {
995 159 : append(nexps, e1);
996 159 : if ((!is_any(e2) && !is_semantics(e2)) && is_left(rel->op) && list_length(rel->attr) == 1) {
997 : /* nil is false */
998 0 : sql_exp *m = rel->attr->h->data;
999 0 : if (exp_is_atom(m))
1000 0 : set_no_nil(m);
1001 : }
1002 : }
1003 : done = true;
1004 : }
1005 : }
1006 164 : if (!done)
1007 5 : append(nexps, n->data);
1008 : }
1009 : return nexps;
1010 : }
1011 : (void)changes;
1012 : return exps;
1013 : }
1014 :
1015 : static inline sql_rel *
1016 478820 : rel_select_cse(visitor *v, sql_rel *rel)
1017 : {
1018 478820 : if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && rel->exps) /* cleanup equal expressions */
1019 478820 : rel->exps = cleanup_equal_exps(v->sql, rel, rel->exps, &v->changes); /* (a = b) and (a += b) */
1020 :
1021 478820 : if (is_select(rel->op) && rel->exps)
1022 211662 : rel->exps = merge_ors(v->sql, rel->exps, &v->changes);
1023 :
1024 478820 : if (is_select(rel->op) && rel->exps)
1025 211662 : rel->exps = merge_notequal(v->sql, rel->exps, &v->changes); /* x <> 1 and x <> 2 => x not in (1, 2)*/
1026 :
1027 478820 : if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && rel->exps)
1028 478820 : rel->exps = merge_cmp_or_null(v->sql, rel, rel->exps, &v->changes); /* (a = b) or (a is null and b is null) -> a = b with null semantics */
1029 :
1030 478820 : if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && rel->exps) {
1031 478820 : node *n;
1032 478820 : list *nexps;
1033 478820 : int needed = 0;
1034 :
1035 1019941 : for (n=rel->exps->h; n && !needed; n = n->next) {
1036 541121 : sql_exp *e = n->data;
1037 :
1038 541121 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e))
1039 541121 : needed = 1;
1040 : }
1041 478820 : if (!needed)
1042 : return rel;
1043 16367 : nexps = new_exp_list(v->sql->sa);
1044 35594 : for (n=rel->exps->h; n; n = n->next) {
1045 19227 : sql_exp *e = n->data;
1046 :
1047 19227 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
1048 : /* split the common expressions */
1049 16495 : v->changes += exps_cse(v->sql, nexps, e->l, e->r);
1050 : } else {
1051 2732 : append(nexps, e);
1052 : }
1053 : }
1054 16367 : rel->exps = nexps;
1055 : }
1056 : return rel;
1057 : }
1058 :
1059 : static list *
1060 10489 : exps_merge_select_rse( mvc *sql, list *l, list *r, bool *merged)
1061 : {
1062 10489 : node *n, *m, *o;
1063 10489 : list *nexps = NULL, *lexps, *rexps;
1064 10489 : bool lmerged = true, rmerged = true;
1065 :
1066 10489 : lexps = new_exp_list(sql->sa);
1067 22359 : for (n = l->h; n; n = n->next) {
1068 11870 : sql_exp *e = n->data;
1069 :
1070 11870 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e) && !is_semantics(e)) {
1071 3326 : lmerged = false;
1072 3326 : list *nexps = exps_merge_select_rse(sql, e->l, e->r, &lmerged);
1073 3728 : for (o = nexps->h; o; o = o->next)
1074 402 : append(lexps, o->data);
1075 : } else {
1076 8544 : append(lexps, e);
1077 : }
1078 : }
1079 10489 : if (lmerged)
1080 7212 : lmerged = (list_length(lexps) == 1);
1081 10489 : rexps = new_exp_list(sql->sa);
1082 23375 : for (n = r->h; n; n = n->next) {
1083 12886 : sql_exp *e = n->data;
1084 :
1085 12886 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e) && !is_semantics(e)) {
1086 699 : rmerged = false;
1087 699 : list *nexps = exps_merge_select_rse(sql, e->l, e->r, &rmerged);
1088 724 : for (o = nexps->h; o; o = o->next)
1089 25 : append(rexps, o->data);
1090 : } else {
1091 12187 : append(rexps, e);
1092 : }
1093 : }
1094 10489 : if (rmerged)
1095 9811 : rmerged = (list_length(r) == 1);
1096 :
1097 10489 : nexps = new_exp_list(sql->sa);
1098 :
1099 : /* merge merged lists first ? */
1100 19435 : for (n = lexps->h; n; n = n->next) {
1101 8946 : sql_exp *le = n->data, *re, *fnd = NULL;
1102 :
1103 8946 : if (le->type != e_cmp || le->flag == cmp_or || is_anti(le) || is_semantics(le) || is_symmetric(le))
1104 622 : continue;
1105 17246 : for (m = rexps->h; !fnd && m; m = m->next) {
1106 8922 : re = m->data;
1107 8922 : if (exps_match_col_exps(le, re))
1108 1552 : fnd = re;
1109 : }
1110 8324 : if (fnd && (is_anti(fnd) || is_semantics(fnd)))
1111 50 : continue;
1112 : /* cases
1113 : * 1) 2 values (cmp_equal)
1114 : * 2) 1 value (cmp_equal), and cmp_in
1115 : * (also cmp_in, cmp_equal)
1116 : * 3) 2 cmp_in
1117 : * 4) ranges
1118 : */
1119 1502 : if (fnd) {
1120 1502 : re = fnd;
1121 1502 : fnd = NULL;
1122 1502 : if (is_anti(le) || is_anti(re) || is_symmetric(re))
1123 0 : continue;
1124 1939 : if (le->flag == cmp_equal && re->flag == cmp_equal) {
1125 437 : list *exps = new_exp_list(sql->sa);
1126 :
1127 437 : append(exps, le->r);
1128 437 : append(exps, re->r);
1129 437 : fnd = exp_in(sql->sa, le->l, exps, cmp_in);
1130 1208 : } else if (le->flag == cmp_equal && re->flag == cmp_in){
1131 143 : list *exps = new_exp_list(sql->sa);
1132 :
1133 143 : append(exps, le->r);
1134 143 : list_merge(exps, re->r, NULL);
1135 143 : fnd = exp_in(sql->sa, le->l, exps, cmp_in);
1136 1217 : } else if (le->flag == cmp_in && re->flag == cmp_equal){
1137 295 : list *exps = new_exp_list(sql->sa);
1138 :
1139 295 : append(exps, re->r);
1140 295 : list_merge(exps, le->r, NULL);
1141 295 : fnd = exp_in(sql->sa, le->l, exps, cmp_in);
1142 742 : } else if (le->flag == cmp_in && re->flag == cmp_in){
1143 115 : list *exps = new_exp_list(sql->sa);
1144 :
1145 115 : list_merge(exps, le->r, NULL);
1146 115 : list_merge(exps, re->r, NULL);
1147 115 : fnd = exp_in(sql->sa, le->l, exps, cmp_in);
1148 512 : } else if (le->f && re->f && /* merge ranges */
1149 20 : le->flag == re->flag && le->flag <= cmp_lt) {
1150 20 : sql_exp *mine = NULL, *maxe = NULL;
1151 :
1152 20 : if (!(mine = rel_binop_(sql, NULL, exp_copy(sql, le->r), exp_copy(sql, re->r), "sys", "sql_min", card_value, true))) {
1153 0 : sql->session->status = 0;
1154 0 : sql->errstr[0] = '\0';
1155 0 : continue;
1156 : }
1157 20 : if (!(maxe = rel_binop_(sql, NULL, exp_copy(sql, le->f), exp_copy(sql, re->f), "sys", "sql_max", card_value, true))) {
1158 0 : sql->session->status = 0;
1159 0 : sql->errstr[0] = '\0';
1160 0 : continue;
1161 : }
1162 20 : fnd = exp_compare2(sql->sa, exp_copy(sql, le->l), mine, maxe, le->flag, 0);
1163 20 : lmerged = false;
1164 : }
1165 1010 : if (fnd) {
1166 1010 : append(nexps, fnd);
1167 1864 : *merged = (fnd && lmerged && rmerged);
1168 : }
1169 : }
1170 : }
1171 10489 : return nexps;
1172 : }
1173 :
1174 : /* merge related sub expressions
1175 : *
1176 : * ie (x = a and y > 1 and y < 5) or
1177 : * (x = c and y > 1 and y < 10) or
1178 : * (x = e and y > 1 and y < 20)
1179 : * ->
1180 : * ((x = a and y > 1 and y < 5) or
1181 : * (x = c and y > 1 and y < 10) or
1182 : * (x = e and y > 1 and y < 20)) and
1183 : * x in (a,c,e) and
1184 : * y > 1 and y < 20
1185 : *
1186 : * for single expression or's we can do better
1187 : * x in (a, b, c) or x in (d, e, f)
1188 : * ->
1189 : * x in (a, b, c, d, e, f)
1190 : * */
1191 : static inline sql_rel *
1192 146766 : rel_merge_select_rse(visitor *v, sql_rel *rel)
1193 : {
1194 : /* only execute once per select */
1195 146766 : if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && rel->exps && !is_rel_merge_select_rse_used(rel->used)) {
1196 146766 : node *n, *o;
1197 146766 : list *nexps = new_exp_list(v->sql->sa);
1198 :
1199 310406 : for (n=rel->exps->h; n; n = n->next) {
1200 163640 : sql_exp *e = n->data;
1201 :
1202 170104 : if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e) && !is_semantics(e)) {
1203 : /* possibly merge related expressions */
1204 6464 : bool merged = false;
1205 :
1206 6464 : list *ps = exps_merge_select_rse(v->sql, e->l, e->r, &merged);
1207 7047 : for (o = ps->h; o; o = o->next)
1208 583 : append(nexps, o->data);
1209 6464 : if (merged)
1210 87 : v->changes++;
1211 : else
1212 6377 : append(nexps, e);
1213 : } else {
1214 157176 : append(nexps, e);
1215 : }
1216 : }
1217 146766 : rel->exps = nexps;
1218 146766 : rel->used |= rel_merge_select_rse_used;
1219 : }
1220 146766 : return rel;
1221 : }
1222 :
1223 : /* pack optimizers into a single function call to avoid iterations in the AST */
1224 : static sql_rel *
1225 1831606 : rel_optimize_select_and_joins_bottomup_(visitor *v, sql_rel *rel)
1226 : {
1227 1831606 : if (!rel || (!is_join(rel->op) && !is_semi(rel->op) && !is_select(rel->op)) || list_empty(rel->exps))
1228 1352786 : return rel;
1229 478820 : uint8_t cycle = *(uint8_t*) v->data;
1230 :
1231 478820 : rel->exps = exp_merge_range(v, rel, rel->exps);
1232 478820 : rel = rel_select_cse(v, rel);
1233 478820 : if (cycle == 1)
1234 146766 : rel = rel_merge_select_rse(v, rel);
1235 478820 : rel = rewrite_simplify(v, cycle, v->value_based_opt, rel);
1236 478820 : return rel;
1237 : }
1238 :
1239 : static sql_rel *
1240 135056 : rel_optimize_select_and_joins_bottomup(visitor *v, global_props *gp, sql_rel *rel)
1241 : {
1242 135056 : v->data = &gp->opt_cycle;
1243 135056 : rel = rel_visitor_bottomup(v, rel, &rel_optimize_select_and_joins_bottomup_);
1244 135056 : v->data = gp;
1245 135056 : return rel;
1246 : }
1247 :
1248 : run_optimizer
1249 645054 : bind_optimize_select_and_joins_bottomup(visitor *v, global_props *gp)
1250 : {
1251 645054 : int flag = v->sql->sql_optimizer;
1252 644854 : return gp->opt_level == 1 && (gp->cnt[op_join] || gp->cnt[op_left] || gp->cnt[op_right] ||
1253 558523 : gp->cnt[op_full] || gp->cnt[op_semi] || gp->cnt[op_anti] ||
1254 1289908 : gp->cnt[op_select]) && (flag & optimize_select_and_joins_bottomup) ? rel_optimize_select_and_joins_bottomup : NULL;
1255 : }
1256 :
1257 :
1258 : static inline sql_rel *
1259 1606683 : rel_push_join_exps_down(visitor *v, sql_rel *rel)
1260 : {
1261 : /* push select exps part of join expressions down */
1262 : /* TODO CHECK WHY not semi enabled */
1263 1606683 : if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) /*|| is_semi(rel->op)*/) && !list_empty(rel->exps)) {
1264 248695 : int left = is_innerjoin(rel->op) || is_right(rel->op) || rel->op == op_semi;
1265 248695 : int right = is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op);
1266 248695 : sql_rel *jl = rel->l, *ojl = jl, *jr = rel->r, *ojr = jr;
1267 :
1268 248695 : set_processed(jl);
1269 248695 : set_processed(jr);
1270 530513 : for (node *n = rel->exps->h; n;) {
1271 281818 : node *next = n->next;
1272 281818 : sql_exp *e = n->data;
1273 :
1274 281818 : if (left && rel_rebind_exp(v->sql, jl, e) && !is_any(e)) { /* select expressions on left */
1275 1351 : if (!is_select(jl->op) || rel_is_ref(jl))
1276 1340 : rel->l = jl = rel_select(v->sql->sa, jl, NULL);
1277 1351 : rel_select_add_exp(v->sql->sa, jl, e);
1278 1351 : list_remove_node(rel->exps, NULL, n);
1279 1351 : v->changes++;
1280 280467 : } else if (right && rel_rebind_exp(v->sql, jr, e) && !is_any(e)) { /* select expressions on right */
1281 45 : if (!is_select(jr->op) || rel_is_ref(jr))
1282 42 : rel->r = jr = rel_select(v->sql->sa, jr, NULL);
1283 45 : rel_select_add_exp(v->sql->sa, jr, e);
1284 45 : list_remove_node(rel->exps, NULL, n);
1285 45 : v->changes++;
1286 : }
1287 : n = next;
1288 : }
1289 248695 : if (ojl != jl)
1290 1340 : set_processed(jl);
1291 248695 : if (ojr != jr)
1292 42 : set_processed(jr);
1293 : }
1294 1606683 : return rel;
1295 : }
1296 :
1297 : static inline bool
1298 1606683 : is_non_trivial_select_applied_to_outer_join(sql_rel *rel)
1299 : {
1300 1606683 : return is_select(rel->op) && rel->exps && is_outerjoin(((sql_rel*) rel->l)->op);
1301 : }
1302 :
1303 : extern list *list_append_before(list *l, node *n, void *data);
1304 :
1305 : static void
1306 : replace_column_references_with_nulls_2(mvc *sql, sql_rel *inner_join_side, sql_exp* e);
1307 :
1308 : static void
1309 5585 : replace_column_references_with_nulls_1(mvc *sql, sql_rel *inner_join_side, list* exps) {
1310 5585 : if (list_empty(exps))
1311 : return;
1312 14353 : for(node* n = exps->h; n; n=n->next) {
1313 8768 : sql_exp* e = n->data;
1314 8768 : replace_column_references_with_nulls_2(sql, inner_join_side, e);
1315 : }
1316 : }
1317 :
1318 : static void
1319 28214 : replace_column_references_with_nulls_2(mvc *sql, sql_rel *inner_join_side, sql_exp* e) {
1320 35935 : if (e == NULL) {
1321 : return;
1322 : }
1323 :
1324 29762 : switch (e->type) {
1325 8871 : case e_column:
1326 8871 : if (rel_find_exp_and_corresponding_rel(inner_join_side, e, true, NULL, NULL)) {
1327 2558 : e->type = e_atom;
1328 2558 : e->l = atom_general(sql->sa, &e->tpe, NULL, 0);
1329 2558 : e->r = e->f = NULL;
1330 : }
1331 : break;
1332 9188 : case e_cmp:
1333 9188 : switch (e->flag) {
1334 6547 : case cmp_gt:
1335 : case cmp_gte:
1336 : case cmp_lte:
1337 : case cmp_lt:
1338 : case cmp_equal:
1339 : case cmp_notequal:
1340 : {
1341 6547 : sql_exp* l = e->l;
1342 6547 : sql_exp* r = e->r;
1343 6547 : sql_exp* f = e->f;
1344 :
1345 6547 : replace_column_references_with_nulls_2(sql, inner_join_side, l);
1346 6547 : replace_column_references_with_nulls_2(sql, inner_join_side, r);
1347 6547 : replace_column_references_with_nulls_2(sql, inner_join_side, f);
1348 6547 : break;
1349 : }
1350 1876 : case cmp_filter:
1351 : case cmp_or:
1352 : {
1353 1876 : list* l = e->l;
1354 1876 : list* r = e->r;
1355 1876 : replace_column_references_with_nulls_1(sql, inner_join_side, l);
1356 1876 : replace_column_references_with_nulls_1(sql, inner_join_side, r);
1357 1876 : break;
1358 : }
1359 765 : case cmp_in:
1360 : case cmp_notin:
1361 : {
1362 765 : sql_exp* l = e->l;
1363 765 : list* r = e->r;
1364 765 : replace_column_references_with_nulls_2(sql, inner_join_side, l);
1365 765 : replace_column_references_with_nulls_1(sql, inner_join_side, r);
1366 765 : break;
1367 : }
1368 : default:
1369 : break;
1370 : }
1371 : break;
1372 1068 : case e_func:
1373 : {
1374 1068 : list* l = e->l;
1375 1068 : replace_column_references_with_nulls_1(sql, inner_join_side, l);
1376 1068 : break;
1377 : }
1378 1174 : case e_convert:
1379 : {
1380 1174 : sql_exp* l = e->l;
1381 1174 : replace_column_references_with_nulls_2(sql, inner_join_side, l);
1382 1174 : break;
1383 : }
1384 : default:
1385 : break;
1386 : }
1387 : }
1388 :
1389 : static sql_rel *
1390 4948 : out2inner(visitor *v, sql_rel* sel, sql_rel* join, sql_rel* inner_join_side, operator_type new_type) {
1391 :
1392 : /* handle inner_join relations with a simple select */
1393 4948 : if (is_select(inner_join_side->op) && inner_join_side->l)
1394 4948 : inner_join_side = inner_join_side->l;
1395 :
1396 4948 : list* select_predicates = exps_copy(v->sql, sel->exps);
1397 :
1398 10439 : for(node* n = select_predicates->h; n; n=n->next) {
1399 5587 : sql_exp* e = n->data;
1400 5587 : replace_column_references_with_nulls_2(v->sql, inner_join_side, e);
1401 :
1402 5587 : if (exp_is_false(e)) {
1403 96 : join->op = new_type;
1404 96 : v->changes++;
1405 96 : break;
1406 : }
1407 : }
1408 :
1409 4948 : return sel;
1410 : }
1411 :
1412 : static inline sql_rel *
1413 1606683 : rel_out2inner(visitor *v, sql_rel *rel) {
1414 :
1415 1606683 : if (!is_non_trivial_select_applied_to_outer_join(rel)) {
1416 : // Nothing to do here.
1417 : return rel;
1418 : }
1419 :
1420 4925 : sql_rel* join = (sql_rel*) rel->l;
1421 :
1422 4925 : if (rel_is_ref(join)) {
1423 : /* Do not alter a multi-referenced join relation.
1424 : * This is problematic (e.g. in the case of the plan of a merge statement)
1425 : * basically because there are no guarantees on the other container relations.
1426 : * In particular there is no guarantee that the other referencing relations are
1427 : * select relations with null-rejacting predicates on the inner join side.
1428 : */
1429 : return rel;
1430 : }
1431 :
1432 4925 : sql_rel* inner_join_side;
1433 4925 : if (is_left(join->op)) {
1434 4876 : inner_join_side = join->r;
1435 4876 : return out2inner(v, rel, join, inner_join_side, op_join);
1436 : }
1437 49 : else if (is_right(join->op)) {
1438 26 : inner_join_side = join->l;
1439 26 : return out2inner(v, rel, join, inner_join_side, op_join);
1440 : }
1441 : else /*full outer join*/ {
1442 : // First check if left side can degenerate from full outer join to just right outer join.
1443 23 : inner_join_side = join->r;
1444 23 : rel = out2inner(v, rel, join, inner_join_side, op_right);
1445 : /* Now test if the right side can degenerate to
1446 : * a normal inner join or a left outer join
1447 : * depending on the result of previous call to out2inner.
1448 : */
1449 :
1450 23 : inner_join_side = join->l;
1451 37 : return out2inner(v, rel, join, inner_join_side, is_right(join->op)? op_join: op_left);
1452 : }
1453 : }
1454 :
1455 : static bool
1456 3993 : exps_uses_any(list *exps, list *l)
1457 : {
1458 3993 : bool uses_any = false;
1459 :
1460 3993 : if (list_empty(exps) || list_empty(l))
1461 15 : return false;
1462 7962 : for (node *n = l->h; n && !uses_any; n = n->next) {
1463 3984 : sql_exp *e = n->data;
1464 3984 : uses_any |= list_exps_uses_exp(exps, exp_relname(e), exp_name(e)) != NULL;
1465 : }
1466 :
1467 : return uses_any;
1468 : }
1469 :
1470 : /* TODO At the moment I have to disable the new join2semi because the join order optimizer doesn't take semi-joins into account,
1471 : so plans get deteriorated if more joins are optimized into semi-joins. Later I will review the join order with semi-joins and hopefully,
1472 : I will be able to re-enable the new join2semi. */
1473 : #if 0
1474 : #define NO_EXP_FOUND 0
1475 : #define FOUND_WITH_DUPLICATES 1
1476 : #define MAY_HAVE_DUPLICATE_NULLS 2
1477 : #define ALL_VALUES_DISTINCT 3
1478 :
1479 : static int
1480 : find_projection_for_join2semi(sql_rel *rel, sql_exp *jc)
1481 : {
1482 : sql_rel *res = NULL;
1483 : sql_exp *e = NULL;
1484 : bool underjoin = false;
1485 :
1486 : if ((e = rel_find_exp_and_corresponding_rel(rel, jc, &res, &underjoin))) {
1487 : if (underjoin || e->type != e_column)
1488 : return FOUND_WITH_DUPLICATES;
1489 : /* if just one groupby column is projected or the relation needs distinct values and one column is projected or is a primary key, it will be distinct */
1490 : if (is_unique(e) ||
1491 : (is_groupby(res->op) && list_length(res->r) == 1 && exps_find_exp(res->r, e)) ||
1492 : ((is_project(res->op) || is_base(res->op)) && ((need_distinct(res) && list_length(res->exps) == 1) || res->card < CARD_AGGR)))
1493 : return has_nil(e) ? MAY_HAVE_DUPLICATE_NULLS : ALL_VALUES_DISTINCT;
1494 : return FOUND_WITH_DUPLICATES;
1495 : }
1496 : return NO_EXP_FOUND;
1497 : }
1498 :
1499 : static int
1500 : subrel_uses_exp_outside_subrel(visitor *v, sql_rel *rel, list *l, sql_rel *j)
1501 : {
1502 : if (rel == j)
1503 : return 0;
1504 : if (mvc_highwater(v->sql))
1505 : return 1;
1506 : switch(rel->op){
1507 : case op_join:
1508 : case op_left:
1509 : case op_right:
1510 : case op_full:
1511 : return exps_uses_any(rel->exps, l) ||
1512 : subrel_uses_exp_outside_subrel(v, rel->l, l, j) || subrel_uses_exp_outside_subrel(v, rel->r, l, j);
1513 : case op_semi:
1514 : case op_anti:
1515 : case op_select:
1516 : return exps_uses_any(rel->exps, l) ||
1517 : subrel_uses_exp_outside_subrel(v, rel->l, l, j);
1518 : case op_project:
1519 : case op_groupby:
1520 : return exps_uses_any(rel->exps, l) || exps_uses_any(rel->r, l);
1521 : case op_basetable:
1522 : case op_table:
1523 : case op_union:
1524 : case op_except:
1525 : case op_inter:
1526 : return exps_uses_any(rel->exps, l);
1527 : case op_topn:
1528 : case op_sample:
1529 : return subrel_uses_exp_outside_subrel(v, rel->l, l, j);
1530 : default:
1531 : return 1;
1532 : }
1533 : }
1534 :
1535 : static int
1536 : projrel_uses_exp_outside_subrel(visitor *v, sql_rel *rel, list *l, sql_rel *j)
1537 : {
1538 : /* test if projecting relation uses any of the join expressions */
1539 : assert((is_simple_project(rel->op) || is_groupby(rel->op)) && rel->l);
1540 : return exps_uses_any(rel->exps, l) || exps_uses_any(rel->r, l) || subrel_uses_exp_outside_subrel(v, rel->l, l, j);
1541 : }
1542 :
1543 : static sql_rel *
1544 : rewrite_joins2semi(visitor *v, sql_rel *proj, sql_rel *rel)
1545 : {
1546 : /* generalize possibility : we need the visitor 'step' here */
1547 : if (rel_is_ref(rel) || mvc_highwater(v->sql)) /* if the join has multiple references, it's dangerous to convert it into a semijoin */
1548 : return rel;
1549 : if (is_innerjoin(rel->op) && !list_empty(rel->exps)) {
1550 : sql_rel *l = rel->l, *r = rel->r;
1551 : bool left_unique = true, right_unique = true;
1552 :
1553 : /* these relations don't project anything, so skip them */
1554 : while (is_topn(l->op) || is_sample(l->op) || is_select(l->op) || is_semi(l->op))
1555 : l = l->l;
1556 : /* joins will expand values, so don't search on those */
1557 : if (!is_base(l->op) && !is_project(l->op))
1558 : left_unique = false;
1559 : while (is_topn(r->op) || is_sample(r->op) || is_select(r->op) || is_semi(r->op))
1560 : r = r->l;
1561 : if (!is_base(r->op) && !is_project(r->op))
1562 : right_unique = false;
1563 : /* if all columns used in equi-joins from one of the sides are unique, the join can be rewritten into a semijoin */
1564 : for (node *n=rel->exps->h; n && (left_unique || right_unique); n = n->next) {
1565 : sql_exp *e = n->data, *el = e->l, *er = e->r;
1566 :
1567 : if (!is_compare(e->type) || e->flag != cmp_equal || exp_has_func(el) || exp_has_func(er)) {
1568 : left_unique = right_unique = false;
1569 : } else {
1570 : int found = 0;
1571 :
1572 : if (left_unique && (found = find_projection_for_join2semi(l, el)) > NO_EXP_FOUND)
1573 : left_unique &= (found == ALL_VALUES_DISTINCT || (found == MAY_HAVE_DUPLICATE_NULLS && (!is_semantics(e) || !has_nil(er))));
1574 : if (left_unique && (found = find_projection_for_join2semi(l, er)) > NO_EXP_FOUND)
1575 : left_unique &= (found == ALL_VALUES_DISTINCT || (found == MAY_HAVE_DUPLICATE_NULLS && (!is_semantics(e) || !has_nil(el))));
1576 : if (right_unique && (found = find_projection_for_join2semi(r, el)) > NO_EXP_FOUND)
1577 : right_unique &= (found == ALL_VALUES_DISTINCT || (found == MAY_HAVE_DUPLICATE_NULLS && (!is_semantics(e) || !has_nil(er))));
1578 : if (right_unique && (found = find_projection_for_join2semi(r, er)) > NO_EXP_FOUND)
1579 : right_unique &= (found == ALL_VALUES_DISTINCT || (found == MAY_HAVE_DUPLICATE_NULLS && (!is_semantics(e) || !has_nil(el))));
1580 : }
1581 : }
1582 :
1583 : /* now we need to check relation's expressions are not used */
1584 : if (left_unique && !projrel_uses_exp_outside_subrel(v, proj, l->exps, rel)) {
1585 : sql_rel *tmp = rel->r;
1586 : rel->r = rel->l;
1587 : rel->l = tmp;
1588 : rel->op = op_semi;
1589 : v->changes++;
1590 : } else if (right_unique && !projrel_uses_exp_outside_subrel(v, proj, r->exps, rel)) {
1591 : rel->op = op_semi;
1592 : v->changes++;
1593 : }
1594 : }
1595 : if (is_join(rel->op)) {
1596 : rel->l = rewrite_joins2semi(v, proj, rel->l);
1597 : rel->r = rewrite_joins2semi(v, proj, rel->r);
1598 : } else if (is_topn(rel->op) || is_sample(rel->op) || is_select(rel->op) || is_semi(rel->op)) {
1599 : rel->l = rewrite_joins2semi(v, proj, rel->l);
1600 : }
1601 : return rel;
1602 : }
1603 :
1604 : static inline sql_rel *
1605 : rel_join2semijoin(visitor *v, sql_rel *rel)
1606 : {
1607 : if ((is_simple_project(rel->op) || is_groupby(rel->op)) && rel->l)
1608 : rel->l = rewrite_joins2semi(v, rel, rel->l);
1609 : return rel;
1610 : }
1611 : #endif
1612 :
1613 : #define NO_PROJECTION_FOUND 0
1614 : #define MAY_HAVE_DUPLICATE_NULLS 1
1615 : #define ALL_VALUES_DISTINCT 2
1616 :
1617 : static int
1618 371447 : find_projection_for_join2semi(sql_rel *rel)
1619 : {
1620 371447 : if (is_simple_project(rel->op) || is_groupby(rel->op) || is_inter(rel->op) || is_except(rel->op) || is_base(rel->op) || (is_union(rel->op) && need_distinct(rel))) {
1621 235448 : if (rel->card < CARD_AGGR) /* const or groupby without group by exps */
1622 : return ALL_VALUES_DISTINCT;
1623 234313 : if (list_length(rel->exps) == 1) {
1624 6326 : sql_exp *e = rel->exps->h->data;
1625 : /* a single group by column in the projection list from a group by relation is guaranteed to be unique, but not an aggregate */
1626 6326 : if (e->type == e_column) {
1627 6294 : sql_rel *res = NULL;
1628 6294 : sql_exp *found = NULL;
1629 6294 : bool underjoin = false;
1630 :
1631 : /* if just one groupby column is projected or the relation needs distinct values and one column is projected or is a primary key, it will be distinct */
1632 6294 : if ((is_groupby(rel->op) && list_length(rel->r) == 1 && exps_find_exp(rel->r, e)) || (need_distinct(rel) && list_length(rel->exps) == 1))
1633 8842 : return ALL_VALUES_DISTINCT;
1634 2921 : if (is_unique(e))
1635 2095 : return has_nil(e) ? MAY_HAVE_DUPLICATE_NULLS : ALL_VALUES_DISTINCT;
1636 :
1637 826 : if ((is_simple_project(rel->op) || is_groupby(rel->op) || is_inter(rel->op) || is_except(rel->op)) &&
1638 210 : (found = rel_find_exp_and_corresponding_rel(rel->l, e, false, &res, &underjoin)) && !underjoin) { /* grouping column on inner relation */
1639 194 : if (need_distinct(res) && list_length(res->exps) == 1)
1640 : return ALL_VALUES_DISTINCT;
1641 194 : if (is_unique(found))
1642 0 : return has_nil(e) ? MAY_HAVE_DUPLICATE_NULLS : ALL_VALUES_DISTINCT;
1643 194 : if (found->type == e_column && found->card <= CARD_AGGR) {
1644 1 : if (!is_groupby(res->op) && list_length(res->exps) != 1)
1645 : return NO_PROJECTION_FOUND;
1646 0 : for (node *n = res->exps->h ; n ; n = n->next) { /* must be the single column in the group by expression list */
1647 0 : sql_exp *e = n->data;
1648 0 : if (e != found && e->type == e_column)
1649 : return NO_PROJECTION_FOUND;
1650 : }
1651 : return ALL_VALUES_DISTINCT;
1652 : }
1653 : }
1654 : }
1655 : }
1656 : }
1657 : return NO_PROJECTION_FOUND;
1658 : }
1659 :
1660 : static sql_rel *
1661 1155541 : find_candidate_join2semi(visitor *v, sql_rel *rel, bool *swap)
1662 : {
1663 : /* generalize possibility : we need the visitor 'step' here */
1664 1156071 : if (rel_is_ref(rel)) /* if the join has multiple references, it's dangerous to convert it into a semijoin */
1665 : return NULL;
1666 1098406 : if (rel->op == op_join && !list_empty(rel->exps) && list_empty(rel->attr)) {
1667 187733 : sql_rel *l = rel->l, *r = rel->r;
1668 187733 : int foundr = NO_PROJECTION_FOUND, foundl = NO_PROJECTION_FOUND, found = NO_PROJECTION_FOUND;
1669 187733 : bool ok = false;
1670 :
1671 187733 : foundr = find_projection_for_join2semi(r);
1672 187733 : if (foundr < ALL_VALUES_DISTINCT)
1673 183714 : foundl = find_projection_for_join2semi(l);
1674 187733 : if (foundr && foundr > foundl) {
1675 4029 : *swap = false;
1676 4029 : found = foundr;
1677 183704 : } else if (foundl) {
1678 2567 : *swap = true;
1679 2567 : found = foundl;
1680 : }
1681 :
1682 6596 : if (found > NO_PROJECTION_FOUND) {
1683 : /* if all join expressions can be pushed down or have function calls, then it cannot be rewritten into a semijoin */
1684 13196 : for (node *n=rel->exps->h; n && !ok; n = n->next) {
1685 6600 : sql_exp *e = n->data;
1686 :
1687 10177 : ok |= e->type == e_cmp && e->flag == cmp_equal && !exp_has_func(e) && !rel_rebind_exp(v->sql, l, e) && !rel_rebind_exp(v->sql, r, e) &&
1688 11 : (found == ALL_VALUES_DISTINCT || !is_semantics(e) || !has_nil((sql_exp *)e->l) || !has_nil((sql_exp *)e->r));
1689 : }
1690 : }
1691 :
1692 187733 : if (ok)
1693 : return rel;
1694 : }
1695 1095372 : if (is_join(rel->op) || is_semi(rel->op)) {
1696 284217 : sql_rel *c;
1697 :
1698 284217 : if ((c=find_candidate_join2semi(v, rel->l, swap)) != NULL ||
1699 283810 : (c=find_candidate_join2semi(v, rel->r, swap)) != NULL)
1700 429 : if (list_empty(c->attr))
1701 : return c;
1702 : }
1703 1094943 : if (is_topn(rel->op) || is_sample(rel->op))
1704 530 : return find_candidate_join2semi(v, rel->l, swap);
1705 : return NULL;
1706 : }
1707 :
1708 : static int
1709 2568 : subrel_uses_exp_outside_subrel(sql_rel *rel, list *l, sql_rel *c)
1710 : {
1711 2569 : if (rel == c)
1712 : return 0;
1713 : /* for subrel only expect joins (later possibly selects) */
1714 855 : if (is_join(rel->op) || is_semi(rel->op)) {
1715 433 : if (exps_uses_any(rel->exps, l))
1716 : return 1;
1717 848 : if (subrel_uses_exp_outside_subrel(rel->l, l, c) ||
1718 423 : subrel_uses_exp_outside_subrel(rel->r, l, c))
1719 5 : return 1;
1720 : }
1721 842 : if (is_topn(rel->op) || is_sample(rel->op))
1722 1 : return subrel_uses_exp_outside_subrel(rel->l, l, c);
1723 : return 0;
1724 : }
1725 :
1726 : static int
1727 3034 : rel_uses_exp_outside_subrel(sql_rel *rel, list *l, sql_rel *c)
1728 : {
1729 : /* for now we only expect sub relations of type project, selects (rel) or join/semi */
1730 3034 : if (is_simple_project(rel->op) || is_groupby(rel->op) || is_select(rel->op)) {
1731 3034 : if (!list_empty(rel->exps) && exps_uses_any(rel->exps, l))
1732 : return 1;
1733 1720 : if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r) && exps_uses_any(rel->r, l))
1734 : return 1;
1735 1720 : if (rel->l)
1736 1720 : return subrel_uses_exp_outside_subrel(rel->l, l, c);
1737 : }
1738 0 : if (is_topn(rel->op) || is_sample(rel->op))
1739 0 : return subrel_uses_exp_outside_subrel(rel->l, l, c);
1740 : return 1;
1741 : }
1742 :
1743 : static inline sql_rel *
1744 1606683 : rel_join2semijoin(visitor *v, sql_rel *rel)
1745 : {
1746 1606683 : if ((is_simple_project(rel->op) || is_groupby(rel->op)) && rel->l) {
1747 587514 : bool swap = false;
1748 587514 : sql_rel *l = rel->l;
1749 587514 : sql_rel *c = find_candidate_join2semi(v, l, &swap);
1750 :
1751 587514 : if (c) {
1752 : /* 'p' is a project */
1753 3034 : sql_rel *p = swap ? c->l : c->r;
1754 :
1755 : /* now we need to check if ce is only used at the level of c */
1756 3034 : if (!rel_uses_exp_outside_subrel(rel, p->exps, c)) {
1757 1712 : c->op = op_semi;
1758 1712 : if (swap) {
1759 526 : sql_rel *tmp = c->r;
1760 526 : c->r = c->l;
1761 526 : c->l = tmp;
1762 : }
1763 1712 : v->changes++;
1764 : }
1765 : }
1766 : }
1767 1606683 : return rel;
1768 : }
1769 :
1770 : static inline sql_rel *
1771 1606683 : rel_push_join_down_outer(visitor *v, sql_rel *rel)
1772 : {
1773 1606683 : if (is_join(rel->op) && !is_outerjoin(rel->op) && !is_single(rel) && !list_empty(rel->exps) && !rel_is_ref(rel)) {
1774 196641 : sql_rel *l = rel->l, *r = rel->r;
1775 :
1776 196641 : if (is_left(r->op) && (is_select(l->op) || (is_join(l->op) && !is_outerjoin(l->op))) && !rel_is_ref(l) &&
1777 656 : !rel_is_ref(r)) {
1778 656 : sql_rel *rl = r->l;
1779 656 : sql_rel *rr = r->r;
1780 656 : if (rel_is_ref(rl) || rel_is_ref(rr))
1781 : return rel;
1782 : /* join exps should only include l and r.l */
1783 656 : list *njexps = sa_list(v->sql->sa);
1784 1320 : for(node *n = rel->exps->h; n; n = n->next) {
1785 664 : sql_exp *je = n->data;
1786 :
1787 664 : assert(je->type == e_cmp);
1788 664 : if (je->f)
1789 : return rel;
1790 664 : if ((rel_find_exp(l, je->l) && rel_find_exp(rl, je->r)) || (rel_find_exp(l, je->r) && rel_find_exp(rl, je->l))) {
1791 664 : list_append(njexps, je);
1792 : } else {
1793 0 : return rel;
1794 : }
1795 : }
1796 656 : sql_rel *nl = rel_crossproduct(v->sql->sa, rel_dup(l), rl, rel->op);
1797 656 : r->l = nl;
1798 656 : nl->exps = njexps;
1799 656 : nl->attr = rel->attr;
1800 656 : rel->attr = NULL;
1801 656 : set_processed(nl);
1802 656 : rel_dup(r);
1803 656 : rel_destroy(rel);
1804 656 : rel = r;
1805 656 : v->changes++;
1806 : }
1807 : }
1808 : return rel;
1809 : }
1810 :
1811 : static sql_rel *
1812 1606683 : rel_optimize_joins_(visitor *v, sql_rel *rel)
1813 : {
1814 1606683 : rel = rel_push_join_exps_down(v, rel);
1815 1606683 : rel = rel_out2inner(v, rel);
1816 1606683 : rel = rel_join2semijoin(v, rel);
1817 1606683 : rel = rel_push_join_down_outer(v, rel);
1818 1606683 : return rel;
1819 : }
1820 :
1821 : static sql_rel *
1822 91901 : rel_optimize_joins(visitor *v, global_props *gp, sql_rel *rel)
1823 : {
1824 91901 : (void) gp;
1825 91901 : return rel_visitor_topdown(v, rel, &rel_optimize_joins_);
1826 : }
1827 :
1828 : run_optimizer
1829 645199 : bind_optimize_joins(visitor *v, global_props *gp)
1830 : {
1831 645199 : int flag = v->sql->sql_optimizer;
1832 644984 : return gp->opt_level == 1 && (gp->cnt[op_join] || gp->cnt[op_left] || gp->cnt[op_right]
1833 1290183 : || gp->cnt[op_full] || gp->cnt[op_semi] || gp->cnt[op_anti]) && (flag & optimize_joins) ? rel_optimize_joins : NULL;
1834 : }
1835 :
1836 :
1837 : static sql_rel *rel_join_order_(visitor *v, sql_rel *rel);
1838 :
1839 : static void
1840 719445 : get_relations(visitor *v, sql_rel *rel, list *rels)
1841 : {
1842 1000644 : if (list_empty(rel->attr) && !rel_is_ref(rel) && rel->op == op_join && rel->exps == NULL) {
1843 281199 : sql_rel *l = rel->l;
1844 281199 : sql_rel *r = rel->r;
1845 :
1846 281199 : get_relations(v, l, rels);
1847 281199 : get_relations(v, r, rels);
1848 281199 : rel->l = NULL;
1849 281199 : rel->r = NULL;
1850 281199 : rel_destroy(rel);
1851 : } else {
1852 438246 : rel = rel_join_order_(v, rel);
1853 438246 : append(rels, rel);
1854 : }
1855 719445 : }
1856 :
1857 : static void
1858 185559 : get_inner_relations(mvc *sql, sql_rel *rel, list *rels)
1859 : {
1860 293599 : if (!rel_is_ref(rel) && is_join(rel->op)) {
1861 108040 : sql_rel *l = rel->l;
1862 108040 : sql_rel *r = rel->r;
1863 :
1864 108040 : get_inner_relations(sql, l, rels);
1865 108040 : get_inner_relations(sql, r, rels);
1866 : } else {
1867 185559 : append(rels, rel);
1868 : }
1869 185559 : }
1870 :
1871 : static int
1872 999724 : exp_count(int *cnt, sql_exp *e)
1873 : {
1874 999724 : int flag;
1875 999724 : if (!e)
1876 : return 0;
1877 999724 : if (find_prop(e->p, PROP_JOINIDX))
1878 3699 : *cnt += 100;
1879 999724 : if (find_prop(e->p, PROP_HASHCOL))
1880 34536 : *cnt += 100;
1881 999724 : if (find_prop(e->p, PROP_HASHIDX))
1882 0 : *cnt += 100;
1883 999724 : switch(e->type) {
1884 357889 : case e_cmp:
1885 357889 : if (!is_complex_exp(e->flag)) {
1886 319600 : exp_count(cnt, e->l);
1887 319600 : exp_count(cnt, e->r);
1888 319600 : if (e->f)
1889 2629 : exp_count(cnt, e->f);
1890 : }
1891 357889 : flag = e->flag;
1892 357889 : switch (flag) {
1893 298057 : case cmp_equal:
1894 298057 : *cnt += 90;
1895 298057 : return 90;
1896 15779 : case cmp_notequal:
1897 15779 : *cnt += 7;
1898 15779 : return 7;
1899 5764 : case cmp_gt:
1900 : case cmp_gte:
1901 : case cmp_lt:
1902 : case cmp_lte:
1903 5764 : *cnt += 6;
1904 5764 : if (e->l) {
1905 5764 : sql_exp *l = e->l;
1906 5764 : sql_subtype *t = exp_subtype(l);
1907 5764 : if (EC_TEMP(t->type->eclass)) /* give preference to temporal ranges */
1908 172 : *cnt += 90;
1909 : }
1910 5764 : if (e->f){ /* range */
1911 2629 : *cnt += 6;
1912 2629 : return 12;
1913 : }
1914 : return 6;
1915 1458 : case cmp_filter:
1916 1458 : if (exps_card(e->r) > CARD_AGGR) {
1917 : /* filters for joins are special */
1918 6 : *cnt += 1000;
1919 6 : return 1000;
1920 : }
1921 1452 : *cnt += 2;
1922 1452 : return 2;
1923 33377 : case cmp_in:
1924 : case cmp_notin: {
1925 33377 : list *l = e->r;
1926 33377 : int c = 9 - 10*list_length(l);
1927 33377 : *cnt += c;
1928 33377 : return c;
1929 : }
1930 3454 : case cmp_or: /* prefer or over functions */
1931 3454 : *cnt += 3;
1932 3454 : return 3;
1933 : default:
1934 : return 0;
1935 : }
1936 517297 : case e_column:
1937 517297 : *cnt += 20;
1938 517297 : return 20;
1939 105815 : case e_atom:
1940 105815 : *cnt += 10;
1941 105815 : return 10;
1942 2872 : case e_func:
1943 : /* functions are more expensive, depending on the number of columns involved. */
1944 2872 : if (e->card == CARD_ATOM)
1945 : return 0;
1946 2587 : *cnt -= 5*list_length(e->l);
1947 2587 : return 5*list_length(e->l);
1948 15851 : case e_convert:
1949 : /* functions are more expensive, depending on the number of columns involved. */
1950 15851 : if (e->card == CARD_ATOM)
1951 : return 0;
1952 : /* fall through */
1953 : default:
1954 15145 : *cnt -= 5;
1955 15145 : return -5;
1956 : }
1957 : }
1958 :
1959 : int
1960 229643 : exp_keyvalue(sql_exp *e)
1961 : {
1962 229643 : int cnt = 0;
1963 18526 : exp_count(&cnt, e);
1964 229643 : return cnt;
1965 : }
1966 :
1967 : static sql_exp *
1968 752456 : joinexp_col(sql_exp *e, sql_rel *r)
1969 : {
1970 752456 : if (e->type == e_cmp) {
1971 752456 : if (rel_has_exp(r, e->l, false) >= 0)
1972 402623 : return e->l;
1973 349833 : return e->r;
1974 : }
1975 0 : assert(0);
1976 : return NULL;
1977 : }
1978 :
1979 : static sql_column *
1980 499924 : table_colexp(sql_exp *e, sql_rel *r)
1981 : {
1982 499924 : sql_table *t = r->l;
1983 :
1984 499924 : if (e->type == e_column) {
1985 485326 : const char *name = exp_name(e);
1986 485326 : node *cn;
1987 :
1988 485326 : if (r->exps) { /* use alias */
1989 898739 : for (cn = r->exps->h; cn; cn = cn->next) {
1990 894337 : sql_exp *ce = cn->data;
1991 894337 : if (strcmp(exp_name(ce), name) == 0) {
1992 480924 : name = ce->r;
1993 480924 : break;
1994 : }
1995 : }
1996 : }
1997 1069471 : for (cn = ol_first_node(t->columns); cn; cn = cn->next) {
1998 1065097 : sql_column *c = cn->data;
1999 1065097 : if (strcmp(c->base.name, name) == 0)
2000 480952 : return c;
2001 : }
2002 : }
2003 : return NULL;
2004 : }
2005 :
2006 : static list *
2007 362669 : matching_joins(allocator *sa, list *rels, list *exps, sql_exp *je)
2008 : {
2009 362669 : sql_rel *l, *r;
2010 :
2011 362669 : assert (je->type == e_cmp);
2012 :
2013 362669 : l = find_rel(rels, je->l);
2014 362669 : r = find_rel(rels, je->r);
2015 362669 : if (l && r) {
2016 362544 : list *res;
2017 362544 : list *n_rels = sa_list(sa);
2018 :
2019 362544 : append(n_rels, l);
2020 362544 : append(n_rels, r);
2021 362544 : res = list_select(exps, n_rels, (fcmp) &exp_joins_rels, (fdup)NULL);
2022 362544 : return res;
2023 : }
2024 125 : return sa_list(sa);
2025 : }
2026 :
2027 : static int
2028 5974 : sql_column_kc_cmp(sql_column *c, sql_kc *kc)
2029 : {
2030 : /* return on equality */
2031 5974 : return (c->colnr - kc->c->colnr);
2032 : }
2033 :
2034 : static sql_idx *
2035 468858 : find_fk_index(mvc *sql, sql_table *l, list *lcols, sql_table *r, list *rcols)
2036 : {
2037 468858 : sql_trans *tr = sql->session->tr;
2038 :
2039 468858 : if (l->idxs) {
2040 468858 : node *in;
2041 557066 : for (in = ol_first_node(l->idxs); in; in = in->next){
2042 89055 : sql_idx *li = in->data;
2043 89055 : if (li->type == join_idx) {
2044 8936 : sql_key *rk = (sql_key*)os_find_id(tr->cat->objects, tr, ((sql_fkey*)li->key)->rkey);
2045 8936 : fcmp cmp = (fcmp)&sql_column_kc_cmp;
2046 :
2047 9893 : if (rk->t == r &&
2048 1804 : list_match(lcols, li->columns, cmp) == 0 &&
2049 847 : list_match(rcols, rk->columns, cmp) == 0) {
2050 847 : return li;
2051 : }
2052 : }
2053 : }
2054 : }
2055 : return NULL;
2056 : }
2057 :
2058 : static sql_rel *
2059 725338 : find_basetable( sql_rel *r)
2060 : {
2061 1071782 : if (!r)
2062 : return NULL;
2063 1064766 : switch(r->op) {
2064 584743 : case op_basetable:
2065 584743 : if (!r->l)
2066 0 : return NULL;
2067 : return r;
2068 346444 : case op_semi:
2069 : case op_anti:
2070 : case op_project:
2071 : case op_select:
2072 : case op_topn:
2073 : case op_sample:
2074 346444 : return find_basetable(r->l);
2075 : default:
2076 : return NULL;
2077 : }
2078 : }
2079 :
2080 : static int
2081 101991 : exps_count(list *exps)
2082 : {
2083 101991 : node *n;
2084 101991 : int cnt = 0;
2085 :
2086 101991 : if (!exps)
2087 : return 0;
2088 230243 : for (n = exps->h; n; n=n->next)
2089 128252 : exp_count(&cnt, n->data);
2090 101991 : return cnt;
2091 : }
2092 :
2093 : static list *
2094 257455 : order_join_expressions(mvc *sql, list *dje, list *rels)
2095 : {
2096 257455 : node *n;
2097 257455 : int cnt = list_length(dje);
2098 :
2099 257455 : if (cnt <= 1)
2100 : return dje;
2101 :
2102 67216 : list *res = sa_list(sql->sa);
2103 67216 : int i, *keys = SA_NEW_ARRAY(sql->ta, int, cnt);
2104 67216 : void **data = SA_NEW_ARRAY(sql->ta, void*, cnt);
2105 :
2106 278333 : for (n = dje->h, i = 0; n; n = n->next, i++) {
2107 211117 : sql_exp *e = n->data;
2108 :
2109 211117 : keys[i] = exp_keyvalue(e);
2110 : /* add some weight for the selections */
2111 211117 : if (e->type == e_cmp && !is_complex_exp(e->flag)) {
2112 211090 : sql_rel *l = find_rel(rels, e->l);
2113 211090 : sql_rel *r = find_rel(rels, e->r);
2114 :
2115 211090 : if (l && is_select(l->op) && l->exps)
2116 54946 : keys[i] += list_length(l->exps)*10 + exps_count(l->exps);
2117 211090 : if (r && is_select(r->op) && r->exps)
2118 47045 : keys[i] += list_length(r->exps)*10 + exps_count(r->exps);
2119 : }
2120 211117 : data[i] = n->data;
2121 : }
2122 : /* sort descending */
2123 67216 : GDKqsort(keys, data, NULL, cnt, sizeof(int), sizeof(void *), TYPE_int, true, true);
2124 345549 : for(i=0; i<cnt; i++) {
2125 211117 : list_append(res, data[i]);
2126 : }
2127 : return res;
2128 : }
2129 :
2130 : static int
2131 240885 : find_join_rels(list **L, list **R, list *exps, list *rels)
2132 : {
2133 240885 : node *n;
2134 :
2135 240885 : *L = sa_list(exps->sa);
2136 240885 : *R = sa_list(exps->sa);
2137 240885 : if (!exps || list_length(exps) <= 1)
2138 : return -1;
2139 295213 : for(n = exps->h; n; n = n->next) {
2140 222788 : sql_exp *e = n->data;
2141 222788 : sql_rel *l = NULL, *r = NULL;
2142 :
2143 222788 : if (!is_complex_exp(e->flag)){
2144 222780 : l = find_rel(rels, e->l);
2145 222780 : r = find_rel(rels, e->r);
2146 : }
2147 222780 : if (l<r) {
2148 143671 : list_append(*L, l);
2149 143671 : list_append(*R, r);
2150 : } else {
2151 79117 : list_append(*L, r);
2152 79117 : list_append(*R, l);
2153 : }
2154 : }
2155 : return 0;
2156 : }
2157 :
2158 : static list *
2159 72425 : distinct_join_exps(list *aje, list *lrels, list *rrels)
2160 : {
2161 72425 : node *n, *m, *o, *p;
2162 72425 : int len = list_length(aje), i, j;
2163 72425 : char *used = SA_ZNEW_ARRAY(aje->sa, char, len);
2164 72425 : list *res = sa_list(aje->sa);
2165 :
2166 72425 : assert(len == list_length(lrels));
2167 295213 : for(n = lrels->h, m = rrels->h, j = 0; n && m;
2168 222788 : n = n->next, m = m->next, j++) {
2169 222788 : if (n->data && m->data)
2170 957545 : for(o = n->next, p = m->next, i = j+1; o && p;
2171 734807 : o = o->next, p = p->next, i++) {
2172 734807 : if (o->data == n->data && p->data == m->data)
2173 29021 : used[i] = 1;
2174 : }
2175 : }
2176 295213 : for (i = 0, n = aje->h; i < len; n = n->next, i++) {
2177 222788 : if (!used[i])
2178 198262 : list_append(res, n->data);
2179 : }
2180 72425 : return res;
2181 : }
2182 :
2183 : static list *
2184 240885 : find_fk( mvc *sql, list *rels, list *exps)
2185 : {
2186 240885 : node *djn;
2187 240885 : list *sdje, *aje, *dje;
2188 240885 : list *lrels, *rrels;
2189 :
2190 : /* first find the distinct join expressions */
2191 240885 : aje = list_select(exps, rels, (fcmp) &exp_is_join, (fdup)NULL);
2192 : /* add left/right relation */
2193 240885 : if (find_join_rels(&lrels, &rrels, aje, rels) < 0)
2194 : dje = aje;
2195 : else
2196 72425 : dje = distinct_join_exps(aje, lrels, rrels);
2197 607266 : for(djn=dje->h; djn; djn = djn->next) {
2198 : /* equal join expressions */
2199 366467 : sql_idx *idx = NULL;
2200 366467 : sql_exp *je = djn->data, *le = je->l, *re = je->r;
2201 :
2202 366467 : if (is_complex_exp(je->flag))
2203 : break;
2204 366381 : if (!find_prop(je->p, PROP_JOINIDX)) {
2205 362669 : int swapped = 0;
2206 362669 : list *aaje = matching_joins(sql->sa, rels, aje, je);
2207 362669 : list *eje = list_select(aaje, (void*)1, (fcmp) &exp_is_eqjoin, (fdup)NULL);
2208 362669 : sql_rel *lr = find_rel(rels, le), *olr = lr;
2209 362669 : sql_rel *rr = find_rel(rels, re), *orr = rr;
2210 362669 : sql_rel *bt = NULL;
2211 362669 : char *iname;
2212 :
2213 362669 : sql_table *l, *r;
2214 362669 : list *lexps = list_map(eje, lr, (fmap) &joinexp_col);
2215 362669 : list *rexps = list_map(eje, rr, (fmap) &joinexp_col);
2216 362669 : list *lcols, *rcols;
2217 :
2218 362669 : lr = find_basetable(lr);
2219 362669 : rr = find_basetable(rr);
2220 362669 : if (!lr || !rr)
2221 237089 : continue;
2222 253483 : l = lr->l;
2223 253483 : r = rr->l;
2224 253483 : lcols = list_map(lexps, lr, (fmap) &table_colexp);
2225 253483 : rcols = list_map(rexps, rr, (fmap) &table_colexp);
2226 253483 : lcols->destroy = NULL;
2227 253483 : rcols->destroy = NULL;
2228 253483 : if (list_length(lcols) != list_length(rcols))
2229 18716 : continue;
2230 :
2231 234767 : idx = find_fk_index(sql, l, lcols, r, rcols);
2232 234767 : if (!idx) {
2233 234091 : idx = find_fk_index(sql, r, rcols, l, lcols);
2234 234091 : swapped = 1;
2235 : }
2236 :
2237 234767 : if (idx && (iname = sa_strconcat( sql->sa, "%", idx->base.name)) != NULL &&
2238 676 : ((!swapped && name_find_column(olr, NULL, iname, -2, &bt) == NULL) ||
2239 171 : ( swapped && name_find_column(orr, NULL, iname, -2, &bt) == NULL)))
2240 : idx = NULL;
2241 :
2242 : if (idx) {
2243 754 : prop *p;
2244 754 : node *n;
2245 754 : sql_exp *t = NULL, *i = NULL;
2246 :
2247 754 : if (list_length(lcols) > 1 || !mvc_debug_on(sql, 512)) {
2248 :
2249 : /* Add join between idx and TID */
2250 754 : if (swapped) {
2251 155 : sql_exp *s = je->l, *l = je->r;
2252 :
2253 155 : t = rel_find_column(sql, olr, s->l, TID);
2254 155 : i = rel_find_column(sql, orr, l->l, iname);
2255 155 : if (!t || !i)
2256 1 : continue;
2257 154 : t->p = NULL;
2258 154 : i->p = NULL;
2259 154 : je = exp_compare(sql->sa, i, t, cmp_equal);
2260 : } else {
2261 599 : sql_exp *s = je->r, *l = je->l;
2262 :
2263 599 : t = rel_find_column(sql, orr, s->l, TID);
2264 599 : i = rel_find_column(sql, olr, l->l, iname);
2265 599 : if (!t || !i)
2266 0 : continue;
2267 599 : t->p = NULL;
2268 599 : i->p = NULL;
2269 599 : je = exp_compare(sql->sa, i, t, cmp_equal);
2270 : }
2271 :
2272 : /* Remove all join expressions */
2273 1508 : for (n = eje->h; n; n = n->next)
2274 755 : list_remove_data(exps, NULL, n->data);
2275 753 : append(exps, je);
2276 753 : djn->data = je;
2277 0 : } else if (swapped) { /* else keep je for single column expressions */
2278 0 : je = exp_compare(sql->sa, je->r, je->l, cmp_equal);
2279 : /* Remove all join expressions */
2280 0 : for (n = eje->h; n; n = n->next)
2281 0 : list_remove_data(exps, NULL, n->data);
2282 0 : append(exps, je);
2283 0 : djn->data = je;
2284 : }
2285 753 : je->p = p = prop_create(sql->sa, PROP_JOINIDX, je->p);
2286 753 : p->value.pval = idx;
2287 : }
2288 : }
2289 : }
2290 :
2291 : /* sort expressions on weighted number of reducing operators */
2292 240885 : sdje = order_join_expressions(sql, dje, rels);
2293 240885 : return sdje;
2294 : }
2295 :
2296 : static int
2297 564177 : exp_find_one_rel( sql_rel **rels, int nr, sql_exp *e)
2298 : {
2299 564177 : int fnd = 0;
2300 :
2301 4479339 : for(int i = 1; i<=nr; i++) {
2302 3915231 : if (rel_has_exp(rels[i], e, false) == 0) {
2303 564161 : if (fnd)
2304 : return 0;
2305 : fnd = i;
2306 : }
2307 : }
2308 : return fnd;
2309 : }
2310 :
2311 : static int
2312 140 : exps_find_one_rel( sql_rel **rels, int nr, list *exps)
2313 : {
2314 140 : int fnd = 0;
2315 :
2316 378 : for(node *n = exps->h; n; n = n->next) {
2317 243 : sql_exp *e = n->data;
2318 243 : if (exp_is_atom(e))
2319 86 : continue;
2320 157 : int nfnd = exp_find_one_rel(rels, nr, n->data);
2321 157 : if (nfnd != fnd && fnd)
2322 : return 0;
2323 154 : fnd = nfnd;
2324 154 : if (!fnd)
2325 : return 0;
2326 : }
2327 : return fnd;
2328 : }
2329 :
2330 : /* TODO move popcount and popcount64 into gdk_*.h, used in gdk_cand, strimps and here */
2331 : static inline int
2332 : popcount64(uint64_t x)
2333 : {
2334 : #ifdef __has_builtin
2335 : #if __has_builtin(__builtin_popcountll)
2336 : return (uint32_t) __builtin_popcountll(x);
2337 : #define BUILTIN_USED
2338 : #endif
2339 : #endif
2340 : #ifndef BUILTIN_USED
2341 : #if defined(_MSC_VER)
2342 : #if SIZEOF_OID == 4
2343 : /* no __popcnt64 on 32 bit Windows */
2344 : return (int) (__popcnt((uint32_t) x) + __popcnt((uint32_t) (x >> 32)));
2345 : #else
2346 : return (uint32_t) __popcnt64(x);
2347 : #endif
2348 : #else
2349 : x = (x & UINT64_C(0x5555555555555555)) + ((x >> 1) & UINT64_C(0x5555555555555555));
2350 : x = (x & UINT64_C(0x3333333333333333)) + ((x >> 2) & UINT64_C(0x3333333333333333));
2351 : x = (x & UINT64_C(0x0F0F0F0F0F0F0F0F)) + ((x >> 4) & UINT64_C(0x0F0F0F0F0F0F0F0F));
2352 : return (x * UINT64_C(0x0101010101010101)) >> 56;
2353 : #endif
2354 : #endif
2355 : #undef BUILTIN_USED
2356 : }
2357 :
2358 : static sql_rel *
2359 157047 : order_joins(visitor *v, list *rels, list *exps)
2360 : {
2361 157047 : sql_rel *top = NULL, *l = NULL, *r = NULL, *f = NULL;
2362 157047 : sql_exp *cje;
2363 157047 : node *djn;
2364 157047 : list *sdje, *n_rels = NULL;
2365 157047 : int fnd = 0;
2366 157047 : unsigned int rsingle;
2367 157047 : int direct = 1;
2368 :
2369 : /* find foreign keys and reorder the expressions on reducing quality */
2370 157047 : sdje = find_fk(v->sql, rels, exps);
2371 :
2372 439061 : for(djn = sdje->h; djn; djn = djn->next ) {
2373 282014 : sql_exp *e = djn->data;
2374 282014 : list_remove_data(exps, NULL, e);
2375 : }
2376 :
2377 157047 : int nr_exps = list_length(sdje), nr_rels = list_length(rels), ci = 1;
2378 157047 : if (nr_rels > 64) {
2379 1 : direct = 0;
2380 1 : n_rels = sa_list(v->sql->ta);
2381 : }
2382 157047 : sql_rel **rels_a = SA_NEW_ARRAY(v->sql->ta, sql_rel*, nr_rels+1); /* don't use slot 0 */
2383 157047 : rels_a[0] = NULL;
2384 595293 : for (node *n = rels->h; n; n = n->next, ci++) {
2385 438246 : rels_a[ci] = n->data;
2386 : }
2387 157047 : ulng *h = SA_NEW_ARRAY(v->sql->ta, ulng, nr_exps), rel_mask = 0; /* bit field (for > 64 its an imprint) */
2388 157047 : uint16_t *r1 = SA_NEW_ARRAY(v->sql->ta, uint16_t, nr_exps);
2389 157047 : uint16_t *r2 = SA_NEW_ARRAY(v->sql->ta, uint16_t, nr_exps);
2390 : /* change r3 into rest list's */
2391 157047 : int *r3 = SA_NEW_ARRAY(v->sql->ta, int, nr_exps);
2392 :
2393 157047 : ci = 0;
2394 439061 : for (node *n = sdje->h; n; n = n->next, ci++) {
2395 282014 : sql_exp *cje = n->data;
2396 :
2397 282014 : h[ci] = r1[ci] = r2[ci] = r3[ci] = 0;
2398 282014 : if (cje->type == e_cmp) {
2399 282014 : cje->tmp = ci;
2400 282014 : r1[ci] = cje->flag == cmp_filter ? exps_find_one_rel(rels_a, nr_rels, cje->l) : exp_find_one_rel(rels_a, nr_rels, cje->l);
2401 282014 : r2[ci] = cje->flag == cmp_filter ? exps_find_one_rel(rels_a, nr_rels, cje->r) : exp_find_one_rel(rels_a, nr_rels, cje->r);
2402 282014 : if (r1[ci])
2403 281951 : h[ci] |= ((ulng)1)<<((r1[ci]-1)%64);
2404 282014 : if (r2[ci])
2405 281931 : h[ci] |= ((ulng)1)<<((r2[ci]-1)%64);
2406 282014 : if (cje->f && cje->flag != cmp_filter) {
2407 132 : r3[ci] = exp_find_one_rel(rels_a, nr_rels, cje->f);
2408 132 : if (r3[ci] == r2[ci] || r3[ci] == r1[ci])
2409 114 : r3[ci] = 0;
2410 132 : if (r3[ci])
2411 8 : h[ci] |= ((ulng)1)<<((r3[ci]-1)%64);
2412 : }
2413 : }
2414 : }
2415 : /* open problem, some expressions use more than 2 relations */
2416 : /* For example a.x = b.y * c.z; */
2417 157047 : if (list_length(rels) >= 2 && sdje->h) {
2418 314053 : for (node *n = sdje->h; n && (!l || !r); n = n->next, ci++) {
2419 157030 : cje = n->data;
2420 :
2421 157030 : if (n->next && r3[cje->tmp])
2422 0 : continue;
2423 :
2424 : /* complex expressions may touch multiple base tables
2425 : * Should be pushed up to extra selection.
2426 : * */
2427 157030 : if (0 && popcount64(h[cje->tmp]) > 2)
2428 : assert(0);
2429 : /* find the involved relations */
2430 157030 : if (cje->type == e_cmp) {
2431 157030 : l = rels_a[r1[cje->tmp]];
2432 157030 : r = rels_a[r2[cje->tmp]];
2433 157030 : if (l && r)
2434 156965 : rel_mask |= h[cje->tmp];
2435 : }
2436 : }
2437 157023 : cje->tmp = 0;
2438 :
2439 157023 : if (l && r && l != r)
2440 156965 : list_remove_data(sdje, NULL, cje);
2441 : }
2442 :
2443 157047 : if (l && r && l != r) {
2444 156965 : list_remove_data(rels, NULL, l);
2445 156965 : list_remove_data(rels, NULL, r);
2446 156965 : if (!direct) {
2447 1 : list_append(n_rels, l);
2448 1 : list_append(n_rels, r);
2449 : }
2450 :
2451 : /* Create a relation between l and r. Since the calling
2452 : functions rewrote the join tree, into a list of expressions
2453 : and a list of (simple) relations, there are no outer joins
2454 : involved, we can simply do a crossproduct here.
2455 : */
2456 156965 : rsingle = is_single(r);
2457 156965 : reset_single(r);
2458 156965 : top = rel_crossproduct(v->sql->sa, l, r, op_join);
2459 156965 : if (rsingle)
2460 5188 : set_single(r);
2461 156965 : rel_join_add_exp(v->sql->sa, top, cje);
2462 :
2463 : /* all other join expressions on these 2 relations */
2464 160817 : for (node *en = exps->h; en; ) {
2465 3852 : node *next = en->next;
2466 3852 : sql_exp *e = en->data;
2467 3852 : if (rel_rebind_exp(v->sql, top, e)) {
2468 3680 : rel_join_add_exp(v->sql->sa, top, e);
2469 3680 : list_remove_data(exps, NULL, e);
2470 : }
2471 : en = next;
2472 : }
2473 : fnd = 1;
2474 : }
2475 : /* build join tree using the ordered list */
2476 278688 : while(list_length(sdje) && fnd) {
2477 121641 : fnd = 0;
2478 : /* find the first expression which could be added */
2479 884915 : for(djn = sdje->h; djn && !fnd && rels->h; djn = (!fnd)?djn->next:NULL) {
2480 381637 : node *en;
2481 381637 : l = r = f = NULL;
2482 381637 : int needs3 = 0;
2483 :
2484 381637 : cje = djn->data;
2485 381637 : if ((h[cje->tmp] & rel_mask) > 0) {
2486 121556 : if (rel_mask & (((ulng)1)<<((r1[cje->tmp]-1)%64)))
2487 69006 : l = rels_a[r1[cje->tmp]];
2488 121556 : if (rel_mask & (((ulng)1)<<((r2[cje->tmp]-1)%64)))
2489 52551 : r = rels_a[r2[cje->tmp]];
2490 121556 : if (cje->f && r3[cje->tmp]) {
2491 0 : needs3 = 1;
2492 0 : if (rel_mask & (((ulng)1)<<((r3[cje->tmp]-1)%64)))
2493 0 : f = rels_a[r3[cje->tmp]];
2494 : }
2495 : }
2496 381637 : if (!direct) { /* check if at least one side in n_rels */
2497 1040 : if (l && !list_find(n_rels, l, NULL))
2498 1007 : l = NULL;
2499 1040 : if (r && !list_find(n_rels, r, NULL))
2500 1010 : r = NULL;
2501 1040 : if (f && !list_find(n_rels, f, NULL))
2502 0 : f = NULL;
2503 : }
2504 :
2505 381637 : if ((!needs3 && l && r) || (needs3 && l && r && f)) {
2506 0 : assert(0);
2507 : /* create a selection on the current */
2508 : rel_join_add_exp(v->sql->sa, top, cje);
2509 : fnd = 1;
2510 381637 : } else if ((!needs3 && (l || r)) || (needs3 && (l || r || f))) {
2511 121556 : sql_rel *nr[2]= {NULL, NULL};
2512 121556 : rel_mask |= h[cje->tmp];
2513 121556 : int i = 0;
2514 121556 : if (!l)
2515 52550 : nr[i++] = rels_a[r1[cje->tmp]];
2516 121556 : if (!r)
2517 69006 : nr[i++] = rels_a[r2[cje->tmp]];
2518 121556 : if (needs3 && !f)
2519 0 : nr[i++] = rels_a[r3[cje->tmp]];
2520 121556 : if (!nr[0]) {
2521 0 : fnd = 1; /* not really, but this bails out */
2522 0 : list_remove_data(sdje, NULL, cje); /* handle later as select */
2523 0 : if (!list_find(exps, cje, NULL))
2524 0 : append(exps, cje);
2525 0 : continue;
2526 : }
2527 : /* remove the expression from the lists */
2528 121556 : list_remove_data(sdje, NULL, cje);
2529 :
2530 121556 : list_remove_data(rels, NULL, nr[0]);
2531 121556 : if (!direct)
2532 63 : append(n_rels, nr[0]);
2533 121556 : if (i > 1 && nr[1]) {
2534 0 : list_remove_data(rels, NULL, nr[1]);
2535 0 : if (!direct)
2536 0 : append(n_rels, nr[1]);
2537 : }
2538 :
2539 : /* create a join using the current expression */
2540 121556 : rsingle = is_single(nr[0]);
2541 121556 : reset_single(nr[0]);
2542 121556 : top = rel_crossproduct(v->sql->sa, top, nr[0], op_join);
2543 121556 : if (rsingle)
2544 0 : set_single(nr[0]);
2545 121556 : if (i > 1 && nr[1]) {
2546 0 : rsingle = is_single(nr[1]);
2547 0 : reset_single(nr[1]);
2548 0 : top = rel_crossproduct(v->sql->sa, top, nr[1], op_join);
2549 0 : if (rsingle)
2550 0 : set_single(nr[1]);
2551 : }
2552 121556 : rel_join_add_exp(v->sql->sa, top, cje);
2553 :
2554 : /* all join expressions on these tables */
2555 121944 : for (en = exps->h; en; ) {
2556 388 : node *next = en->next;
2557 388 : sql_exp *e = en->data;
2558 388 : if (rel_rebind_exp(v->sql, top, e)) {
2559 170 : rel_join_add_exp(v->sql->sa, top, e);
2560 170 : list_remove_data(exps, NULL, e);
2561 : }
2562 : en = next;
2563 : }
2564 : /* Remove other joins on the current 'n_rels'
2565 : set in the distinct list too */
2566 698242 : for (en = sdje->h; en; ) {
2567 576686 : node *next = en->next;
2568 576686 : sql_exp *e = en->data;
2569 576686 : if ((direct && ((e->flag <= cmp_notequal && (h[e->tmp] & rel_mask) == h[e->tmp] && h[e->tmp]) || (e->flag > cmp_notequal && rel_rebind_exp(v->sql, top, e)))) ||
2570 1953 : (!direct && rel_rebind_exp(v->sql, top, e))) {
2571 3345 : rel_join_add_exp(v->sql->sa, top, e);
2572 3345 : list_remove_data(sdje, NULL, en->data);
2573 : }
2574 : en = next;
2575 : }
2576 121556 : fnd = 1;
2577 : }
2578 : }
2579 : }
2580 157047 : if (list_length(rels)) { /* more relations */
2581 1131 : node *n;
2582 3891 : for(n=rels->h; n; n = n->next) {
2583 2760 : sql_rel *nr = n->data;
2584 :
2585 2760 : if (top) {
2586 2678 : rsingle = is_single(nr);
2587 2678 : reset_single(nr);
2588 2678 : top = rel_crossproduct(v->sql->sa, top, nr, op_join);
2589 2678 : if (rsingle)
2590 0 : set_single(nr);
2591 : } else
2592 : top = nr;
2593 : }
2594 : }
2595 157047 : if (list_length(sdje)) {
2596 143 : if (list_empty(exps))
2597 : exps = sdje;
2598 : else
2599 0 : exps = list_merge(exps, sdje, (fdup)NULL);
2600 : }
2601 157047 : if (list_length(exps)) { /* more expressions (add selects) */
2602 169 : top = rel_select(v->sql->sa, top, NULL);
2603 343 : for(node *n=exps->h; n; n = n->next) {
2604 174 : sql_exp *e = n->data;
2605 :
2606 174 : if (exp_is_join_exp(e) == 0) {
2607 160 : sql_rel *nr = NULL;
2608 160 : if (is_theta_exp(e->flag)) {
2609 143 : nr = rel_push_join(v->sql, top->l, e->l, e->r, e->f, e, 0);
2610 17 : } else if (e->flag == cmp_filter || e->flag == cmp_or) {
2611 17 : sql_exp *l = exps_find_one_multi_exp(e->l), *r = exps_find_one_multi_exp(e->r);
2612 17 : if (l && r)
2613 10 : nr = rel_push_join(v->sql, top->l, l, r, NULL, e, 0);
2614 : }
2615 153 : if (!nr)
2616 7 : rel_join_add_exp(v->sql->sa, top->l, e);
2617 : } else
2618 14 : rel_select_add_exp(v->sql->sa, top, e);
2619 : }
2620 169 : if (list_empty(top->exps)) { /* empty select */
2621 155 : sql_rel *l = top->l;
2622 155 : top->l = NULL;
2623 155 : rel_destroy(top);
2624 155 : top = l;
2625 : }
2626 : }
2627 157047 : return top;
2628 : }
2629 :
2630 : static int
2631 438246 : rel_neg_in_size(sql_rel *r)
2632 : {
2633 438246 : if ((is_union(r->op) /*|| is_munion(r->op)*/) && r->nrcols == 0)
2634 0 : return -1 + rel_neg_in_size(r->l);
2635 438246 : if (is_project(r->op) && r->nrcols == 0)
2636 0 : return -1;
2637 : return 0;
2638 : }
2639 :
2640 438246 : static void _rel_destroy(void *dummy, sql_rel *rel)
2641 : {
2642 438246 : (void)dummy;
2643 438246 : rel_destroy(rel);
2644 438246 : }
2645 :
2646 : static list *
2647 157047 : push_in_join_down(mvc *sql, list *rels, list *exps)
2648 : {
2649 157047 : node *n;
2650 157047 : int restart = 1;
2651 157047 : list *nrels;
2652 :
2653 : /* we should sort these first, ie small in's before large one's */
2654 157047 : nrels = list_sort(rels, (fkeyvalue)&rel_neg_in_size, (fdup)&rel_dup);
2655 :
2656 : /* we need to cleanup, the new refs ! */
2657 157047 : rels->destroy = (fdestroy)_rel_destroy;
2658 157047 : list_destroy(rels);
2659 157047 : rels = nrels;
2660 :
2661 : /* one of the rels should be a op_union with nrcols == 0 */
2662 471141 : while (restart) {
2663 595293 : for (n = rels->h; n; n = n->next) {
2664 438246 : sql_rel *r = n->data;
2665 :
2666 438246 : restart = 0;
2667 438246 : if (is_project(r->op) && r->nrcols == 0) {
2668 : /* next step find expression on this relation */
2669 0 : node *m;
2670 0 : sql_rel *l = NULL;
2671 0 : sql_exp *je = NULL;
2672 :
2673 0 : for(m = exps->h; !je && m; m = m->next) {
2674 0 : sql_exp *e = m->data;
2675 :
2676 0 : if (e->type == e_cmp && e->flag == cmp_equal) {
2677 : /* in values are on
2678 : the right of the join */
2679 0 : if (rel_has_exp(r, e->r, false) >= 0)
2680 0 : je = e;
2681 : }
2682 : }
2683 : /* with this expression find other relation */
2684 0 : if (je && (l = find_rel(rels, je->l)) != NULL) {
2685 0 : unsigned int rsingle = is_single(r);
2686 0 : reset_single(r);
2687 0 : sql_rel *nr = rel_crossproduct(sql->sa, l, r, op_join);
2688 0 : if (rsingle)
2689 0 : set_single(r);
2690 0 : rel_join_add_exp(sql->sa, nr, je);
2691 0 : list_append(rels, nr);
2692 0 : list_remove_data(rels, NULL, l);
2693 0 : list_remove_data(rels, NULL, r);
2694 0 : list_remove_data(exps, NULL, je);
2695 0 : restart = 1;
2696 0 : break;
2697 : }
2698 :
2699 : }
2700 : }
2701 : }
2702 157047 : return rels;
2703 : }
2704 :
2705 : static list *
2706 747552 : push_up_join_exps( mvc *sql, sql_rel *rel)
2707 : {
2708 747552 : if (rel_is_ref(rel))
2709 : return NULL;
2710 :
2711 706131 : switch(rel->op) {
2712 293277 : case op_join: {
2713 293277 : sql_rel *rl = rel->l;
2714 293277 : sql_rel *rr = rel->r;
2715 293277 : list *l, *r;
2716 :
2717 293277 : if (rel_is_ref(rl) && rel_is_ref(rr)) {
2718 16 : l = rel->exps;
2719 16 : rel->exps = NULL;
2720 16 : return l;
2721 : }
2722 293261 : l = push_up_join_exps(sql, rl);
2723 293261 : r = push_up_join_exps(sql, rr);
2724 293261 : if (l && r) {
2725 34 : l = list_merge(l, r, (fdup)NULL);
2726 34 : r = NULL;
2727 293227 : } else if (!l) {
2728 178386 : l = r;
2729 178386 : r = NULL;
2730 : }
2731 293261 : if (rel->exps) {
2732 261577 : if (l && !r)
2733 104468 : r = l;
2734 261577 : l = list_merge(rel->exps, r, (fdup)NULL);
2735 : }
2736 293261 : rel->exps = NULL;
2737 293261 : return l;
2738 : }
2739 : default:
2740 : return NULL;
2741 : }
2742 : }
2743 :
2744 : static sql_rel *
2745 288833 : reorder_join(visitor *v, sql_rel *rel)
2746 : {
2747 288833 : list *exps, *rels;
2748 :
2749 288833 : if (is_innerjoin(rel->op) && !is_single(rel) && !rel_is_ref(rel) && list_empty(rel->attr)) {
2750 178439 : if (list_empty(rel->exps)) {
2751 22199 : sql_rel *l = rel->l, *r = rel->r;
2752 22199 : if (!is_innerjoin(l->op) && !is_innerjoin(r->op))
2753 : return rel;
2754 : }
2755 161030 : rel->exps = push_up_join_exps(v->sql, rel);
2756 : }
2757 :
2758 271424 : if (!is_innerjoin(rel->op) || is_single(rel) || rel_is_ref(rel) || list_empty(rel->exps) || !list_empty(rel->attr)) {
2759 114377 : if (!list_empty(rel->exps)) { /* cannot add join idxs to cross products */
2760 77519 : exps = rel->exps;
2761 77519 : rel->exps = NULL; /* should be all crosstables by now */
2762 77519 : rels = sa_list(v->sql->ta);
2763 : /* try to use an join index also for outer joins */
2764 77519 : get_inner_relations(v->sql, rel, rels);
2765 77519 : int cnt = list_length(exps);
2766 77519 : rel->exps = find_fk(v->sql, rels, exps);
2767 77519 : if (list_length(rel->exps) != cnt)
2768 16570 : rel->exps = order_join_expressions(v->sql, exps, rels);
2769 : }
2770 114377 : rel->l = rel_join_order_(v, rel->l);
2771 114377 : rel->r = rel_join_order_(v, rel->r);
2772 : } else {
2773 157047 : exps = rel->exps;
2774 157047 : rel->exps = NULL; /* should be all crosstables by now */
2775 157047 : rels = sa_list(v->sql->ta);
2776 157047 : get_relations(v, rel, rels);
2777 157047 : if (list_length(rels) > 1) {
2778 157047 : rels = push_in_join_down(v->sql, rels, exps);
2779 157047 : rel = order_joins(v, rels, exps);
2780 : } else {
2781 0 : rel->exps = exps;
2782 : }
2783 : }
2784 : return rel;
2785 : }
2786 :
2787 : static sql_rel *
2788 2483795 : rel_join_order_(visitor *v, sql_rel *rel)
2789 : {
2790 2483795 : if (!rel)
2791 : return rel;
2792 :
2793 2468381 : switch (rel->op) {
2794 : case op_basetable:
2795 : break;
2796 3928 : case op_table:
2797 3928 : if (IS_TABLE_PROD_FUNC(rel->flag) || rel->flag == TABLE_FROM_RELATION)
2798 3928 : rel->l = rel_join_order_(v, rel->l);
2799 : break;
2800 : case op_join:
2801 : case op_left:
2802 : case op_right:
2803 : case op_full:
2804 : break;
2805 :
2806 12527 : case op_semi:
2807 : case op_anti:
2808 :
2809 : case op_union:
2810 : case op_inter:
2811 : case op_except:
2812 : case op_merge:
2813 12527 : rel->l = rel_join_order_(v, rel->l);
2814 12527 : rel->r = rel_join_order_(v, rel->r);
2815 12527 : break;
2816 168870 : case op_munion:
2817 168870 : assert(rel->l);
2818 577189 : for (node *n = ((list*)rel->l)->h; n; n = n->next)
2819 408319 : n->data = rel_join_order_(v, n->data);
2820 : break;
2821 1282639 : case op_project:
2822 : case op_select:
2823 : case op_groupby:
2824 : case op_topn:
2825 : case op_sample:
2826 1282639 : rel->l = rel_join_order_(v, rel->l);
2827 1282639 : break;
2828 52 : case op_ddl:
2829 52 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
2830 0 : rel->l = rel_join_order_(v, rel->l);
2831 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
2832 52 : rel->l = rel_join_order_(v, rel->l);
2833 52 : rel->r = rel_join_order_(v, rel->r);
2834 : }
2835 : break;
2836 12053 : case op_insert:
2837 : case op_update:
2838 : case op_delete:
2839 12053 : rel->r = rel_join_order_(v, rel->r);
2840 12053 : break;
2841 : case op_truncate:
2842 : break;
2843 : }
2844 2468381 : if (is_join(rel->op))
2845 288833 : rel = reorder_join(v, rel);
2846 : return rel;
2847 : }
2848 :
2849 : static sql_rel *
2850 84698 : rel_join_order(visitor *v, global_props *gp, sql_rel *rel)
2851 : {
2852 84698 : (void) gp;
2853 84698 : sql_rel *r = rel_join_order_(v, rel);
2854 84698 : sa_reset(v->sql->ta);
2855 84698 : return r;
2856 : }
2857 :
2858 : run_optimizer
2859 645052 : bind_join_order(visitor *v, global_props *gp)
2860 : {
2861 645052 : int flag = v->sql->sql_optimizer;
2862 644856 : return gp->opt_level == 1 && gp->opt_cycle < 10 && !gp->cnt[op_update] && (gp->cnt[op_join] || gp->cnt[op_left] ||
2863 1283565 : gp->cnt[op_right] || gp->cnt[op_full]) && (flag & join_order) ? rel_join_order : NULL;
2864 : }
2865 :
2866 : /* this join order is to be done once after statistics are gathered */
2867 : run_optimizer
2868 734570 : bind_join_order2(visitor *v, global_props *gp)
2869 : {
2870 : /*int flag = v->sql->sql_optimizer;
2871 : return gp->opt_level == 1 && !gp->has_special_modify && !gp->cnt[op_update] && (gp->cnt[op_join] || gp->cnt[op_left] ||
2872 : gp->cnt[op_right] || gp->cnt[op_full]) && (flag & join_order) ? rel_join_order : NULL;*/
2873 : /* TODO we have to propagate count statistics here */
2874 734570 : (void) v;
2875 734570 : (void) gp;
2876 734570 : return NULL;
2877 : }
2878 :
2879 :
2880 : static int
2881 13939 : is_identity_of(sql_exp *e, sql_rel *l)
2882 : {
2883 13939 : if (e->type != e_cmp)
2884 : return 0;
2885 13924 : if (!is_identity(e->l, l) || !is_identity(e->r, l) || (e->f && !is_identity(e->f, l)))
2886 13924 : return 0;
2887 : return 1;
2888 : }
2889 :
2890 : static inline sql_rel *
2891 18171 : rel_rewrite_semijoin(visitor *v, sql_rel *rel)
2892 : {
2893 18171 : assert(is_semi(rel->op));
2894 : {
2895 18171 : sql_rel *l = rel->l;
2896 18171 : sql_rel *r = rel->r;
2897 18171 : sql_rel *rl = (r->l)?r->l:NULL;
2898 18171 : int on_identity = 1;
2899 :
2900 18171 : if (!rel->exps || list_length(rel->exps) != 1 || !is_identity_of(rel->exps->h->data, l))
2901 : on_identity = 0;
2902 :
2903 : /* rewrite {semi,anti}join (A, join(A,B)) into {semi,anti}join (A,B)
2904 : * and {semi,anti}join (A, join(B,A)) into {semi,anti}join (A,B)
2905 : * Where the semi/anti join is done using the identity */
2906 0 : if (on_identity && l->ref.refcnt == 2 && ((is_join(r->op) && (l == r->l || l == r->r)) ||
2907 0 : (is_project(r->op) && rl && is_join(rl->op) && (l == rl->l || l == rl->r)))){
2908 0 : sql_rel *or = r;
2909 :
2910 0 : if (is_project(r->op))
2911 0 : r = rl;
2912 :
2913 0 : if (l == r->r)
2914 0 : rel->r = rel_dup(r->l);
2915 : else
2916 0 : rel->r = rel_dup(r->r);
2917 :
2918 0 : rel->exps = r->exps;
2919 0 : r->exps = NULL;
2920 0 : rel->attr = r->attr;
2921 0 : r->attr = NULL;
2922 0 : rel_destroy(or);
2923 0 : v->changes++;
2924 : }
2925 : }
2926 : {
2927 18171 : sql_rel *l = rel->l, *rl = NULL;
2928 18171 : sql_rel *r = rel->r, *or = r;
2929 :
2930 18171 : if (r)
2931 18171 : rl = r->l;
2932 18171 : if (r && is_project(r->op)) {
2933 15037 : r = rl;
2934 15037 : if (r)
2935 14831 : rl = r->l;
2936 : }
2937 :
2938 : /* More general case is (join reduction)
2939 : {semi,anti}join (A, join(A,B) [A.c1 == B.c1]) [ A.c1 == B.c1 ]
2940 : into {semi,anti}join (A,B) [ A.c1 == B.c1 ]
2941 :
2942 : for semijoin also A.c1 == B.k1 ] [ A.c1 == B.k2 ] could be rewritten
2943 : */
2944 18171 : if (l && r && rl &&
2945 16887 : is_basetable(l->op) && is_basetable(rl->op) &&
2946 3891 : is_join(r->op) && l->l == rl->l)
2947 : {
2948 7 : node *n, *m;
2949 7 : list *exps;
2950 :
2951 13 : if (!rel->exps || !r->exps ||
2952 6 : list_length(rel->exps) != list_length(r->exps))
2953 1 : return rel;
2954 6 : exps = new_exp_list(v->sql->sa);
2955 :
2956 : /* are the join conditions equal */
2957 6 : for (n = rel->exps->h, m = r->exps->h;
2958 6 : n && m; n = n->next, m = m->next)
2959 : {
2960 6 : sql_exp *le = NULL, *oe = n->data;
2961 6 : sql_exp *re = NULL, *ne = m->data;
2962 6 : sql_column *cl;
2963 6 : int equal = 0;
2964 :
2965 6 : if (oe->type != e_cmp || ne->type != e_cmp ||
2966 6 : oe->flag != cmp_equal ||
2967 6 : ne->flag != cmp_equal || is_anti(oe) || is_anti(ne))
2968 : return rel;
2969 :
2970 6 : if ((cl = exp_find_column(rel->l, oe->l, -2)) != NULL) {
2971 6 : le = oe->l;
2972 6 : re = oe->r;
2973 0 : } else if ((cl = exp_find_column(rel->l, oe->r, -2)) != NULL) {
2974 0 : le = oe->r;
2975 0 : re = oe->l;
2976 : } else
2977 : return rel;
2978 :
2979 6 : if (exp_find_column(rl, ne->l, -2) == cl) {
2980 6 : sql_exp *e = (or != r)?rel_find_exp(or, re):re;
2981 :
2982 6 : if (e)
2983 6 : equal = exp_match_exp(ne->r, e);
2984 6 : if (!e || !equal)
2985 : return rel;
2986 0 : re = ne->r;
2987 0 : } else if (exp_find_column(rl, ne->r, -2) == cl) {
2988 0 : sql_exp *e = (or != r)?rel_find_exp(or, re):re;
2989 :
2990 0 : if (e)
2991 0 : equal = exp_match_exp(ne->l, e);
2992 0 : if (!e || !equal)
2993 : return rel;
2994 0 : re = ne->l;
2995 : } else
2996 : return rel;
2997 :
2998 0 : ne = exp_compare(v->sql->sa, le, re, cmp_equal);
2999 0 : append(exps, ne);
3000 : }
3001 :
3002 0 : rel->r = rel_dup(r->r);
3003 0 : rel->exps = exps;
3004 0 : rel_destroy(or);
3005 0 : v->changes++;
3006 : }
3007 : }
3008 : return rel;
3009 : }
3010 :
3011 : /*
3012 : * Push semijoins down, pushes the semijoin through a join.
3013 : *
3014 : * semijoin( join(A, B) [ A.x == B.y ], C ) [ A.z == C.c ]
3015 : * ->
3016 : * join( semijoin(A, C) [ A.z == C.c ], B ) [ A.x == B.y ]
3017 : *
3018 : * also push simple expressions of a semijoin down if they only
3019 : * involve the left sided of the semijoin.
3020 : *
3021 : * in some cases the other way is useful, ie push join down
3022 : * semijoin. When the join reduces (ie when there are selects on it).
3023 : *
3024 : * At the moment, we only flag changes by this optimizer on the first level of optimization
3025 : */
3026 : static inline sql_rel *
3027 252313 : rel_push_semijoin_down_or_up(visitor *v, sql_rel *rel)
3028 : {
3029 252313 : uint8_t cycle = *(uint8_t*) v->data;
3030 :
3031 252313 : if (rel->op == op_join && rel->exps && rel->l) {
3032 217275 : sql_rel *l = rel->l, *r = rel->r;
3033 :
3034 217275 : if (is_semi(l->op) && !rel_is_ref(l) && is_select(r->op) && !rel_is_ref(r)) {
3035 16 : rel->l = l->l;
3036 16 : l->l = rel;
3037 16 : if (cycle <= 0)
3038 0 : v->changes++;
3039 16 : return l;
3040 : }
3041 : }
3042 : /* also case with 2 joins */
3043 : /* join ( join ( semijoin(), table), select (table)); */
3044 252297 : if (rel->op == op_join && rel->exps && rel->l) {
3045 217259 : sql_rel *l = rel->l, *r = rel->r;
3046 217259 : sql_rel *ll;
3047 :
3048 217259 : if (is_join(l->op) && !rel_is_ref(l) && is_select(r->op) && !rel_is_ref(r)) {
3049 8056 : ll = l->l;
3050 8056 : if (is_semi(ll->op) && !rel_is_ref(ll)) {
3051 27 : l->l = ll->l;
3052 27 : ll->l = rel;
3053 27 : if (cycle <= 0)
3054 0 : v->changes++;
3055 27 : return ll;
3056 : }
3057 : }
3058 : }
3059 : /* first push down the expressions involving only A */
3060 252270 : if (rel->op == op_semi && rel->exps && rel->l) {
3061 11594 : sql_rel *jl = rel->l, *ojl = jl;
3062 :
3063 11594 : set_processed(jl);
3064 26777 : for (node *n = rel->exps->h; n;) {
3065 15183 : node *next = n->next;
3066 15183 : sql_exp *e = n->data;
3067 :
3068 15183 : if (n != rel->exps->h && e->type == e_cmp && rel_rebind_exp(v->sql, jl, e)) {
3069 16 : if (!is_select(jl->op) || rel_is_ref(jl))
3070 14 : rel->l = jl = rel_select(v->sql->sa, jl, NULL);
3071 16 : rel_select_add_exp(v->sql->sa, jl, e);
3072 16 : list_remove_node(rel->exps, NULL, n);
3073 16 : if (cycle <= 0)
3074 16 : v->changes++;
3075 : }
3076 : n = next;
3077 : }
3078 11594 : if (ojl != jl)
3079 14 : set_processed(jl);
3080 : }
3081 252270 : if (rel->op == op_semi && rel->exps && rel->l) {
3082 11594 : operator_type op = rel->op, lop;
3083 11594 : node *n;
3084 11594 : sql_rel *l = rel->l, *ll = NULL, *lr = NULL;
3085 11594 : sql_rel *r = rel->r;
3086 11594 : list *exps = rel->exps, *nsexps, *njexps, *nsattr, *njattr;
3087 11594 : int left = 1, right = 1;
3088 :
3089 : /* handle project
3090 : if (l->op == op_project && !need_distinct(l))
3091 : l = l->l;
3092 : */
3093 :
3094 11594 : if (!is_join(l->op) || is_full(l->op) || rel_is_ref(l) || is_single(l))
3095 : return rel;
3096 :
3097 1124 : lop = l->op;
3098 1124 : ll = l->l;
3099 1124 : lr = l->r;
3100 :
3101 : /* check which side is used and other exps are atoms or from right of semijoin */
3102 2205 : for(n = exps->h; n; n = n->next) {
3103 1247 : sql_exp *sje = n->data;
3104 :
3105 1247 : if (sje->type != e_cmp || is_complex_exp(sje->flag))
3106 : return rel;
3107 : /* sje->l from ll and sje->r/f from semijoin r ||
3108 : * sje->l from semijoin r and sje->r/f from ll ||
3109 : * sje->l from lr and sje->r/f from semijoin r ||
3110 : * sje->l from semijoin r and sje->r/f from lr */
3111 2462 : if (left &&
3112 2462 : ((rel_rebind_exp(v->sql, ll, sje->l) && rel_rebind_exp(v->sql, rel->r, sje->r) && (!sje->f || rel_rebind_exp(v->sql, rel->r, sje->f))) ||
3113 1254 : (rel_rebind_exp(v->sql, rel->r, sje->l) && rel_rebind_exp(v->sql, ll, sje->r) && (!sje->f || rel_rebind_exp(v->sql, ll, sje->f)))))
3114 : right = 0;
3115 : else
3116 927 : left = 0;
3117 1733 : if (right &&
3118 1612 : ((rel_rebind_exp(v->sql, lr, sje->l) && rel_rebind_exp(v->sql, rel->r, sje->r) && (!sje->f || rel_rebind_exp(v->sql, rel->r, sje->f))) ||
3119 535 : (rel_rebind_exp(v->sql, rel->r, sje->l) && rel_rebind_exp(v->sql, lr, sje->r) && (!sje->f || rel_rebind_exp(v->sql, lr, sje->f)))))
3120 : left = 0;
3121 : else
3122 : right = 0;
3123 1231 : if (!right && !left)
3124 : return rel;
3125 : }
3126 958 : if (left && is_right(lop))
3127 : return rel;
3128 957 : if (right && is_left(lop))
3129 : return rel;
3130 956 : nsexps = exps_copy(v->sql, rel->exps);
3131 956 : nsattr = exps_copy(v->sql, rel->attr);
3132 956 : njexps = exps_copy(v->sql, l->exps);
3133 956 : njattr = exps_copy(v->sql, l->attr);
3134 956 : if (left)
3135 180 : l = rel_crossproduct(v->sql->sa, rel_dup(ll), rel_dup(r), op);
3136 : else
3137 776 : l = rel_crossproduct(v->sql->sa, rel_dup(lr), rel_dup(r), op);
3138 956 : l->exps = nsexps;
3139 956 : l->attr = nsattr;
3140 956 : set_processed(l);
3141 956 : if (left)
3142 180 : l = rel_crossproduct(v->sql->sa, l, rel_dup(lr), lop);
3143 : else
3144 776 : l = rel_crossproduct(v->sql->sa, rel_dup(ll), l, lop);
3145 956 : l->exps = njexps;
3146 956 : l->attr = njattr;
3147 956 : set_processed(l);
3148 956 : rel_destroy(rel);
3149 956 : rel = l;
3150 956 : if (cycle <= 0)
3151 567 : v->changes++;
3152 : }
3153 : return rel;
3154 : }
3155 :
3156 : /* antijoin(a, union(b,c)) -> antijoin(antijoin(a,b), c) */
3157 : static inline sql_rel *
3158 6521 : rel_rewrite_antijoin(visitor *v, sql_rel *rel)
3159 : {
3160 6521 : sql_rel *l = rel->l;
3161 6521 : sql_rel *r = rel->r;
3162 :
3163 6521 : assert(rel->op == op_anti);
3164 6521 : if (l && !rel_is_ref(l) && r && !rel_is_ref(r) && is_union(r->op) && !is_single(r)) {
3165 0 : sql_rel *rl = rel_dup(r->l), *nl;
3166 0 : sql_rel *rr = rel_dup(r->r);
3167 :
3168 0 : if (!is_project(rl->op))
3169 0 : rl = rel_project(v->sql->sa, rl,
3170 : rel_projections(v->sql, rl, NULL, 1, 1));
3171 0 : if (!is_project(rr->op))
3172 0 : rr = rel_project(v->sql->sa, rr,
3173 : rel_projections(v->sql, rr, NULL, 1, 1));
3174 0 : rel_rename_exps(v->sql, r->exps, rl->exps);
3175 0 : rel_rename_exps(v->sql, r->exps, rr->exps);
3176 :
3177 0 : nl = rel_crossproduct(v->sql->sa, rel->l, rl, op_anti);
3178 0 : nl->exps = exps_copy(v->sql, rel->exps);
3179 0 : nl->attr = exps_copy(v->sql, rel->attr);
3180 0 : set_processed(nl);
3181 0 : rel->l = nl;
3182 0 : rel->r = rr;
3183 0 : rel_destroy(r);
3184 0 : v->changes++;
3185 0 : return rel;
3186 : }
3187 : return rel;
3188 : }
3189 :
3190 : static sql_rel *
3191 1606697 : rel_optimize_semi_and_anti_(visitor *v, sql_rel *rel)
3192 : {
3193 : /* rewrite semijoin (A, join(A,B)) into semijoin (A,B) */
3194 1606697 : if (rel && is_semi(rel->op))
3195 18171 : rel = rel_rewrite_semijoin(v, rel);
3196 : /* push semijoin through join */
3197 1606697 : if (rel && (is_semi(rel->op) || is_innerjoin(rel->op)))
3198 252313 : rel = rel_push_semijoin_down_or_up(v, rel);
3199 : /* antijoin(a, union(b,c)) -> antijoin(antijoin(a,b), c) */
3200 1606697 : if (rel && rel->op == op_anti)
3201 6521 : rel = rel_rewrite_antijoin(v, rel);
3202 1606697 : return rel;
3203 : }
3204 :
3205 : static sql_rel *
3206 91901 : rel_optimize_semi_and_anti(visitor *v, global_props *gp, sql_rel *rel)
3207 : {
3208 91901 : v->data = &gp->opt_cycle;
3209 91901 : rel = rel_visitor_bottomup(v, rel, &rel_optimize_semi_and_anti_);
3210 91901 : v->data = gp;
3211 91901 : return rel;
3212 : }
3213 :
3214 : run_optimizer
3215 645233 : bind_optimize_semi_and_anti(visitor *v, global_props *gp)
3216 : {
3217 : /* Important -> Re-write semijoins after rel_join_order */
3218 645233 : int flag = v->sql->sql_optimizer;
3219 645044 : return gp->opt_level == 1 && (gp->cnt[op_join] || gp->cnt[op_left] || gp->cnt[op_right]
3220 1290277 : || gp->cnt[op_full] || gp->cnt[op_semi] || gp->cnt[op_anti]) && (flag & optimize_semi_and_anti) ? rel_optimize_semi_and_anti : NULL;
3221 : }
3222 :
3223 :
3224 : static sql_rel *
3225 780085 : rel_semijoin_use_fk(visitor *v, sql_rel *rel)
3226 : {
3227 780085 : if (is_semi(rel->op) && rel->exps) {
3228 6319 : list *exps = rel->exps;
3229 6319 : list *rels = sa_list(v->sql->sa);
3230 :
3231 6319 : rel->exps = NULL;
3232 6319 : append(rels, rel->l);
3233 6319 : append(rels, rel->r);
3234 6319 : (void) find_fk( v->sql, rels, exps);
3235 :
3236 6319 : rel->exps = exps;
3237 : }
3238 780085 : return rel;
3239 : }
3240 :
3241 : /*
3242 : * Push {semi}joins down, pushes the joins through group by expressions.
3243 : * When the join is on the group by columns, we can push the joins left
3244 : * under the group by. This should only be done, iff the new semijoin would
3245 : * reduce the input table to the groupby. So there should be a reduction
3246 : * (selection) on the table A and this should be propagated to the groupby via
3247 : * for example a primary key.
3248 : *
3249 : * {semi}join( A, groupby( B ) [gbe][aggrs] ) [ gbe == A.x ]
3250 : * ->
3251 : * {semi}join( A, groupby( semijoin(B,A) [gbe == A.x] ) [gbe][aggrs] ) [ gbe == A.x ]
3252 : */
3253 : static inline sql_rel *
3254 780085 : rel_push_join_down(visitor *v, sql_rel *rel)
3255 : {
3256 780085 : if (!rel_is_ref(rel) && ((is_left(rel->op) || rel->op == op_join || is_semi(rel->op)) && rel->l && rel->exps)) {
3257 114738 : sql_rel *gb = rel->r, *ogb = gb, *l = NULL, *rell = rel->l;
3258 :
3259 114738 : if (is_simple_project(gb->op) && !rel_is_ref(gb))
3260 16703 : gb = gb->l;
3261 :
3262 114738 : if (rel_is_ref(rell) || !gb || rel_is_ref(gb))
3263 : return rel;
3264 :
3265 105567 : if (is_groupby(gb->op) && gb->r && list_length(gb->r)) {
3266 140 : list *exps = rel->exps, *jes = new_exp_list(v->sql->sa), *gbes = gb->r;
3267 140 : node *n, *m;
3268 : /* find out if all group by expressions are used in the join */
3269 145 : for(n = gbes->h; n; n = n->next) {
3270 140 : sql_exp *gbe = n->data;
3271 140 : int fnd = 0;
3272 140 : sql_alias *rname = NULL;
3273 140 : const char *name = NULL;
3274 :
3275 : /* project in between, ie find alias */
3276 : /* first find expression in expression list */
3277 140 : gbe = exps_uses_exp( gb->exps, gbe);
3278 140 : if (!gbe)
3279 0 : continue;
3280 140 : if (ogb != gb)
3281 117 : gbe = exps_uses_exp( ogb->exps, gbe);
3282 140 : if (gbe) {
3283 137 : rname = exp_find_rel_name(gbe);
3284 137 : name = exp_name(gbe);
3285 : }
3286 :
3287 137 : if (!name)
3288 3 : return rel;
3289 :
3290 341 : for (m = exps->h; m && !fnd; m = m->next) {
3291 204 : sql_exp *je = m->data;
3292 :
3293 204 : if (je->card >= CARD_ATOM && je->type == e_cmp &&
3294 204 : !is_complex_exp(je->flag)) {
3295 : /* expect right expression to match */
3296 204 : sql_exp *r = je->r;
3297 :
3298 204 : if (r == 0 || r->type != e_column)
3299 11 : continue;
3300 193 : if (r->l && rname && a_match(r->l, rname) && strcmp(r->r, name)==0) {
3301 : fnd = 1;
3302 71 : } else if (!r->l && !rname && strcmp(r->r, name)==0) {
3303 : fnd = 1;
3304 : }
3305 : if (fnd) {
3306 122 : sql_exp *le = je->l;
3307 122 : sql_exp *re = exp_push_down_prj(v->sql, r, gb, gb->l);
3308 122 : if (!re || (list_length(jes) == 0 && !find_prop(le->p, PROP_HASHCOL))) {
3309 : fnd = 0;
3310 : } else {
3311 5 : int anti = is_anti(je), semantics = is_semantics(je);
3312 :
3313 5 : je = exp_compare(v->sql->sa, le, re, je->flag);
3314 5 : if (anti) set_anti(je);
3315 5 : if (semantics) set_semantics(je);
3316 5 : list_append(jes, je);
3317 : }
3318 : }
3319 : }
3320 : }
3321 137 : if (!fnd)
3322 : return rel;
3323 : }
3324 5 : l = rel_dup(rel->l);
3325 :
3326 : /* push join's left side (as semijoin) down group by */
3327 5 : l = gb->l = rel_crossproduct(v->sql->sa, gb->l, l, op_semi);
3328 5 : l->exps = jes;
3329 5 : set_processed(l);
3330 5 : v->changes++;
3331 5 : return rel;
3332 : }
3333 : }
3334 : return rel;
3335 : }
3336 :
3337 : static bool
3338 364 : check_projection_on_foreignside(sql_rel *r, list *pexps, int fk_left)
3339 : {
3340 : /* projection columns from the foreign side */
3341 364 : if (list_empty(pexps))
3342 : return true;
3343 1284 : for (node *n = pexps->h; n; n = n->next) {
3344 1219 : sql_exp *pe = n->data;
3345 :
3346 1219 : if (pe && is_atom(pe->type))
3347 7 : continue;
3348 1212 : if (pe && !is_alias(pe->type))
3349 : return false;
3350 : /* check for columns from the pk side, then keep the join with the pk */
3351 1191 : if ((fk_left && rel_find_exp(r->r, pe)) || (!fk_left && rel_find_exp(r->l, pe)))
3352 230 : return false;
3353 : }
3354 : return true;
3355 : }
3356 :
3357 : static sql_rel *
3358 129721 : rel_simplify_project_fk_join(mvc *sql, sql_rel *r, list *pexps, list *orderexps, int *changes)
3359 : {
3360 129721 : sql_rel *rl = r->l, *rr = r->r, *nr = NULL;
3361 129721 : sql_exp *je, *le, *nje, *re;
3362 129721 : int fk_left = 1;
3363 :
3364 : /* check for foreign key join */
3365 129721 : if (list_length(r->exps) != 1 || !list_empty(r->attr))
3366 7578 : return r;
3367 122143 : if (!(je = exps_find_prop(r->exps, PROP_JOINIDX)) || je->flag != cmp_equal)
3368 : return r;
3369 : /* je->l == foreign expression, je->r == primary expression */
3370 420 : if (rel_find_exp(r->l, je->l)) {
3371 : fk_left = 1;
3372 34 : } else if (rel_find_exp(r->r, je->l)) {
3373 : fk_left = 0;
3374 : } else { /* not found */
3375 : return r;
3376 : }
3377 :
3378 : /* primary side must be a full table */
3379 386 : if ((fk_left && (!is_left(r->op) && !is_full(r->op)) && !is_basetable(rr->op)) ||
3380 34 : (!fk_left && (!is_right(r->op) && !is_full(r->op)) && !is_basetable(rl->op)))
3381 134 : return r;
3382 :
3383 286 : if (!check_projection_on_foreignside(r, pexps, fk_left) || !check_projection_on_foreignside(r, orderexps, fk_left))
3384 249 : return r;
3385 :
3386 : /* rewrite, ie remove pkey side if possible */
3387 37 : le = (sql_exp*)je->l, re = (sql_exp*)je->l;
3388 :
3389 : /* both have NULL and there are semantics, the join cannot be removed */
3390 37 : if (is_semantics(je) && has_nil(le) && has_nil(re))
3391 : return r;
3392 :
3393 37 : (*changes)++;
3394 : /* if the foreign key column doesn't have NULL values, then return it */
3395 37 : if (!has_nil(le) || is_full(r->op) || (fk_left && is_left(r->op)) || (!fk_left && is_right(r->op))) {
3396 : /* if ->attr, introduce group by on index */
3397 31 : if (fk_left) {
3398 23 : nr = rel_dup(r->l);
3399 : } else {
3400 8 : nr = rel_dup(r->r);
3401 : }
3402 31 : if (!list_empty(r->attr)) {
3403 0 : nr = rel_groupby(sql, nr, NULL);
3404 0 : if (nr) {
3405 : // printf("# introduced groupby \n");
3406 0 : nr->r = append(sa_list(sql->sa), le);
3407 0 : nr->exps = r->attr;
3408 : }
3409 : }
3410 31 : return nr;
3411 : }
3412 :
3413 : /* remove NULL values, ie generate a select not null */
3414 6 : nje = exp_compare(sql->sa, exp_ref(sql, le), exp_atom(sql->sa, atom_general(sql->sa, exp_subtype(le), NULL, 0)), cmp_equal);
3415 6 : set_anti(nje);
3416 6 : set_has_no_nil(nje);
3417 6 : set_semantics(nje);
3418 6 : if (fk_left) {
3419 6 : nr = rel_dup(r->l);
3420 : } else {
3421 0 : nr = rel_dup(r->r);
3422 : }
3423 6 : nr = rel_select(sql->sa, nr, nje);
3424 6 : set_processed(nr);
3425 6 : return nr;
3426 : }
3427 :
3428 : static sql_rel *
3429 1858 : rel_simplify_count_fk_join(mvc *sql, sql_rel *r, list *gexps, list *gcols, int *changes)
3430 : {
3431 1858 : sql_rel *rl = r->l, *rr = r->r, *nr = NULL;
3432 1858 : sql_exp *je, *le, *nje, *re, *oce;
3433 1858 : int fk_left = 1;
3434 :
3435 : /* check for foreign key join */
3436 1858 : if (list_length(r->exps) != 1)
3437 : return r;
3438 1857 : if (!(je = exps_find_prop(r->exps, PROP_JOINIDX)) || je->flag != cmp_equal)
3439 : return r;
3440 : /* je->l == foreign expression, je->r == primary expression */
3441 63 : if (rel_find_exp(r->l, je->l)) {
3442 : fk_left = 1;
3443 7 : } else if (rel_find_exp(r->r, je->l)) {
3444 : fk_left = 0;
3445 : } else { /* not found */
3446 : return r;
3447 : }
3448 :
3449 63 : oce = gexps->h->data;
3450 63 : if (oce->l) /* we only handle COUNT(*) */
3451 : return r;
3452 :
3453 : /* primary side must be a full table */
3454 62 : if ((fk_left && (!is_left(r->op) && !is_full(r->op)) && !is_basetable(rr->op)) ||
3455 6 : (!fk_left && (!is_right(r->op) && !is_full(r->op)) && !is_basetable(rl->op)))
3456 : return r;
3457 :
3458 33 : if (fk_left && is_join(rl->op) && !rel_is_ref(rl)) {
3459 12 : r->l = rel_simplify_count_fk_join(sql, rl, gexps, gcols, changes);
3460 12 : if (rl != r->l)
3461 9 : rel_destroy(rl);
3462 : }
3463 39 : if (!fk_left && is_join(rr->op) && !rel_is_ref(rr)) {
3464 2 : r->r = rel_simplify_count_fk_join(sql, rr, gexps, gcols, changes);
3465 2 : if (rr != r->r)
3466 2 : rel_destroy(rr);
3467 : }
3468 :
3469 39 : if (!check_projection_on_foreignside(r, gcols, fk_left))
3470 : return r;
3471 :
3472 : /* rewrite, ie remove pkey side if possible */
3473 37 : le = (sql_exp*)je->l, re = (sql_exp*)je->l;
3474 :
3475 : /* both have NULL and there are semantics, the join cannot be removed */
3476 37 : if (is_semantics(je) && has_nil(le) && has_nil(re))
3477 : return r;
3478 :
3479 37 : (*changes)++;
3480 : /* if the foreign key column doesn't have NULL values, then return it */
3481 37 : if (!has_nil(le) || is_full(r->op) || (fk_left && is_left(r->op)) || (!fk_left && is_right(r->op))) {
3482 31 : if (fk_left) {
3483 25 : nr = rel_dup(r->l);
3484 : } else {
3485 6 : nr = rel_dup(r->r);
3486 : }
3487 31 : return nr;
3488 : }
3489 :
3490 : /* remove NULL values, ie generate a select not null */
3491 6 : nje = exp_compare(sql->sa, exp_ref(sql, le), exp_atom(sql->sa, atom_general(sql->sa, exp_subtype(le), NULL, 0)), cmp_equal);
3492 6 : set_anti(nje);
3493 6 : set_has_no_nil(nje);
3494 6 : set_semantics(nje);
3495 6 : if (fk_left) {
3496 6 : nr = rel_dup(r->l);
3497 : } else {
3498 0 : nr = rel_dup(r->r);
3499 : }
3500 6 : nr = rel_select(sql->sa, nr, nje);
3501 6 : set_processed(nr);
3502 6 : return nr;
3503 : }
3504 :
3505 : /*
3506 : * Handle (left/right/outer/natural) join fk-pk rewrites
3507 : * 1 group by ( fk-pk-join () ) [ count(*) ] -> group by ( fk )
3508 : * 2 project ( fk-pk-join () ) [ fk-column ] -> project (fk table)[ fk-column ]
3509 : * 3 project ( fk1-pk1-join( fk2-pk2-join()) [ fk-column, pk1 column ] -> project (fk1-pk1-join)[ fk-column, pk1 column ]
3510 : */
3511 : static inline sql_rel *
3512 1843604 : rel_simplify_fk_joins(visitor *v, sql_rel *rel)
3513 : {
3514 1843604 : sql_rel *r = NULL;
3515 :
3516 1843604 : if (is_simple_project(rel->op))
3517 625257 : r = rel->l;
3518 :
3519 1843641 : while (is_simple_project(rel->op) && r && list_length(r->exps) == 1 && (is_join(r->op) || r->op == op_semi) && !(rel_is_ref(r))) {
3520 129721 : sql_rel *or = r;
3521 :
3522 129721 : r = rel_simplify_project_fk_join(v->sql, r, rel->exps, rel->r, &v->changes);
3523 129721 : if (r == or)
3524 : return rel;
3525 37 : rel_destroy(rel->l);
3526 37 : rel->l = r;
3527 : }
3528 :
3529 1713920 : if (!is_groupby(rel->op))
3530 : return rel;
3531 :
3532 49109 : r = rel->l;
3533 67907 : while(r && is_simple_project(r->op))
3534 18798 : r = r->l;
3535 :
3536 61693 : while (is_groupby(rel->op) && !rel_is_ref(rel) && r && (is_join(r->op) || r->op == op_semi) && list_length(r->exps) == 1 && !(rel_is_ref(r)) &&
3537 : /* currently only single count aggregation is handled, no other projects or aggregation */
3538 65213 : list_length(rel->exps) == 1 && exp_aggr_is_count(rel->exps->h->data)) {
3539 1844 : sql_rel *or = r;
3540 :
3541 1844 : r = rel_simplify_count_fk_join(v->sql, r, rel->exps, rel->r, &v->changes);
3542 1844 : if (r == or)
3543 : return rel;
3544 26 : rel_destroy(rel->l);
3545 26 : rel->l = r;
3546 : }
3547 : return rel;
3548 : }
3549 :
3550 : /*
3551 : * Gets the column expressions of a diff function and adds them to "columns".
3552 : * The diff function has two possible argument types: either a sql_exp representing a column
3553 : * or a sql_exp representing another diff function, therefore this function is recursive.
3554 : */
3555 : static void
3556 116 : get_diff_function_columns(sql_exp *diffExp, list *columns) {
3557 116 : list *args = diffExp->l;
3558 :
3559 252 : for (node *arg = args->h; arg; arg = arg->next) {
3560 136 : sql_exp *exp = arg->data;
3561 :
3562 : // diff function
3563 136 : if (exp->type == e_func) {
3564 20 : get_diff_function_columns(exp, columns);
3565 : }
3566 : // column
3567 : else {
3568 116 : list_append(columns, exp);
3569 : }
3570 : }
3571 116 : }
3572 :
3573 : /*
3574 : * Builds a list of aggregation key columns to be used by the select push down algorithm, namely for
3575 : * window functions. Returns NULL if the window function does not partition by any column
3576 : */
3577 : static list *
3578 1623 : get_partition_by_key_columns(allocator *sa, sql_rel *r) {
3579 18127 : for (node* n = r->exps->h; n; n = n->next) {
3580 16690 : sql_exp *e = n->data;
3581 :
3582 16690 : if (e->type == e_func) {
3583 5923 : sql_subfunc *f = e->f;
3584 :
3585 : // aggregation function
3586 5923 : if (!strcmp(f->func->base.name, "rank")) {
3587 186 : list* rankArguments = e->l;
3588 : // the partition key is the second argument
3589 186 : sql_exp *partitionExp = rankArguments->h->next->data;
3590 :
3591 : // check if the key contains any columns, i.e., is a diff function
3592 186 : if (partitionExp->type == e_func) {
3593 : // get columns to list
3594 96 : list *aggColumns = sa_list(sa);
3595 96 : get_diff_function_columns(partitionExp, aggColumns);
3596 96 : return aggColumns;
3597 : }
3598 : // the function has no aggregation columns (e_atom of boolean)
3599 : else {
3600 : return NULL;
3601 : }
3602 :
3603 : }
3604 : }
3605 : }
3606 : return NULL;
3607 : }
3608 :
3609 : /*
3610 : static bool
3611 : rank_exp_has_partition_key(sql_exp *e)
3612 : {
3613 : if (e->type == e_func) {
3614 : sql_subfunc *f = e->f;
3615 :
3616 : if (f->func->type == F_ANALYTIC) {
3617 : list *args = e->l;
3618 :
3619 : if (list_length(args) >= 2) { // the partition key is the second argument
3620 : return true;
3621 : }
3622 : }
3623 : }
3624 : return false;
3625 : }
3626 : */
3627 :
3628 : /*
3629 : * Checks if a filter column is also used as an aggregation key, so it can be later safely pushed down.
3630 : */
3631 : static int
3632 149 : filter_column_in_partition_by_columns(sql_exp *column, list *keyColumns)
3633 : {
3634 : /* check if it is a column or an e_convert, and get the actual column if it is the latter */
3635 149 : if (column->type == e_convert) {
3636 1 : column = column->l;
3637 : }
3638 :
3639 149 : char *tableName = column->l;
3640 149 : char *columnName = column->r;
3641 :
3642 314 : for (node *n = keyColumns->h; n; n = n->next) {
3643 174 : sql_exp *keyCol = n->data;
3644 174 : char *keyColTableName = keyCol->l;
3645 174 : char *keyColColumnName = keyCol->r;
3646 :
3647 174 : if (!strcmp(tableName, keyColTableName) && !strcmp(columnName, keyColColumnName)) {
3648 : /* match */
3649 : return 1;
3650 : }
3651 : }
3652 :
3653 : /* no matches found */
3654 : return 0;
3655 : }
3656 :
3657 : /*
3658 : * Push select down, pushes the selects through (simple) projections. Also
3659 : * it cleans up the projections which become useless.
3660 : *
3661 : * WARNING - Make sure to call try_remove_empty_select macro before returning so we ensure
3662 : * possible generated empty selects won't never be generated
3663 : */
3664 : static sql_rel *
3665 3701243 : rel_push_select_down(visitor *v, sql_rel *rel)
3666 : {
3667 3701243 : list *exps = NULL;
3668 3701243 : sql_rel *r = NULL;
3669 3701243 : node *n;
3670 :
3671 3701243 : if (rel_is_ref(rel)) {
3672 72837 : if (is_select(rel->op) && !list_empty(rel->exps)) {
3673 : /* add inplace empty select */
3674 1745 : sql_rel *l = rel_select(v->sql->sa, rel->l, NULL);
3675 :
3676 1745 : l->exps = rel->exps;
3677 1745 : set_processed(l);
3678 1745 : rel->exps = NULL;
3679 1745 : rel->l = l;
3680 1745 : v->changes++;
3681 : }
3682 72837 : return rel;
3683 : }
3684 :
3685 : /* don't make changes for empty selects */
3686 3628406 : if (is_select(rel->op) && list_empty(rel->exps))
3687 10 : return try_remove_empty_select(v, rel);
3688 :
3689 : /* merge 2 selects */
3690 3628396 : r = rel->l;
3691 3628396 : if (is_select(rel->op) && r && r->exps && is_select(r->op) && !(rel_is_ref(r)) && !exps_have_func(rel->exps)) {
3692 28 : (void)list_merge(r->exps, rel->exps, (fdup)NULL);
3693 28 : rel->l = NULL;
3694 28 : rel_destroy(rel);
3695 28 : v->changes++;
3696 28 : return try_remove_empty_select(v, r);
3697 : }
3698 : /*
3699 : * Push select through semi/anti join
3700 : * select (semi(A,B)) == semi(select(A), B)
3701 : */
3702 3628368 : if (is_select(rel->op) && r && is_semi(r->op) && !(rel_is_ref(r))) {
3703 330 : rel->l = r->l;
3704 330 : r->l = rel;
3705 330 : v->changes++;
3706 : /*
3707 : * if A has 2 references (ie used on both sides of
3708 : * the semi join), we also push the select into A.
3709 : */
3710 330 : if (rel_is_ref(rel->l) && rel->l == rel_find_ref(r->r)){
3711 0 : sql_rel *lx = rel->l;
3712 0 : sql_rel *rx = r->r;
3713 0 : if (lx->ref.refcnt == 2 && !rel_is_ref(rx)) {
3714 0 : while (rx->l && !rel_is_ref(rx->l) &&
3715 0 : (is_project(rx->op) ||
3716 : is_select(rx->op) ||
3717 : is_join(rx->op)))
3718 : rx = rx->l;
3719 : /* probably we need to introduce a project */
3720 0 : rel_destroy(rel->l);
3721 0 : lx = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
3722 0 : r->l = lx;
3723 0 : rx->l = rel_dup(lx);
3724 : }
3725 : }
3726 330 : return r;
3727 : }
3728 3628038 : exps = rel->exps;
3729 :
3730 : /* push select through join */
3731 3628038 : if (is_select(rel->op) && r && is_join(r->op) && !rel_is_ref(r) && !is_single(r)){
3732 22184 : sql_rel *jl = r->l, *ojl = jl, *jr = r->r, *ojr = jr;
3733 22184 : int left = r->op == op_join || r->op == op_left;
3734 22184 : int right = r->op == op_join || r->op == op_right;
3735 :
3736 22184 : if (r->op == op_full)
3737 : return rel;
3738 :
3739 : /* introduce selects under the join (if needed) */
3740 22160 : set_processed(jl);
3741 22160 : set_processed(jr);
3742 48853 : for (n = exps->h; n;) {
3743 26693 : node *next = n->next;
3744 26693 : sql_exp *e = n->data;
3745 :
3746 26693 : if (!exp_unsafe(e, false, true)) {
3747 26502 : if (left && rel_rebind_exp(v->sql, jl, e)) {
3748 13759 : if (!is_select(jl->op) || rel_is_ref(jl))
3749 11466 : r->l = jl = rel_select(v->sql->sa, jl, NULL);
3750 13759 : rel_select_add_exp(v->sql->sa, jl, e);
3751 13759 : list_remove_node(exps, NULL, n);
3752 13759 : v->changes++;
3753 12743 : } else if (right && rel_rebind_exp(v->sql, jr, e)) {
3754 5307 : if (!is_select(jr->op) || rel_is_ref(jr))
3755 4334 : r->r = jr = rel_select(v->sql->sa, jr, NULL);
3756 5307 : rel_select_add_exp(v->sql->sa, jr, e);
3757 5307 : list_remove_node(exps, NULL, n);
3758 5307 : v->changes++;
3759 : }
3760 : }
3761 : n = next;
3762 : }
3763 22160 : if (ojl != jl)
3764 11466 : set_processed(jl);
3765 22160 : if (ojr != jr)
3766 4334 : set_processed(jr);
3767 : }
3768 :
3769 : /* merge select and cross product ? */
3770 3628014 : if (is_select(rel->op) && r && r->op == op_join && !rel_is_ref(r) && !is_single(r) && !exps_have_unsafe(exps, false, true)) {
3771 14668 : for (n = exps->h; n;) {
3772 1856 : node *next = n->next;
3773 1856 : sql_exp *e = n->data;
3774 :
3775 1856 : if (exp_is_join(e, NULL) == 0) {
3776 509 : if (!r->exps)
3777 158 : r->exps = sa_list(v->sql->sa);
3778 509 : append(r->exps, e);
3779 509 : list_remove_node(exps, NULL, n);
3780 509 : v->changes++;
3781 : }
3782 : n = next;
3783 : }
3784 : }
3785 :
3786 3628014 : if (is_select(rel->op) && r && (is_simple_project(r->op) || (is_groupby(r->op) && !list_empty(r->r))) && !rel_is_ref(r) && !is_single(r)){
3787 74548 : sql_rel *pl = r->l, *opl = pl;
3788 : /* we cannot push through window functions (for safety I disabled projects over DDL too) */
3789 74548 : if (pl && pl->op != op_ddl && !exps_have_unsafe(r->exps, false, false)) {
3790 : /* introduce selects under the project (if needed) */
3791 54555 : set_processed(pl);
3792 54555 : if (!pl->exps)
3793 426 : pl->exps = sa_list(v->sql->sa);
3794 116375 : for (n = exps->h; n;) {
3795 61820 : node *next = n->next;
3796 61820 : sql_exp *e = n->data, *ne = NULL;
3797 :
3798 : /* can we move it down */
3799 61820 : if (e->type == e_cmp && (ne = exp_push_down_prj(v->sql, e, r, pl)) && ne != e) {
3800 37384 : if (!(is_select(pl->op) && is_join(pl->op) && is_semi(pl->op)) || rel_is_ref(pl))
3801 37384 : r->l = pl = rel_select(v->sql->sa, pl, NULL);
3802 37384 : rel_select_add_exp(v->sql->sa, pl, ne);
3803 37384 : list_remove_node(exps, NULL, n);
3804 37384 : v->changes++;
3805 : }
3806 : n = next;
3807 : }
3808 54555 : if (opl != pl)
3809 28717 : set_processed(pl);
3810 : }
3811 :
3812 : /* push filters if they match the partition by key on a window function */
3813 1623 : else if (pl && pl->op != op_ddl && exps_have_unsafe(r->exps, false, false)) {
3814 1623 : set_processed(pl);
3815 : /* list of partition by key columns */
3816 1623 : list *keyColumns = get_partition_by_key_columns(v->sql->sa, r);
3817 :
3818 : /* partition by keys found, check if any filter matches them */
3819 1623 : if (keyColumns) {
3820 249 : for (n = exps->h; n;) {
3821 153 : node *next = n->next;
3822 153 : sql_exp *e = n->data, *ne = NULL;
3823 :
3824 153 : if (e->type == e_cmp) {
3825 : /* simple comparison filter */
3826 153 : if (e->flag == cmp_gt || e->flag == cmp_gte || e->flag == cmp_lte || e->flag == cmp_lt
3827 153 : || e->flag == cmp_equal || e->flag == cmp_notequal || e->flag == cmp_in || e->flag == cmp_notin
3828 5 : || (e->flag == cmp_filter && ((list*)e->l)->cnt == 1)) {
3829 149 : sql_exp* column;
3830 : /* the column in 'like' filters is stored inside a list */
3831 149 : if (e->flag == cmp_filter) {
3832 1 : column = ((list*)e->l)->h->data;
3833 : } else {
3834 148 : column = e->l;
3835 : }
3836 :
3837 : /* check if the expression matches any partition by key, meaning we can
3838 : try to safely push it down */
3839 149 : if (filter_column_in_partition_by_columns(column, keyColumns)) {
3840 9 : ne = exp_push_down_prj(v->sql, e, r, pl);
3841 :
3842 : /* can we move it down */
3843 9 : if (ne && ne != e && pl->exps) {
3844 9 : if (!is_select(pl->op) || rel_is_ref(pl))
3845 8 : r->l = pl = rel_select(v->sql->sa, pl, NULL);
3846 9 : rel_select_add_exp(v->sql->sa, pl, ne);
3847 9 : list_remove_node(exps, NULL, n);
3848 9 : v->changes++;
3849 : }
3850 : }
3851 : }
3852 : }
3853 : n = next;
3854 : }
3855 :
3856 : /* cleanup list */
3857 96 : list_destroy(keyColumns);
3858 : }
3859 : /* also push (rewrite) limits on output of row_number/(*)rank like window functions */
3860 1623 : if (is_simple_project(r->op) /*&& is_simple_project(pl->op)*/) { /* possible window functions */
3861 3347 : for (n = exps->h; n; n = n->next) {
3862 1732 : sql_exp *e = n->data;
3863 :
3864 1732 : if (e->type == e_cmp && (e->flag == cmp_lt || e->flag == cmp_lte) && exp_is_atom(e->r)) { /* simple limit */
3865 56 : sql_exp *ranke = rel_find_exp(r, e->l);
3866 :
3867 56 : if (ranke && ranke->type == e_func) {
3868 51 : sql_subfunc *rankf = ranke->f;
3869 51 : if (rankf->func->type == F_ANALYTIC) { /* rank functions cannot have a frame */
3870 : // For now only for rank/row_number without partition by
3871 50 : sql_rel *tn = NULL;
3872 50 : if (strcmp(rankf->func->base.name, "rank") == 0 && is_simple_project(pl->op) && pl->r /* &&
3873 : !rank_exp_has_partition_key(ranke)*/) {
3874 6 : tn = r->l = rel_topn(v->sql->sa, r->l, append(sa_list(v->sql->sa), e->r));
3875 6 : tn->grouped = 1;
3876 6 : v->changes++;
3877 6 : break;
3878 : }
3879 44 : if (strcmp(rankf->func->base.name, "row_number") == 0 && list_empty(r->r) && !is_topn(pl->op) /*&&
3880 : !rank_exp_has_partition_key(ranke)*/) {
3881 2 : tn = r->l = rel_topn(v->sql->sa, r->l, append(sa_list(v->sql->sa), e->r));
3882 2 : tn->grouped = 1;
3883 2 : v->changes++;
3884 2 : break;
3885 : }
3886 : }
3887 : }
3888 : }
3889 : }
3890 : }
3891 : }
3892 : }
3893 :
3894 : /* try push select under set relation */
3895 3628014 : if (is_select(rel->op) && r && is_set(r->op) && !list_empty(r->exps) && !rel_is_ref(r) && !is_single(r) && !list_empty(exps)) {
3896 20 : sql_rel *u = r, *ul = u->l, *ur = u->r;
3897 :
3898 20 : ul = rel_dup(ul);
3899 20 : ur = rel_dup(ur);
3900 20 : if (!is_project(ul->op))
3901 0 : ul = rel_project(v->sql->sa, ul,
3902 : rel_projections(v->sql, ul, NULL, 1, 1));
3903 20 : if (!is_project(ur->op))
3904 0 : ur = rel_project(v->sql->sa, ur,
3905 : rel_projections(v->sql, ur, NULL, 1, 1));
3906 20 : rel_rename_exps(v->sql, u->exps, ul->exps);
3907 20 : rel_rename_exps(v->sql, u->exps, ur->exps);
3908 :
3909 : /* introduce selects under the set */
3910 20 : ul = rel_select(v->sql->sa, ul, NULL);
3911 20 : ul->exps = exps_copy(v->sql, exps);
3912 20 : set_processed(ul);
3913 20 : ur = rel_select(v->sql->sa, ur, NULL);
3914 20 : ur->exps = exps_copy(v->sql, exps);
3915 20 : set_processed(ur);
3916 :
3917 20 : rel = rel_inplace_setop(v->sql, rel, ul, ur, u->op, rel_projections(v->sql, rel, NULL, 1, 1));
3918 20 : if (need_distinct(u))
3919 0 : set_distinct(rel);
3920 20 : v->changes++;
3921 : }
3922 3628014 : if (is_select(rel->op) && r && is_munion(r->op) && !is_recursive(r) && !list_empty(r->exps) && !rel_is_ref(r) && !is_single(r) && !list_empty(exps)) {
3923 4993 : sql_rel *u = r;
3924 4993 : list *rels = u->l, *nrels = sa_list(v->sql->sa);
3925 14987 : for(node *n = rels->h; n; n = n->next) {
3926 9994 : sql_rel *ul = n->data;
3927 9994 : ul = rel_dup(ul);
3928 9994 : if (!is_project(ul->op) || rel_is_ref(ul))
3929 9994 : ul = rel_project(v->sql->sa, ul,
3930 : rel_projections(v->sql, ul, NULL, 1, 1));
3931 9994 : rel_rename_exps(v->sql, u->exps, ul->exps);
3932 :
3933 : /* introduce selects under the set */
3934 9994 : ul = rel_select(v->sql->sa, ul, NULL);
3935 9994 : ul->exps = exps_copy(v->sql, exps);
3936 9994 : set_processed(ul);
3937 9994 : nrels = append(nrels, ul);
3938 : }
3939 :
3940 4993 : rel = rel_inplace_setop_n_ary(v->sql, rel, nrels, u->op, rel_projections(v->sql, rel, NULL, 1, 1));
3941 4993 : if (need_distinct(u))
3942 9 : set_distinct(rel);
3943 4993 : if (is_recursive(u))
3944 0 : set_recursive(rel);
3945 4993 : v->changes++;
3946 : }
3947 :
3948 3628014 : return try_remove_empty_select(v, rel);
3949 : }
3950 :
3951 : static int
3952 9008 : index_exp(sql_exp *e, sql_idx *i)
3953 : {
3954 9008 : if (e->type == e_cmp && !is_complex_exp(e->flag)) {
3955 5353 : switch(i->type) {
3956 5353 : case hash_idx:
3957 : case oph_idx:
3958 5353 : if (e->flag == cmp_equal)
3959 : return 0;
3960 : /* fall through */
3961 : case join_idx:
3962 : default:
3963 144 : return -1;
3964 : }
3965 : }
3966 : return -1;
3967 : }
3968 :
3969 : /* find column for the select/join expression */
3970 : static sql_column *
3971 5209 : sjexp_col(sql_exp *e, sql_rel *r)
3972 : {
3973 5209 : sql_column *res = NULL;
3974 :
3975 5209 : if (e->type == e_cmp && !is_complex_exp(e->flag)) {
3976 5209 : res = exp_find_column(r, e->l, -2);
3977 5209 : if (!res)
3978 1420 : res = exp_find_column(r, e->r, -2);
3979 : }
3980 5209 : return res;
3981 : }
3982 :
3983 : static sql_idx *
3984 816831 : find_index(allocator *sa, sql_rel *rel, sql_rel *sub, list **EXPS)
3985 : {
3986 816831 : node *n;
3987 :
3988 : /* any (partial) match of the expressions with the index columns */
3989 : /* Depending on the index type we may need full matches and only
3990 : limited number of cmp types (hash only equality etc) */
3991 : /* Depending on the index type we should (in the rel_bin) generate
3992 : more code, ie for spatial index add post filter etc, for hash
3993 : compute hash value and use index */
3994 :
3995 816831 : if (sub->exps && rel->exps)
3996 3722442 : for(n = sub->exps->h; n; n = n->next) {
3997 2983333 : prop *p;
3998 2983333 : sql_exp *e = n->data;
3999 :
4000 2983333 : if ((p = find_prop(e->p, PROP_HASHIDX)) != NULL) {
4001 8050 : list *exps, *cols;
4002 8050 : sql_idx *i = p->value.pval;
4003 8050 : fcmp cmp = (fcmp)&sql_column_kc_cmp;
4004 :
4005 : /* join indices are only interesting for joins */
4006 8050 : if (i->type == join_idx || list_length(i->columns) <= 1)
4007 0 : continue;
4008 : /* based on the index type, find qualifying exps */
4009 8050 : exps = list_select(rel->exps, i, (fcmp) &index_exp, (fdup)NULL);
4010 8050 : if (list_empty(exps))
4011 3735 : continue;
4012 : /* now we obtain the columns, move into sql_column_kc_cmp! */
4013 4315 : cols = list_map(exps, sub, (fmap) &sjexp_col);
4014 :
4015 : /* TODO check that at most 2 relations are involved */
4016 :
4017 : /* Match the index columns with the expression columns.
4018 : TODO, Allow partial matches ! */
4019 4315 : if (list_match(cols, i->columns, cmp) == 0) {
4020 : /* re-order exps in index order */
4021 217 : node *n, *m;
4022 217 : list *es = sa_list(sa);
4023 :
4024 751 : for(n = i->columns->h; n; n = n->next) {
4025 534 : int i = 0;
4026 1945 : for(m = cols->h; m; m = m->next, i++) {
4027 1945 : if (cmp(m->data, n->data) == 0){
4028 534 : sql_exp *e = list_fetch(exps, i);
4029 534 : list_append(es, e);
4030 534 : break;
4031 : }
4032 : }
4033 : }
4034 : /* fix the destroy function */
4035 217 : cols->destroy = NULL;
4036 217 : *EXPS = es;
4037 217 : e->used = 1;
4038 217 : return i;
4039 : }
4040 4098 : cols->destroy = NULL;
4041 : }
4042 : }
4043 : return NULL;
4044 : }
4045 :
4046 : static inline sql_rel *
4047 517474 : rel_use_index(visitor *v, sql_rel *rel)
4048 : {
4049 517474 : list *exps = NULL;
4050 517474 : sql_idx *i = find_index(v->sql->sa, rel, rel->l, &exps);
4051 517474 : int left = 1;
4052 :
4053 517474 : assert(is_select(rel->op) || is_join(rel->op));
4054 517474 : if (!i && is_join(rel->op)) {
4055 299357 : left = 0;
4056 299357 : i = find_index(v->sql->sa, rel, rel->r, &exps);
4057 : }
4058 :
4059 517355 : if (i) {
4060 217 : prop *p;
4061 217 : node *n;
4062 217 : int single_table = 1;
4063 217 : sql_exp *re = NULL;
4064 :
4065 749 : for( n = exps->h; n && single_table; n = n->next) {
4066 532 : sql_exp *e = n->data, *nre = e->l;
4067 :
4068 532 : if (!is_compare(e->type) || is_anti(e) || e->flag != cmp_equal)
4069 : return rel;
4070 532 : if (is_join(rel->op) && ((left && !rel_find_exp(rel->l, nre)) || (!left && rel_find_exp(rel->r, nre))))
4071 95 : nre = e->r;
4072 532 : single_table = (!re || (exp_relname(nre) && exp_relname(re) && a_match(exp_relname(nre), exp_relname(re))));
4073 532 : re = nre;
4074 : }
4075 217 : if (single_table) { /* add PROP_HASHCOL to all column exps */
4076 734 : for( n = exps->h; n; n = n->next) {
4077 522 : sql_exp *e = n->data;
4078 :
4079 : /* swapped ? */
4080 522 : if (is_join(rel->op) && ((left && !rel_find_exp(rel->l, e->l)) || (!left && !rel_find_exp(rel->r, e->l)))) {
4081 165 : exp_swap(e);
4082 : }
4083 522 : p = find_prop(e->p, PROP_HASHCOL);
4084 522 : if (!p)
4085 285 : e->p = p = prop_create(v->sql->sa, PROP_HASHCOL, e->p);
4086 522 : p->value.pval = i;
4087 : }
4088 : }
4089 : /* add the remaining exps to the new exp list */
4090 217 : if (list_length(rel->exps) > list_length(exps)) {
4091 96 : for( n = rel->exps->h; n; n = n->next) {
4092 72 : sql_exp *e = n->data;
4093 72 : if (!list_find(exps, e, (fcmp)&exp_cmp))
4094 24 : list_append(exps, e);
4095 : }
4096 : }
4097 217 : rel->exps = exps;
4098 : }
4099 : return rel;
4100 : }
4101 :
4102 : static sql_rel *
4103 1843604 : rel_select_leftgroup_2_semi(visitor *v, sql_rel *rel)
4104 : {
4105 1843604 : if (rel_is_ref(rel) || !is_select(rel->op) || list_empty(rel->exps))
4106 1629764 : return rel;
4107 213840 : sql_rel *l = rel->l;
4108 :
4109 213840 : if (!l || rel_is_ref(l) || !is_left(l->op) || list_empty(l->attr))
4110 213017 : return rel;
4111 :
4112 1638 : for(node *n = rel->exps->h; n; n = n->next) {
4113 823 : sql_exp *e = n->data;
4114 :
4115 823 : if (e->type == e_cmp && !is_semantics(e) && !e->f) {
4116 810 : list *attrs = l->attr;
4117 810 : sql_exp *a = attrs->h->data;
4118 :
4119 810 : if (exps_find_exp(l->attr, e->l) && exp_is_true(e->r) && e->flag == cmp_equal /*&& exp_is_true(a)*/) {
4120 : // printf("# optimize select leftgroup -> semi\n");
4121 8 : if (!list_empty(l->exps)) {
4122 13 : for(node *m = l->exps->h; m; m = m->next) {
4123 7 : sql_exp *j = m->data;
4124 7 : reset_any(j);
4125 : }
4126 : }
4127 8 : l->attr = NULL;
4128 8 : l->op = exp_is_true(a)?op_semi:op_anti;
4129 8 : list_remove_node(rel->exps, NULL, n);
4130 8 : rel = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
4131 8 : list_append(rel->exps, attrs->h->data);
4132 8 : v->changes++;
4133 8 : return rel;
4134 : }
4135 : }
4136 : }
4137 : return rel;
4138 : }
4139 :
4140 : static sql_rel *
4141 1843604 : rel_optimize_select_and_joins_topdown_(visitor *v, sql_rel *rel)
4142 : {
4143 : /* push_join_down introduces semijoins */
4144 1843604 : uint8_t cycle = *(uint8_t*) v->data;
4145 1843604 : if (cycle <= 0) {
4146 780085 : rel = rel_semijoin_use_fk(v, rel);
4147 780085 : rel = rel_push_join_down(v, rel);
4148 : }
4149 :
4150 1843604 : rel = rel_simplify_fk_joins(v, rel);
4151 1843604 : rel = rel_push_select_down(v, rel);
4152 1843604 : rel = rel_select_leftgroup_2_semi(v, rel);
4153 1843604 : if (rel && rel->l && (is_select(rel->op) || is_join(rel->op)))
4154 517474 : rel = rel_use_index(v, rel);
4155 1843604 : return rel;
4156 : }
4157 :
4158 : static sql_rel *
4159 135056 : rel_optimize_select_and_joins_topdown(visitor *v, global_props *gp, sql_rel *rel)
4160 : {
4161 135056 : v->data = &gp->opt_cycle;
4162 135056 : rel = rel_visitor_topdown(v, rel, &rel_optimize_select_and_joins_topdown_);
4163 135056 : v->data = gp;
4164 135056 : return rel;
4165 : }
4166 :
4167 : run_optimizer
4168 645164 : bind_optimize_select_and_joins_topdown(visitor *v, global_props *gp)
4169 : {
4170 645164 : int flag = v->sql->sql_optimizer;
4171 644969 : return gp->opt_level == 1 && (gp->cnt[op_join] || gp->cnt[op_left] || gp->cnt[op_right]
4172 558519 : || gp->cnt[op_full] || gp->cnt[op_semi] || gp->cnt[op_anti] ||
4173 1290133 : gp->cnt[op_select]) && (flag & optimize_select_and_joins_topdown) ? rel_optimize_select_and_joins_topdown : NULL;
4174 : }
4175 :
4176 :
4177 : static int
4178 643593 : can_push_func(sql_exp *e, sql_rel *rel, int *must, int depth)
4179 : {
4180 687736 : switch(e->type) {
4181 535627 : case e_cmp: {
4182 535627 : sql_exp *l = e->l, *r = e->r, *f = e->f;
4183 :
4184 : /* don't push down functions inside attribute joins */
4185 535627 : if (e->flag == cmp_or || e->flag == cmp_in || e->flag == cmp_notin || e->flag == cmp_filter || (is_join(rel->op) && is_any(e)))
4186 : return 0;
4187 528237 : if (depth > 0) { /* for comparisons under the top ones, they become functions */
4188 60 : int lmust = 0;
4189 60 : int res = can_push_func(l, rel, &lmust, depth + 1) && can_push_func(r, rel, &lmust, depth + 1) &&
4190 24 : (!f || can_push_func(f, rel, &lmust, depth + 1));
4191 14 : if (res && !lmust)
4192 : return 1;
4193 58 : (*must) |= lmust;
4194 58 : return res;
4195 : } else {
4196 528177 : int mustl = 0, mustr = 0, mustf = 0;
4197 528177 : return ((l->type == e_column || can_push_func(l, rel, &mustl, depth + 1)) && (*must = mustl)) ||
4198 1049641 : ((r->type == e_column || can_push_func(r, rel, &mustr, depth + 1)) && (*must = mustr)) ||
4199 2884 : ((f && (f->type == e_column || can_push_func(f, rel, &mustf, depth + 1)) && (*must = mustf)));
4200 : }
4201 : }
4202 44143 : case e_convert:
4203 44143 : return can_push_func(e->l, rel, must, depth + 1);
4204 9344 : case e_aggr:
4205 : case e_func: {
4206 9344 : list *l = e->l;
4207 9344 : int res = 1, lmust = 0;
4208 :
4209 9344 : if (exp_unsafe(e, false, false))
4210 : return 0;
4211 26666 : if (l) for (node *n = l->h; n && res; n = n->next)
4212 17330 : res &= can_push_func(n->data, rel, &lmust, depth + 1);
4213 9336 : if (res && !lmust)
4214 : return 1;
4215 8558 : (*must) |= lmust;
4216 8558 : return res;
4217 : }
4218 50525 : case e_column:
4219 50525 : if (rel && !rel_find_exp(rel, e))
4220 : return 0;
4221 29547 : (*must) = 1;
4222 : /* fall through */
4223 : default:
4224 : return 1;
4225 : }
4226 : }
4227 :
4228 : static int
4229 441408 : exps_can_push_func(list *exps, sql_rel *rel)
4230 : {
4231 2143547 : for(node *n = exps->h; n; n = n->next) {
4232 1725810 : sql_exp *e = n->data;
4233 1725810 : int mustl = 0, mustr = 0;
4234 :
4235 1725810 : if ((is_joinop(rel->op) || is_select(rel->op)) && ((can_push_func(e, rel->l, &mustl, 0) && mustl)))
4236 23671 : return 1;
4237 1722026 : if (is_joinop(rel->op) && can_push_func(e, rel->r, &mustr, 0) && mustr)
4238 : return 1;
4239 : }
4240 : return 0;
4241 : }
4242 :
4243 : static int
4244 80523 : exp_needs_push_down(sql_rel *rel, sql_exp *e)
4245 : {
4246 104565 : switch(e->type) {
4247 26986 : case e_cmp:
4248 : /* don't push down functions inside attribute joins */
4249 26986 : if (e->flag == cmp_or || e->flag == cmp_in || e->flag == cmp_notin || e->flag == cmp_filter || (is_join(rel->op) && is_any(e)))
4250 : return 0;
4251 26337 : return exp_needs_push_down(rel, e->l) || exp_needs_push_down(rel, e->r) || (e->f && exp_needs_push_down(rel, e->f));
4252 24042 : case e_convert:
4253 24042 : return exp_needs_push_down(rel, e->l);
4254 1926 : case e_aggr:
4255 : case e_func:
4256 1926 : if (!e->l || exps_are_atoms(e->l))
4257 18 : return 0;
4258 : return 1;
4259 1295 : case e_atom:
4260 1295 : if (!e->f || exps_are_atoms(e->f))
4261 1295 : return 0;
4262 : return 1;
4263 : case e_column:
4264 : default:
4265 : return 0;
4266 : }
4267 : }
4268 :
4269 : static int
4270 23671 : exps_need_push_down(sql_rel *rel, list *exps )
4271 : {
4272 48737 : for(node *n = exps->h; n; n = n->next)
4273 26974 : if (exp_needs_push_down(rel, n->data))
4274 : return 1;
4275 : return 0;
4276 : }
4277 :
4278 : static sql_exp *exp_push_single_func_down(visitor *v, sql_rel *rel, sql_rel *ol, sql_rel *or, sql_exp *e, int depth);
4279 :
4280 : static list *
4281 2418 : exps_push_single_func_down(visitor *v, sql_rel *rel, sql_rel *ol, sql_rel *or, list *exps, int depth)
4282 : {
4283 4836 : if (mvc_highwater(v->sql))
4284 : return exps;
4285 :
4286 6728 : for (node *n = exps->h; n; n = n->next)
4287 4310 : if ((n->data = exp_push_single_func_down(v, rel, ol, or, n->data, depth)) == NULL)
4288 : return NULL;
4289 : return exps;
4290 : }
4291 :
4292 : static sql_exp *
4293 15746 : exp_push_single_func_down(visitor *v, sql_rel *rel, sql_rel *ol, sql_rel *or, sql_exp *e, int depth)
4294 : {
4295 27522 : if (mvc_highwater(v->sql))
4296 : return e;
4297 :
4298 15746 : switch(e->type) {
4299 4252 : case e_cmp: {
4300 4252 : if (e->flag == cmp_or || e->flag == cmp_filter) {
4301 246 : if ((e->l = exps_push_single_func_down(v, rel, ol, or, e->l, depth + 1)) == NULL)
4302 : return NULL;
4303 246 : if ((e->r = exps_push_single_func_down(v, rel, ol, or, e->r, depth + 1)) == NULL)
4304 : return NULL;
4305 4006 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
4306 18 : if ((e->l = exp_push_single_func_down(v, rel, ol, or, e->l, depth + 1)) == NULL)
4307 : return NULL;
4308 18 : if ((e->r = exps_push_single_func_down(v, rel, ol, or, e->r, depth + 1)) == NULL)
4309 : return NULL;
4310 : } else {
4311 3988 : if ((e->l = exp_push_single_func_down(v, rel, ol, or, e->l, depth + 1)) == NULL)
4312 : return NULL;
4313 3988 : if ((e->r = exp_push_single_func_down(v, rel, ol, or, e->r, depth + 1)) == NULL)
4314 : return NULL;
4315 3988 : if (e->f && (e->f = exp_push_single_func_down(v, rel, ol, or, e->f, depth + 1)) == NULL)
4316 : return NULL;
4317 : }
4318 : } break;
4319 2010 : case e_convert:
4320 2010 : if ((e->l = exp_push_single_func_down(v, rel, ol, or, e->l, depth + 1)) == NULL)
4321 : return NULL;
4322 : break;
4323 3993 : case e_aggr:
4324 : case e_func: {
4325 3993 : sql_rel *l = rel->l, *r = rel->r;
4326 3993 : int must = 0, mustl = 0, mustr = 0;
4327 :
4328 3993 : if (exp_unsafe(e, false, false))
4329 23 : return e;
4330 3993 : if (!e->l || exps_are_atoms(e->l))
4331 23 : return e;
4332 3970 : if ((is_joinop(rel->op) && ((can_push_func(e, l, &mustl, depth + 1) && mustl) || (can_push_func(e, r, &mustr, depth + 1) && mustr))) ||
4333 2973 : (is_select(rel->op) && can_push_func(e, l, &must, depth + 1) && must)) {
4334 3957 : exp_label(v->sql->sa, e, ++v->sql->label);
4335 : /* we need a full projection, group by's and unions cannot be extended with more expressions */
4336 3957 : if (mustr) {
4337 357 : if (r == or) /* don't project twice */
4338 334 : rel->r = r = rel_project(v->sql->sa, r, rel_projections(v->sql, r, NULL, 1, 1));
4339 357 : list_append(r->exps, e);
4340 : } else {
4341 3600 : if (l == ol) /* don't project twice */
4342 1844 : rel->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
4343 3600 : list_append(l->exps, e);
4344 : }
4345 3957 : e = exp_ref(v->sql, e);
4346 3957 : v->changes++;
4347 : }
4348 3970 : } break;
4349 1156 : case e_atom: {
4350 1156 : if (e->f && (e->f = exps_push_single_func_down(v, rel, ol, or, e->f, depth + 1)) == NULL)
4351 : return NULL;
4352 : } break;
4353 : case e_column:
4354 : case e_psm:
4355 : break;
4356 : }
4357 : return e;
4358 : }
4359 :
4360 : static inline sql_rel *
4361 1857639 : rel_push_func_down(visitor *v, sql_rel *rel)
4362 : {
4363 1857639 : if ((is_select(rel->op) || is_joinop(rel->op)) && rel->l && rel->exps && !(rel_is_ref(rel))) {
4364 508143 : int changes = v->changes;
4365 508143 : sql_rel *l = rel->l, *r = rel->r;
4366 :
4367 : /* only push down when is useful */
4368 508143 : if ((is_select(rel->op) && list_length(rel->exps) <= 1) || rel_is_ref(l) || (is_joinop(rel->op) && rel_is_ref(r)))
4369 250383 : return rel;
4370 257760 : if (exps_can_push_func(rel->exps, rel) && exps_need_push_down(rel, rel->exps) && !exps_push_single_func_down(v, rel, l, r, rel->exps, 0))
4371 : return NULL;
4372 257760 : if (v->changes > changes) /* once we get a better join order, we can try to remove this projection */
4373 1908 : return rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
4374 : }
4375 1605348 : if (is_simple_project(rel->op) && rel->l && rel->exps) {
4376 645890 : sql_rel *pl = rel->l;
4377 :
4378 645890 : if (is_joinop(pl->op) && exps_can_push_func(rel->exps, rel)) {
4379 0 : sql_rel *l = pl->l, *r = pl->r, *ol = l, *or = r;
4380 :
4381 0 : for (node *n = rel->exps->h; n; ) {
4382 0 : node *next = n->next;
4383 0 : sql_exp *e = n->data;
4384 0 : int mustl = 0, mustr = 0;
4385 :
4386 0 : if ((can_push_func(e, l, &mustl, 0) && mustl) || (can_push_func(e, r, &mustr, 0) && mustr)) {
4387 0 : if (mustl) {
4388 0 : if (l == ol) /* don't project twice */
4389 0 : pl->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
4390 0 : list_append(l->exps, e);
4391 0 : list_remove_node(rel->exps, NULL, n);
4392 0 : v->changes++;
4393 : } else {
4394 0 : if (r == or) /* don't project twice */
4395 0 : pl->r = r = rel_project(v->sql->sa, r, rel_projections(v->sql, r, NULL, 1, 1));
4396 0 : list_append(r->exps, e);
4397 0 : list_remove_node(rel->exps, NULL, n);
4398 0 : v->changes++;
4399 : }
4400 : }
4401 0 : n = next;
4402 : }
4403 : }
4404 : }
4405 : return rel;
4406 : }
4407 :
4408 : static sql_rel *
4409 1857639 : rel_push_func_and_select_down_(visitor *v, sql_rel *rel)
4410 : {
4411 1857639 : if (rel)
4412 1857639 : rel = rel_push_func_down(v, rel);
4413 1857639 : if (rel)
4414 1857639 : rel = rel_push_select_down(v, rel);
4415 1857639 : return rel;
4416 : }
4417 :
4418 : static sql_rel *
4419 135056 : rel_push_func_and_select_down(visitor *v, global_props *gp, sql_rel *rel)
4420 : {
4421 135056 : (void) gp;
4422 135056 : return rel_visitor_topdown(v, rel, &rel_push_func_and_select_down_);
4423 : }
4424 :
4425 : run_optimizer
4426 644891 : bind_push_func_and_select_down(visitor *v, global_props *gp)
4427 : {
4428 644891 : int flag = v->sql->sql_optimizer;
4429 644688 : return gp->opt_level == 1 && (gp->cnt[op_join] || gp->cnt[op_left] || gp->cnt[op_right]
4430 558420 : || gp->cnt[op_full] || gp->cnt[op_semi] || gp->cnt[op_anti] || gp->cnt[op_select])
4431 779614 : && (flag & push_func_and_select_down) ? rel_push_func_and_select_down : NULL;
4432 : }
|