Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer_private.h"
15 : #include "rel_basetable.h"
16 : #include "rel_exp.h"
17 : #include "rel_select.h"
18 : #include "rel_rewriter.h"
19 :
20 : static int
21 3963854 : exp_is_rename(sql_exp *e)
22 : {
23 3963854 : return (e->type == e_column);
24 : }
25 :
26 : static int
27 2212892 : exp_is_useless_rename(sql_exp *e)
28 : {
29 4425784 : return (e->type == e_column &&
30 2212892 : ((!e->l && !exp_relname(e)) ||
31 4425366 : (e->l && exp_relname(e) && strcmp(e->l, exp_relname(e)) == 0)) &&
32 1963478 : strcmp(e->r, exp_name(e)) == 0);
33 : }
34 :
35 : static list *
36 19187 : rel_used_projections(mvc *sql, list *exps, list *users)
37 : {
38 19187 : list *nexps = sa_list(sql->sa);
39 19187 : bool *used = SA_ZNEW_ARRAY(sql->ta, bool, list_length(exps));
40 19187 : int i = 0;
41 :
42 129108 : for(node *n = users->h; n; n = n->next) {
43 109921 : sql_exp *e = n->data, *ne = NULL;
44 109921 : if ((e->l && (ne = exps_bind_column2(exps, e->l, e->r, NULL))) || (ne = exps_bind_column(exps, e->r, NULL, NULL, 1))) {
45 109921 : used[list_position(exps, ne)] = 1;
46 : }
47 : }
48 138261 : for(node *n = exps->h; n; n = n->next, i++) {
49 119074 : sql_exp *e = n->data;
50 119074 : if (is_intern(e) || used[i])
51 119070 : append(nexps, e);
52 : }
53 19187 : return nexps;
54 : }
55 :
56 : /* move projects down with the goal op removing them completely (ie push renames/reduced lists into basetable)
57 : * for some cases we can directly remove iff renames rename into same alias
58 : * */
59 : static sql_rel *
60 4516879 : rel_push_project_down_(visitor *v, sql_rel *rel)
61 : {
62 : /* for now only push down renames */
63 4516879 : if (v->depth > 1 && is_simple_project(rel->op) && !need_distinct(rel) && !rel_is_ref(rel) && rel->l && !rel->r &&
64 1220168 : v->parent &&
65 1209091 : !is_modify(v->parent->op) && !is_topn(v->parent->op) && !is_sample(v->parent->op) &&
66 792721 : !is_ddl(v->parent->op) && !is_set(v->parent->op) &&
67 792721 : list_check_prop_all(rel->exps, (prop_check_func)&exp_is_rename)) {
68 469919 : sql_rel *l = rel->l;
69 :
70 469919 : if (rel_is_ref(l))
71 : return rel;
72 417754 : if (is_basetable(l->op)) {
73 38126 : if (list_check_prop_all(rel->exps, (prop_check_func)&exp_is_useless_rename)) {
74 : /* TODO reduce list (those in the project + internal) */
75 19187 : rel->l = NULL;
76 19187 : l->exps = rel_used_projections(v->sql, l->exps, rel->exps);
77 19187 : rel_destroy(rel);
78 19187 : v->changes++;
79 19187 : return l;
80 : }
81 : return rel;
82 379628 : } else if (list_check_prop_all(rel->exps, (prop_check_func)&exp_is_useless_rename)) {
83 148942 : if ((is_project(l->op) && list_length(l->exps) == list_length(rel->exps)) ||
84 138103 : ((v->parent && is_project(v->parent->op)) &&
85 39164 : (is_set(l->op) || is_select(l->op) || is_join(l->op) || is_semi(l->op) || is_topn(l->op) || is_sample(l->op)))) {
86 45895 : rel->l = NULL;
87 45895 : rel_destroy(rel);
88 45895 : v->changes++;
89 45895 : return l;
90 : }
91 : }
92 : }
93 : /* ToDo handle useful renames, ie new relation name and unique set of attribute names (could reduce set of * attributes) */
94 : /* handle both useless and useful with project [ group by ] */
95 : return rel;
96 : }
97 :
98 : static sql_rel *
99 166422 : rel_push_project_down(visitor *v, global_props *gp, sql_rel *rel)
100 : {
101 166422 : (void) gp;
102 166422 : return rel_visitor_bottomup(v, rel, &rel_push_project_down_);
103 : }
104 :
105 : run_optimizer
106 596802 : bind_push_project_down(visitor *v, global_props *gp)
107 : {
108 596802 : int flag = v->sql->sql_optimizer;
109 596614 : return gp->opt_level == 1 && (flag & push_project_down) &&
110 1193412 : (gp->cnt[op_project] || gp->cnt[op_groupby]) ? rel_push_project_down : NULL;
111 : }
112 :
113 :
114 : static bool exp_shares_exps(sql_exp *e, list *shared, uint64_t *uses);
115 :
116 : static bool
117 96760 : exps_shares_exps(list *exps, list *shared, uint64_t *uses)
118 : {
119 96760 : if (!exps || !shared)
120 : return false;
121 331014 : for (node *n = exps->h; n; n = n->next) {
122 234377 : sql_exp *e = n->data;
123 :
124 234377 : if (exp_shares_exps(e, shared, uses))
125 : return true;
126 : }
127 : return false;
128 : }
129 :
130 : static bool
131 2051095 : exp_shares_exps(sql_exp *e, list *shared, uint64_t *uses)
132 : {
133 3828854 : switch(e->type) {
134 17 : case e_cmp:
135 17 : if (e->flag == cmp_or || e->flag == cmp_filter)
136 0 : return exps_shares_exps(e->l, shared, uses) || exps_shares_exps(e->r, shared, uses);
137 17 : else if (e->flag == cmp_in || e->flag == cmp_notin)
138 0 : return exp_shares_exps(e->l, shared, uses) || exps_shares_exps(e->r, shared, uses);
139 : else
140 34 : return exp_shares_exps(e->l, shared, uses) || exp_shares_exps(e->r, shared, uses) || (e->f && exp_shares_exps(e->f, shared, uses));
141 97567 : case e_atom:
142 97567 : if (e->f)
143 0 : return exps_shares_exps(e->f, shared, uses);
144 : return false;
145 3416419 : case e_column:
146 : {
147 3416419 : sql_exp *ne = NULL;
148 3416419 : if (e->l)
149 3270487 : ne = exps_bind_column2(shared, e->l, e->r, NULL);
150 3416419 : if (!ne && !e->l)
151 145932 : ne = exps_bind_column(shared, e->r, NULL, NULL, 1);
152 965060 : if (!ne)
153 : return false;
154 2541307 : if (ne->type != e_column) {
155 297077 : int i = list_position(shared, ne);
156 297077 : if (i < 0)
157 : return false;
158 297077 : uint64_t used = (uint64_t) 1 << i;
159 297077 : if (used & *uses)
160 : return true;
161 296956 : *uses |= used;
162 296956 : return false;
163 : }
164 2244230 : if (ne != e && (list_position(shared, e) < 0 || list_position(shared, e) > list_position(shared, ne)))
165 : /* maybe ne refers to a local complex exp */
166 1559668 : return exp_shares_exps(ne, shared, uses);
167 : return false;
168 : }
169 218091 : case e_convert:
170 218091 : return exp_shares_exps(e->l, shared, uses);
171 96760 : case e_aggr:
172 : case e_func:
173 96760 : return exps_shares_exps(e->l, shared, uses);
174 : case e_psm:
175 0 : assert(0); /* not in projection list */
176 : }
177 : return false;
178 : }
179 :
180 : static bool
181 253243 : exps_share_expensive_exp(list *exps, list *shared )
182 : {
183 253243 : uint64_t uses = 0;
184 :
185 253243 : if (list_empty(exps) || list_empty(shared))
186 0 : return false;
187 253243 : if (list_length(shared) > 64)
188 : return true;
189 2069244 : for (node *n = exps->h; n; n = n->next) {
190 1816667 : sql_exp *e = n->data;
191 :
192 1816667 : if (exp_shares_exps(e, shared, &uses))
193 : return true;
194 : }
195 : return false;
196 : }
197 :
198 : static bool ambigious_ref( list *exps, sql_exp *e);
199 : static bool
200 76656 : ambigious_refs( list *exps, list *refs)
201 : {
202 76656 : if (list_empty(refs))
203 : return false;
204 230966 : for(node *n=refs->h; n; n = n->next) {
205 174072 : if (ambigious_ref(exps, n->data))
206 : return true;
207 : }
208 : return false;
209 : }
210 :
211 : static bool
212 2857764 : ambigious_ref( list *exps, sql_exp *e)
213 : {
214 2857764 : sql_exp *ne = NULL;
215 :
216 2857764 : if (e->type == e_column) {
217 2450458 : if (e->l)
218 2362592 : ne = exps_bind_column2(exps, e->l, e->r, NULL);
219 2450458 : if (!ne && !e->l)
220 87866 : ne = exps_bind_column(exps, e->r, NULL, NULL, 1);
221 2450458 : if (ne && e != ne)
222 : return true;
223 : }
224 2852637 : if (e->type == e_func)
225 76656 : return ambigious_refs(exps, e->l);
226 : return false;
227 : }
228 :
229 : /* merge 2 projects into the lower one */
230 : static sql_rel *
231 4831651 : rel_merge_projects_(visitor *v, sql_rel *rel)
232 : {
233 4897450 : list *exps = rel->exps;
234 4897450 : sql_rel *prj = rel->l;
235 4897450 : node *n;
236 :
237 4897450 : if (rel->op == op_project &&
238 1845851 : prj && prj->op == op_project && !(rel_is_ref(prj)) && list_empty(prj->r)) {
239 687402 : int all = 1;
240 :
241 687402 : if (project_unsafe(rel,0) || project_unsafe(prj,0) || exps_share_expensive_exp(rel->exps, prj->exps))
242 434825 : return rel;
243 :
244 : /* here we try to fix aliases */
245 252577 : list *nexps = NULL;
246 : /* for each exp check if we can rename it */
247 1682669 : for (n = exps->h; n && all; n = n->next) {
248 1435219 : sql_exp *e = n->data, *ne = NULL;
249 :
250 : /* We do not handle expressions pointing back in the list */
251 1435219 : if (ambigious_ref(exps, e)) {
252 : all = 0;
253 : break;
254 : }
255 1430124 : ne = exp_push_down_prj(v->sql, e, prj, prj->l);
256 : /* check if the refered alias name isn't used twice */
257 1430124 : if (ne && ambigious_ref(nexps, ne)) {
258 : all = 0;
259 : break;
260 : }
261 1248441 : if (ne) {
262 1248441 : if (exp_name(e))
263 1242028 : exp_prop_alias(v->sql->sa, ne, e);
264 1248441 : if (!nexps)
265 241037 : nexps = new_exp_list(v->sql->sa);
266 1248441 : list_append(nexps, ne);
267 : } else {
268 : all = 0;
269 : }
270 : }
271 252577 : if (all) {
272 65799 : rel->exps = nexps;
273 : /* we can now remove the intermediate project */
274 : /* push order by expressions */
275 65799 : if (!list_empty(rel->r)) {
276 0 : list *nr = new_exp_list(v->sql->sa), *res = rel->r;
277 0 : for (n = res->h; n; n = n->next) {
278 0 : sql_exp *e = n->data, *ne = NULL;
279 :
280 0 : ne = exp_push_down_prj(v->sql, e, prj, prj->l);
281 0 : if (ne) {
282 0 : if (exp_name(e))
283 0 : exp_prop_alias(v->sql->sa, ne, e);
284 0 : list_append(nr, ne);
285 : } else {
286 : all = 0;
287 : }
288 : }
289 0 : if (all) {
290 0 : rel->r = nr;
291 : } else {
292 : /* leave as is */
293 0 : rel->exps = exps;
294 0 : return rel;
295 : }
296 : }
297 65799 : rel->l = prj->l;
298 65799 : prj->l = NULL;
299 65799 : rel_destroy(prj);
300 65799 : v->changes++;
301 65799 : return rel_merge_projects_(v, rel);
302 : } else {
303 : /* leave as is */
304 186778 : rel->exps = exps;
305 : }
306 186778 : return rel;
307 : }
308 : return rel;
309 : }
310 :
311 : static sql_rel *
312 166417 : rel_merge_projects(visitor *v, global_props *gp, sql_rel *rel)
313 : {
314 166417 : (void) gp;
315 166417 : return rel_visitor_bottomup(v, rel, &rel_merge_projects_);
316 : }
317 :
318 : run_optimizer
319 596801 : bind_merge_projects(visitor *v, global_props *gp)
320 : {
321 596801 : int flag = v->sql->sql_optimizer;
322 596613 : return gp->opt_level == 1 && (flag & merge_projects) &&
323 1193402 : (gp->cnt[op_project] || gp->cnt[op_groupby]) ? rel_merge_projects : NULL;
324 : }
325 :
326 :
327 : static sql_exp *split_aggr_and_project(mvc *sql, list *aexps, sql_exp *e);
328 :
329 : static void
330 0 : list_split_aggr_and_project(mvc *sql, list *aexps, list *exps)
331 : {
332 0 : if (list_empty(exps))
333 : return ;
334 0 : for(node *n = exps->h; n; n = n->next)
335 0 : n->data = split_aggr_and_project(sql, aexps, n->data);
336 : }
337 :
338 : static sql_exp *
339 0 : split_aggr_and_project(mvc *sql, list *aexps, sql_exp *e)
340 : {
341 0 : switch(e->type) {
342 0 : case e_aggr:
343 : /* add to the aggrs */
344 0 : if (!exp_name(e))
345 0 : exp_label(sql->sa, e, ++sql->label);
346 0 : list_append(aexps, e);
347 0 : return exp_ref(sql, e);
348 : case e_cmp:
349 : /* e_cmp's shouldn't exist in an aggr expression list */
350 0 : assert(0);
351 : case e_convert:
352 0 : e->l = split_aggr_and_project(sql, aexps, e->l);
353 0 : return e;
354 0 : case e_func:
355 0 : list_split_aggr_and_project(sql, aexps, e->l);
356 0 : return e;
357 : case e_atom:
358 : case e_column: /* constants and columns shouldn't be rewriten */
359 : case e_psm:
360 : return e;
361 : }
362 : return NULL;
363 : }
364 :
365 : /* exp_rename */
366 : static sql_exp * exp_rename(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t);
367 :
368 : static list *
369 48794 : exps_rename(mvc *sql, list *l, sql_rel *f, sql_rel *t)
370 : {
371 48794 : if (list_empty(l))
372 : return l;
373 43192 : for (node *n=l->h; n; n=n->next)
374 27597 : n->data = exp_rename(sql, n->data, f, t);
375 : return l;
376 : }
377 :
378 : /* exp_rename */
379 : static sql_exp *
380 173094 : exp_rename(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t)
381 : {
382 173094 : sql_exp *ne = NULL;
383 :
384 173094 : assert(is_project(f->op));
385 :
386 173094 : switch(e->type) {
387 73135 : case e_column:
388 73135 : if (e->l) {
389 73100 : ne = exps_bind_column2(f->exps, e->l, e->r, NULL);
390 : /* if relation name matches expressions relation name, find column based on column name alone */
391 : } else {
392 35 : ne = exps_bind_column(f->exps, e->r, NULL, NULL, 1);
393 : }
394 73135 : if (!ne)
395 : return e;
396 49170 : sql_exp *oe = e;
397 49170 : e = NULL;
398 49170 : if (exp_name(ne) && ne->r && ne->l)
399 48275 : e = rel_bind_column2(sql, t, ne->l, ne->r, 0);
400 49170 : if (!e && ne->r)
401 814 : e = rel_bind_column(sql, t, ne->r, 0, 1);
402 896 : if (!e) {
403 83 : sql->session->status = 0;
404 83 : sql->errstr[0] = 0;
405 83 : if (exp_is_atom(ne))
406 : return ne;
407 : return oe;
408 : }
409 49087 : return exp_ref(sql, e);
410 51064 : case e_cmp:
411 51064 : if (e->flag == cmp_or || e->flag == cmp_filter) {
412 3645 : e->l = exps_rename(sql, e->l, f, t);
413 3645 : e->r = exps_rename(sql, e->r, f, t);
414 47419 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
415 5639 : e->l = exp_rename(sql, e->l, f, t);
416 5639 : e->r = exps_rename(sql, e->r, f, t);
417 : } else {
418 41780 : e->l = exp_rename(sql, e->l, f, t);
419 41780 : e->r = exp_rename(sql, e->r, f, t);
420 41780 : if (e->f)
421 227 : e->f = exp_rename(sql, e->f, f, t);
422 : }
423 : break;
424 13030 : case e_convert:
425 13030 : e->l = exp_rename(sql, e->l, f, t);
426 13030 : break;
427 2666 : case e_aggr:
428 : case e_func:
429 2666 : e->l = exps_rename(sql, e->l, f, t);
430 2666 : break;
431 33199 : case e_atom:
432 33199 : e->f = exps_rename(sql, e->f, f, t);
433 33199 : break;
434 : case e_psm:
435 : break;
436 : }
437 : return e;
438 : }
439 :
440 : static int
441 7296576 : exp_match_exp_cmp( sql_exp *e1, sql_exp *e2)
442 : {
443 7296576 : if (exp_match_exp(e1,e2))
444 96353 : return 0;
445 : return -1;
446 : }
447 :
448 : /* Pushing projects up the tree. Done very early in the optimizer.
449 : * Makes later steps easier.
450 : */
451 : extern void _rel_print(mvc *sql, sql_rel *rel);
452 : static sql_rel *
453 4414180 : rel_push_project_up_(visitor *v, sql_rel *rel)
454 : {
455 4414180 : if (is_simple_project(rel->op) && rel->l && !rel_is_ref(rel)) {
456 1325817 : sql_rel *l = rel->l;
457 1325817 : if (is_simple_project(l->op))
458 343556 : return rel_merge_projects_(v, rel);
459 : /* find equal column references, later cases are rewritten into references back to the first
460 : * project () [ i.i L1, i.i L2 ] -> project() [ i.i L1, L1 L2 ] */
461 982261 : if (list_length(rel->exps) > 1) {
462 883427 : list *exps = rel->exps;
463 883427 : node *n = exps->h;
464 883427 : bool needed = false;
465 4452122 : for(n = n->next; n && !needed; n = n->next) {
466 3568705 : sql_exp *e = n->data;
467 3568705 : if (e->type == e_column && !is_selfref(e)) {
468 22953687 : for(node *m = exps->h; m && m != n && !needed; m = m->next) {
469 19891053 : sql_exp *h = m->data;
470 19891053 : if (exp_match_exp(h,e))
471 3587 : needed = true;
472 : }
473 : }
474 : }
475 883417 : if (needed) {
476 3587 : rel->exps = sa_list(v->sql->sa);
477 3587 : node *n = exps->h;
478 3587 : list_append(rel->exps, n->data);
479 69699 : for(n = n->next; n; n = n->next) {
480 66112 : sql_exp *e = n->data;
481 66112 : if (e->type == e_column && !is_selfref(e)) {
482 25687 : node *m = list_find(rel->exps, e, (fcmp)&exp_match_exp_cmp);
483 25687 : if (m) {
484 4101 : sql_exp *ne = exp_ref(v->sql, m->data);
485 4101 : exp_setname(v->sql->sa, ne, exp_relname(e), exp_name(e));
486 4101 : exp_propagate(v->sql->sa, ne, e);
487 4101 : set_selfref(ne);
488 4101 : e = ne;
489 : }
490 : }
491 66112 : list_append(rel->exps, e);
492 : }
493 : }
494 : }
495 : }
496 :
497 : /* project/project cleanup is done later */
498 4070612 : if (is_join(rel->op) || is_select(rel->op)) {
499 1315804 : node *n;
500 1315804 : list *exps = NULL, *l_exps, *r_exps;
501 1315804 : sql_rel *l = rel->l;
502 1315804 : sql_rel *r = rel->r;
503 1315804 : sql_rel *t;
504 1315804 : int nlexps = 0, i = 0;
505 :
506 : /* Don't rewrite refs, non projections or constant or
507 : order by projections */
508 1315804 : if (!l || rel_is_ref(l) || is_topn(l->op) || is_sample(l->op) ||
509 1202910 : (is_join(rel->op) && !list_empty(rel->attr)) ||
510 1133752 : (is_join(rel->op) && (!r || rel_is_ref(r))) ||
511 1110791 : (is_left(rel->op) && (rel->flag&MERGE_LEFT) /* can't push projections above merge statments left joins */) ||
512 1110544 : (is_select(rel->op) && l->op != op_project) ||
513 600153 : (is_join(rel->op) && ((l->op != op_project && r->op != op_project) || is_topn(r->op) || is_sample(r->op))) ||
514 192802 : ((l->op == op_project && (!l->l || l->r || project_unsafe(l,is_select(rel->op)))) ||
515 88376 : (is_join(rel->op) && (r->op == op_project && (!r->l || r->r || project_unsafe(r,0))))))
516 1252697 : return rel;
517 :
518 63107 : if (l->op == op_project && l->l) {
519 903945 : for (n = l->exps->h; n; n = n->next) {
520 865310 : sql_exp *e = n->data;
521 865310 : if (!(is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) &&
522 864671 : !(e->type == e_column && !has_label(e)))
523 : return rel;
524 : }
525 : }
526 47237 : if (is_join(rel->op) && r->op == op_project && r->l) {
527 53570 : for (n = r->exps->h; n; n = n->next) {
528 47265 : sql_exp *e = n->data;
529 47265 : if (!(is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) &&
530 46884 : !(e->type == e_column && !has_label(e)))
531 : return rel;
532 : }
533 : }
534 :
535 41746 : if (l->op == op_project && l->l) {
536 : /* Go through the list of project expressions.
537 : Check if they can be pushed up, ie are they not
538 : changing or introducing any columns used
539 : by the upper operator. */
540 :
541 38040 : exps = new_exp_list(v->sql->sa);
542 831175 : for (n = l->exps->h; n; n = n->next) {
543 793135 : sql_exp *e = n->data;
544 :
545 : /* we cannot rewrite projection with atomic values from outer joins */
546 793135 : if (is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) {
547 610 : list_append(exps, e);
548 792525 : } else if (e->type == e_column) {
549 792525 : if (has_label(e))
550 : return rel;
551 792525 : list_append(exps, e);
552 : } else {
553 : return rel;
554 : }
555 : }
556 : } else {
557 3706 : exps = rel_projections(v->sql, l, NULL, 1, 1);
558 : }
559 41746 : nlexps = list_length(exps);
560 : /* also handle right hand of join */
561 41746 : if (is_join(rel->op) && r->op == op_project && r->l && list_empty(rel->attr)) {
562 : /* Here we also check all expressions of r like above
563 : but also we need to check for ambigious names. */
564 :
565 46637 : for (n = r->exps->h; n; n = n->next) {
566 40544 : sql_exp *e = n->data;
567 :
568 : /* we cannot rewrite projection with atomic values from outer joins */
569 40544 : if (is_column(e->type) && exp_is_atom(e) && !(is_left(rel->op) || is_full(rel->op))) {
570 155 : list_append(exps, e);
571 40389 : } else if (e->type == e_column) {
572 40177 : if (has_label(e))
573 : return rel;
574 40177 : list_append(exps, e);
575 : } else {
576 : return rel;
577 : }
578 : }
579 35441 : } else if (is_join(rel->op) && list_empty(rel->attr)) {
580 18885 : list *r_exps = rel_projections(v->sql, r, NULL, 1, 1);
581 18885 : list_merge(exps, r_exps, (fdup)NULL);
582 : }
583 41534 : if (!list_empty(rel->attr))
584 0 : append(exps, exp_ref(v->sql, rel->attr->h->data));
585 : /* Here we should check for ambigious names ? */
586 41534 : if (is_join(rel->op) && r && list_empty(rel->attr)) {
587 24978 : t = (l->op == op_project && l->l)?l->l:l;
588 24978 : l_exps = rel_projections(v->sql, t, NULL, 1, 1);
589 : /* conflict with old right expressions */
590 24978 : r_exps = rel_projections(v->sql, r, NULL, 1, 1);
591 24978 : if (rel->attr)
592 0 : append(r_exps, exp_ref(v->sql, rel->attr->h->data));
593 630297 : for(n = l_exps->h; n; n = n->next) {
594 605704 : sql_exp *e = n->data;
595 605704 : const char *rname = exp_relname(e);
596 605704 : const char *name = exp_name(e);
597 :
598 605704 : if (exp_is_atom(e))
599 0 : continue;
600 605704 : if ((rname && exps_bind_column2(r_exps, rname, name, NULL) != NULL) ||
601 650 : (!rname && exps_bind_column(r_exps, name, NULL, NULL, 1) != NULL))
602 385 : return rel;
603 : }
604 24593 : t = (r->op == op_project && r->l)?r->l:r;
605 24593 : r_exps = rel_projections(v->sql, t, NULL, 1, 1);
606 : /* conflict with new right expressions */
607 613429 : for(n = l_exps->h; n; n = n->next) {
608 590845 : sql_exp *e = n->data;
609 :
610 590845 : if (exp_is_atom(e))
611 0 : continue;
612 1179706 : if ((e->l && exps_bind_column2(r_exps, e->l, e->r, NULL) != NULL) ||
613 588899 : (exps_bind_column(r_exps, e->r, NULL, NULL, 1) != NULL && (!e->l || !e->r)))
614 2009 : return rel;
615 : }
616 : /* conflict with new left expressions */
617 160941 : for(n = r_exps->h; n; n = n->next) {
618 138357 : sql_exp *e = n->data;
619 :
620 138357 : if (exp_is_atom(e))
621 0 : continue;
622 276714 : if ((e->l && exps_bind_column2(l_exps, e->l, e->r, NULL) != NULL) ||
623 138426 : (exps_bind_column(l_exps, e->r, NULL, NULL, 1) != NULL && (!e->l || !e->r)))
624 0 : return rel;
625 : }
626 : }
627 :
628 : /* rename operator expressions */
629 39140 : if (l->op == op_project) {
630 : /* rewrite rel from rel->l into rel->l->l */
631 35669 : if (rel->exps) {
632 73713 : for (n = rel->exps->h; n; n = n->next) {
633 39081 : sql_exp *e = n->data, *ne;
634 :
635 39081 : ne = exp_rename(v->sql, e, l, l->l);
636 39081 : assert(ne);
637 39081 : if (ne != e && exp_name(e))
638 0 : exp_propagate(v->sql->sa, ne, e);
639 39081 : n->data = ne;
640 : }
641 : }
642 35669 : rel->l = l->l;
643 35669 : l->l = NULL;
644 35669 : rel_destroy(l);
645 : }
646 39140 : if (is_join(rel->op) && r->op == op_project && list_empty(rel->attr)) {
647 : /* rewrite rel from rel->r into rel->r->l */
648 4069 : if (rel->exps) {
649 7434 : for (n = rel->exps->h; n; n = n->next) {
650 3960 : sql_exp *e = n->data, *ne;
651 :
652 3960 : ne = exp_rename(v->sql, e, r, r->l);
653 3960 : assert(ne);
654 3960 : if (ne != e && exp_name(e))
655 0 : exp_propagate(v->sql->sa, ne, e);
656 3960 : n->data = ne;
657 : }
658 : }
659 4069 : rel->r = r->l;
660 4069 : r->l = NULL;
661 4069 : rel_destroy(r);
662 : }
663 : /* Done, ie introduce new project */
664 39140 : exps_fix_card(exps, rel->card);
665 : /* Fix nil flag */
666 39140 : if (!list_empty(exps)) {
667 834560 : for (n = exps->h ; n && i < nlexps ; n = n->next, i++) {
668 795420 : sql_exp *e = n->data;
669 :
670 795420 : if (is_right(rel->op) || is_full(rel->op))
671 401 : set_has_nil(e);
672 795420 : set_not_unique(e);
673 : }
674 167982 : for (; n ; n = n->next) {
675 128842 : sql_exp *e = n->data;
676 :
677 128842 : if (is_left(rel->op) || is_full(rel->op))
678 41078 : set_has_nil(e);
679 128842 : set_not_unique(e);
680 : }
681 : }
682 39140 : v->changes++;
683 39140 : return rel_inplace_project(v->sql->sa, rel, NULL, exps);
684 : }
685 2754808 : if (is_groupby(rel->op) && !rel_is_ref(rel) && rel->exps && list_length(rel->exps) > 1) {
686 43508 : node *n;
687 43508 : int fnd = 0;
688 43508 : list *aexps, *pexps;
689 :
690 : /* check if some are expressions aren't e_aggr */
691 163358 : for (n = rel->exps->h; n && !fnd; n = n->next) {
692 119850 : sql_exp *e = n->data;
693 :
694 119850 : if (e->type != e_aggr && e->type != e_column && e->type != e_atom && e->card > CARD_ATOM) {
695 119850 : fnd = 1;
696 : }
697 : }
698 : /* only aggr, no rewrite needed */
699 43508 : if (!fnd)
700 : return rel;
701 :
702 0 : aexps = sa_list(v->sql->sa);
703 0 : pexps = sa_list(v->sql->sa);
704 0 : for (n = rel->exps->h; n; n = n->next) {
705 0 : sql_exp *e = n->data, *ne = NULL;
706 :
707 0 : switch (e->type) {
708 0 : case e_atom: /* move over to the projection */
709 0 : list_append(pexps, e);
710 0 : break;
711 0 : case e_func:
712 0 : list_append(pexps, e);
713 0 : list_split_aggr_and_project(v->sql, aexps, e->l);
714 0 : break;
715 0 : case e_convert:
716 0 : list_append(pexps, e);
717 0 : e->l = split_aggr_and_project(v->sql, aexps, e->l);
718 0 : break;
719 0 : default: /* simple alias */
720 0 : list_append(aexps, e);
721 0 : ne = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
722 0 : list_append(pexps, ne);
723 0 : break;
724 : }
725 : }
726 0 : v->changes++;
727 0 : rel->exps = aexps;
728 0 : return rel_inplace_project( v->sql->sa, rel, NULL, pexps);
729 : }
730 : return rel;
731 : }
732 :
733 : static sql_rel *
734 165901 : rel_push_project_up(visitor *v, global_props *gp, sql_rel *rel)
735 : {
736 165901 : (void) gp;
737 165901 : return rel_visitor_bottomup(v, rel, &rel_push_project_up_);
738 : }
739 :
740 : run_optimizer
741 596798 : bind_push_project_up(visitor *v, global_props *gp)
742 : {
743 596798 : int flag = v->sql->sql_optimizer;
744 596610 : return gp->opt_level == 1 && (flag & push_project_up) &&
745 1193404 : gp->cnt[op_project] ? rel_push_project_up : NULL;
746 : }
747 :
748 :
749 : static void split_exps(mvc *sql, list *exps, sql_rel *rel);
750 :
751 : static int
752 3748227 : exp_refers_cmp( sql_exp *e1, sql_exp *e2)
753 : {
754 3748227 : if (exp_refers(e1,e2))
755 0 : return 0;
756 : return -1;
757 : }
758 :
759 : sql_exp *
760 189987 : add_exp_too_project(mvc *sql, sql_exp *e, sql_rel *rel)
761 : {
762 189987 : node *n = list_find(rel->exps, e, (fcmp)&exp_match_exp_cmp);
763 :
764 : /* if not matching we may refer to an older expression */
765 189987 : if (!n)
766 97735 : n = list_find(rel->exps, e, (fcmp)&exp_refers_cmp);
767 97735 : if (!n) {
768 97735 : exp_label(sql->sa, e, ++sql->label);
769 97735 : append(rel->exps, e);
770 : } else {
771 92252 : sql_exp *ne = n->data;
772 :
773 92252 : if (rel && rel->l) {
774 184504 : if ((exp_relname(ne) && exp_name(ne) && rel_bind_column2(sql, rel->l, exp_relname(ne), exp_name(ne), 0)) ||
775 92252 : (!exp_relname(ne) && exp_name(ne) && rel_bind_column(sql, rel->l, exp_name(ne), 0, 1))) {
776 0 : exp_label(sql->sa, e, ++sql->label);
777 0 : append(rel->exps, e);
778 0 : ne = e;
779 : }
780 : }
781 : e = ne;
782 : }
783 189987 : e = exp_ref(sql, e);
784 189987 : return e;
785 : }
786 :
787 : static void
788 156326 : add_exps_too_project(mvc *sql, list *exps, sql_rel *rel)
789 : {
790 156326 : node *n;
791 :
792 156326 : if (!exps)
793 : return;
794 483855 : for(n=exps->h; n; n = n->next) {
795 327532 : sql_exp *e = n->data;
796 :
797 327532 : if (e->type != e_column && !exp_is_atom(e))
798 189710 : n->data = add_exp_too_project(sql, e, rel);
799 : }
800 : }
801 :
802 : static sql_exp *
803 614067 : split_exp(mvc *sql, sql_exp *e, sql_rel *rel)
804 : {
805 614067 : if (exp_is_atom(e))
806 : return e;
807 483902 : switch(e->type) {
808 : case e_column:
809 : return e;
810 81525 : case e_convert:
811 81525 : e->l = split_exp(sql, e->l, rel);
812 81525 : return e;
813 167461 : case e_aggr:
814 : case e_func:
815 167461 : if (!is_analytic(e) && !exp_has_sideeffect(e)) {
816 163996 : sql_subfunc *f = e->f;
817 163996 : if (e->type == e_func && !f->func->s && is_caselike_func(f) /*is_ifthenelse_func(f)*/) {
818 : return e;
819 : } else {
820 156326 : split_exps(sql, e->l, rel);
821 156326 : add_exps_too_project(sql, e->l, rel);
822 : }
823 : }
824 : return e;
825 20 : case e_cmp:
826 20 : if (e->flag == cmp_or || e->flag == cmp_filter) {
827 0 : split_exps(sql, e->l, rel);
828 0 : split_exps(sql, e->r, rel);
829 20 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
830 0 : e->l = split_exp(sql, e->l, rel);
831 0 : split_exps(sql, e->r, rel);
832 : } else {
833 20 : e->l = split_exp(sql, e->l, rel);
834 20 : e->r = split_exp(sql, e->r, rel);
835 20 : if (e->f)
836 19 : e->f = split_exp(sql, e->f, rel);
837 : }
838 : return e;
839 : case e_atom:
840 : case e_psm:
841 : return e;
842 : }
843 : return e;
844 : }
845 :
846 : static void
847 156326 : split_exps(mvc *sql, list *exps, sql_rel *rel)
848 : {
849 156326 : if (list_empty(exps))
850 : return;
851 483855 : for(node *n=exps->h; n; n = n->next){
852 327532 : sql_exp *e = n->data;
853 :
854 327532 : e = split_exp(sql, e, rel);
855 327532 : n->data = e;
856 : }
857 : }
858 :
859 : sql_rel *
860 574662 : rel_split_project_(visitor *v, sql_rel *rel, int top)
861 : {
862 1149324 : if (mvc_highwater(v->sql))
863 : return rel;
864 :
865 574664 : if (!rel)
866 : return NULL;
867 574664 : if (is_project(rel->op) && list_length(rel->exps) && (is_groupby(rel->op) || rel->l) && !need_distinct(rel) && !is_single(rel)) {
868 159145 : list *exps = rel->exps;
869 159145 : node *n;
870 159145 : int funcs = 0;
871 159145 : sql_rel *nrel;
872 :
873 : /* are there functions */
874 1368881 : for (n=exps->h; n && !funcs; n = n->next) {
875 1209736 : sql_exp *e = n->data;
876 :
877 1209736 : funcs = exp_has_func(e);
878 : }
879 : /* introduce extra project */
880 159145 : if (funcs && rel->op != op_project) {
881 0 : nrel = rel_project(v->sql->sa, rel->l,
882 0 : rel_projections(v->sql, rel->l, NULL, 1, 1));
883 0 : rel->l = nrel;
884 : /* recursively split all functions and add those to the projection list */
885 0 : split_exps(v->sql, rel->exps, nrel);
886 0 : if (nrel->l && !(nrel->l = rel_split_project_(v, nrel->l, (is_topn(rel->op)||is_sample(rel->op))?top:0)))
887 : return NULL;
888 0 : return rel;
889 159145 : } else if (funcs && !top && list_empty(rel->r)) {
890 : /* projects can have columns point back into the expression list, ie
891 : * create a new list including the split expressions */
892 16679 : node *n;
893 16679 : list *exps = rel->exps;
894 :
895 16679 : rel->exps = sa_list(v->sql->sa);
896 221630 : for (n=exps->h; n; n = n->next)
897 204951 : append(rel->exps, split_exp(v->sql, n->data, rel));
898 142466 : } else if (funcs && top && rel_is_ref(rel) && list_empty(rel->r)) {
899 : /* inplace */
900 38 : list *exps = rel_projections(v->sql, rel, NULL, 1, 1);
901 38 : sql_rel *l = rel_project(v->sql->sa, rel->l, NULL);
902 38 : rel->l = l;
903 38 : l->exps = rel->exps;
904 38 : set_processed(l);
905 38 : rel->exps = exps;
906 : }
907 : }
908 574662 : if (is_set(rel->op) || is_basetable(rel->op))
909 : return rel;
910 380440 : if (rel->l) {
911 386627 : rel->l = rel_split_project_(v, rel->l, (is_topn(rel->op)||is_sample(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0);
912 362318 : if (!rel->l)
913 : return NULL;
914 : }
915 380442 : if ((is_join(rel->op) || is_semi(rel->op)) && rel->r) {
916 111658 : rel->r = rel_split_project_(v, rel->r, (is_topn(rel->op)||is_sample(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0);
917 111658 : if (!rel->r)
918 : return NULL;
919 : }
920 : return rel;
921 : }
922 :
923 : static sql_rel *
924 100687 : rel_split_project(visitor *v, global_props *gp, sql_rel *rel)
925 : {
926 100687 : (void) gp;
927 100687 : return rel_split_project_(v, rel, 1);
928 : }
929 :
930 : run_optimizer
931 596797 : bind_split_project(visitor *v, global_props *gp)
932 : {
933 596797 : int flag = v->sql->sql_optimizer;
934 530868 : return gp->opt_cycle == 0 && gp->opt_level == 1 && (flag & split_project) &&
935 1127653 : (gp->cnt[op_project] || gp->cnt[op_groupby]) ? rel_split_project : NULL;
936 : }
937 :
938 :
939 : static sql_rel *
940 166088 : rel_project_reduce_casts(visitor *v, global_props *gp, sql_rel *rel)
941 : {
942 166088 : if (!rel)
943 : return NULL;
944 166088 : (void) gp;
945 166088 : if (gp->opt_level == 1 && v->value_based_opt && is_simple_project(rel->op) && list_length(rel->exps)) {
946 90563 : list *exps = rel->exps;
947 90563 : node *n;
948 :
949 454680 : for (n=exps->h; n; n = n->next) {
950 364117 : sql_exp *e = n->data;
951 :
952 364117 : if (e && e->type == e_func) {
953 30153 : sql_subfunc *f = e->f;
954 30153 : sql_subtype *res = f->res->h->data;
955 :
956 30153 : if (!f->func->s && !strcmp(f->func->base.name, "sql_mul") && res->scale > 0) {
957 9 : list *args = e->l;
958 9 : sql_exp *h = args->h->data;
959 9 : sql_exp *t = args->t->data;
960 9 : atom *ha = exp_value(v->sql, h), *ta = exp_value(v->sql, t);
961 :
962 9 : if (ha || ta) {
963 4 : atom *a = ha ? ha : ta;
964 4 : atom *na = reduce_scale(v->sql, a);
965 :
966 4 : if (na != a) {
967 2 : int rs = a->tpe.scale - na->tpe.scale;
968 2 : res->scale -= rs;
969 2 : if (ha) {
970 0 : h->r = NULL;
971 0 : h->l = na;
972 : } else {
973 2 : t->r = NULL;
974 2 : t->l = na;
975 : }
976 2 : v->changes++;
977 : }
978 : }
979 : }
980 : }
981 : }
982 : }
983 : return rel;
984 : }
985 :
986 : run_optimizer
987 596801 : bind_project_reduce_casts(visitor *v, global_props *gp)
988 : {
989 596801 : int flag = v->sql->sql_optimizer;
990 596801 : return gp->cnt[op_project] && (flag & project_reduce_casts) ? rel_project_reduce_casts : NULL;
991 : }
992 :
993 :
994 : static sql_rel *
995 1626393 : exp_skip_output_parts(sql_rel *rel)
996 : {
997 2495459 : while ((is_topn(rel->op) || is_project(rel->op) || is_sample(rel->op)) && rel->l) {
998 1023712 : if (is_union(rel->op) || (is_groupby(rel->op) && list_empty(rel->r)))
999 154646 : return rel; /* a group-by with no columns is a plain aggregate and hence always returns one row */
1000 869066 : rel = rel->l;
1001 : }
1002 : return rel;
1003 : }
1004 :
1005 : static sql_column *
1006 84657 : exp_find_column_( sql_rel *rel, sql_exp *exp, int pnr, sql_rel **bt )
1007 : {
1008 84657 : if (exp->type == e_column)
1009 84584 : return name_find_column(rel, exp->l, exp->r, pnr, bt);
1010 : return NULL;
1011 : }
1012 :
1013 : static int
1014 0 : rel_part_nr( sql_rel *rel, sql_exp *e )
1015 : {
1016 0 : sql_column *c = NULL;
1017 0 : sql_rel *bt = NULL;
1018 0 : assert(e->type == e_cmp);
1019 :
1020 0 : c = exp_find_column_(rel, e->l, -1, &bt);
1021 0 : if (!c)
1022 0 : c = exp_find_column_(rel, e->r, -1, &bt);
1023 0 : if (!c && e->f)
1024 0 : c = exp_find_column_(rel, e->f, -1, &bt);
1025 0 : if (!c || !bt || !rel_base_get_mergetable(bt))
1026 0 : return -1;
1027 0 : sql_table *pp = c->t;
1028 0 : sql_table *mt = rel_base_get_mergetable(bt);
1029 0 : return find_member_pos(mt->members, pp);
1030 : }
1031 :
1032 : static int
1033 0 : rel_uses_part_nr( sql_rel *rel, sql_exp *e, int pnr )
1034 : {
1035 0 : sql_column *c = NULL;
1036 0 : sql_rel *bt = NULL;
1037 0 : assert(e->type == e_cmp);
1038 :
1039 : /*
1040 : * following case fails.
1041 : *
1042 : * semijoin( A1, union [A1, A2] )
1043 : * The union will never return proper column (from A2).
1044 : * ie need different solution (probaly pass pnr).
1045 : */
1046 0 : c = exp_find_column_(rel, e->l, pnr, &bt);
1047 0 : if (!c)
1048 0 : c = exp_find_column_(rel, e->r, pnr, &bt);
1049 0 : if (c && bt && rel_base_get_mergetable(bt)) {
1050 0 : sql_table *pp = c->t;
1051 0 : sql_table *mt = rel_base_get_mergetable(bt);
1052 0 : if (find_member_pos(mt->members, pp) == pnr)
1053 : return 1;
1054 : }
1055 : /* for projects we may need to do a rename! */
1056 0 : if (is_project(rel->op) || is_topn(rel->op) || is_sample(rel->op))
1057 0 : return rel_uses_part_nr( rel->l, e, pnr);
1058 :
1059 0 : if (is_union(rel->op) || is_join(rel->op) || is_semi(rel->op)) {
1060 0 : if (rel_uses_part_nr( rel->l, e, pnr))
1061 : return 1;
1062 0 : if (!is_semi(rel->op) && rel_uses_part_nr( rel->r, e, pnr))
1063 : return 1;
1064 : }
1065 : return 0;
1066 : }
1067 :
1068 : static sql_column *
1069 1039545 : exp_is_pkey(sql_rel *rel, sql_exp *e)
1070 : {
1071 1039545 : if (find_prop(e->p, PROP_HASHCOL)) { /* aligned PKEY JOIN */
1072 9365 : fcmp cmp = (fcmp)&kc_column_cmp;
1073 9365 : sql_column *c = exp_find_column(rel, e, -2);
1074 :
1075 9365 : if (c && c->t->pkey && list_find(c->t->pkey->k.columns, c, cmp) != NULL)
1076 : return c;
1077 : }
1078 : return NULL;
1079 : }
1080 :
1081 : static sql_exp *
1082 483154 : rel_is_join_on_pkey(sql_rel *rel, bool pk_fk) /* pk_fk is used to verify is a join on pk-fk */
1083 : {
1084 483154 : if (!rel || !rel->exps)
1085 : return NULL;
1086 1013574 : for (node *n = rel->exps->h; n; n = n->next) {
1087 531840 : sql_exp *je = n->data;
1088 :
1089 1052320 : if (je->type == e_cmp && je->flag == cmp_equal &&
1090 1040960 : (exp_is_pkey(rel, je->l) || exp_is_pkey(rel, je->r)) &&
1091 9346 : (!pk_fk || find_prop(je->p, PROP_JOINIDX)))
1092 0 : return je;
1093 : }
1094 : return NULL;
1095 : }
1096 :
1097 : /* return true if the given expression is guaranteed to have no rows */
1098 : static int
1099 1299798 : exp_is_zero_rows(visitor *v, sql_rel *rel, sql_rel *sel)
1100 : {
1101 1302965 : if (!rel || mvc_highwater(v->sql))
1102 0 : return 0;
1103 1302965 : rel = exp_skip_output_parts(rel);
1104 1302965 : if (is_select(rel->op) && rel->l) {
1105 323428 : sel = rel;
1106 323428 : rel = exp_skip_output_parts(rel->l);
1107 : }
1108 :
1109 1302965 : sql_table *t = is_basetable(rel->op) && rel->l ? rel->l : NULL;
1110 579999 : bool table_readonly = t && isTable(t) && t->access == TABLE_READONLY;
1111 :
1112 1302965 : if (sel && !list_empty(sel->exps) && (v->value_based_opt || table_readonly)) {
1113 441849 : for (node *n = sel->exps->h; n; n = n->next) {
1114 230877 : sql_exp *e = n->data;
1115 :
1116 : /* if the expression is false, then the select is empty */
1117 230877 : if (v->value_based_opt && (exp_is_false(e) || exp_is_null(e)))
1118 51 : return 1;
1119 230826 : if (table_readonly && e->type == e_cmp && (e->flag == cmp_equal || e->f)) {
1120 : /* half-ranges are theoretically optimizable here, but not implemented */
1121 343 : sql_exp *c = e->l;
1122 343 : if (c->type == e_column) {
1123 343 : sql_exp *l = e->r;
1124 343 : sql_exp *h = e->f;
1125 :
1126 343 : atom *lval = exp_flatten(v->sql, v->value_based_opt, l);
1127 343 : atom *hval = h ? exp_flatten(v->sql, v->value_based_opt, h) : lval;
1128 343 : if (lval && hval) {
1129 303 : sql_rel *bt;
1130 303 : sql_column *col = name_find_column(sel, exp_relname(c), exp_name(c), -2, &bt);
1131 303 : void *min = NULL, *max = NULL;
1132 303 : atom *amin, *amax;
1133 303 : sql_subtype *ct = exp_subtype(c);
1134 :
1135 303 : if (col
1136 299 : && col->t == t
1137 56 : && sql_trans_ranges(v->sql->session->tr, col, &min, &max)
1138 50 : && min && max
1139 50 : && (amin = atom_general_ptr(v->sql->sa, ct, min)) && (amax = atom_general_ptr(v->sql->sa, ct, max))
1140 50 : && !exp_range_overlap(amin, amax, lval, hval, false, false)) {
1141 2 : return 1;
1142 : }
1143 : }
1144 : }
1145 : }
1146 : }
1147 : }
1148 1302912 : if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) || is_semi(rel->op)) && !list_empty(rel->exps)) {
1149 478390 : sql_exp *je;
1150 :
1151 : /* check non overlaping pk-fk join */
1152 478390 : if ((je = rel_is_join_on_pkey(rel, true))) {
1153 0 : int lpnr = rel_part_nr(rel->l, je);
1154 :
1155 0 : if (lpnr >= 0 && !rel_uses_part_nr(rel->r, je, lpnr))
1156 : return 1;
1157 : }
1158 478390 : return (((is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op)) && exp_is_zero_rows(v, rel->l, sel)) ||
1159 478384 : ((is_innerjoin(rel->op) || is_right(rel->op)) && exp_is_zero_rows(v, rel->r, sel)));
1160 : }
1161 : /* global aggregates always return 1 row */
1162 824522 : if (is_simple_project(rel->op) || (is_groupby(rel->op) && !list_empty(rel->r)) || is_select(rel->op) ||
1163 : is_topn(rel->op) || is_sample(rel->op) || is_inter(rel->op) || is_except(rel->op)) {
1164 46023 : if (rel->l)
1165 : return exp_is_zero_rows(v, rel->l, sel);
1166 42469 : } else if (is_innerjoin(rel->op) && list_empty(rel->exps)) { /* cartesian product */
1167 42481 : return exp_is_zero_rows(v, rel->l, sel) || exp_is_zero_rows(v, rel->r, sel);
1168 : }
1169 : return 0;
1170 : }
1171 :
1172 : /* discard sides of UNION or UNION ALL which cannot produce any rows, as per
1173 : statistics, similarly to the merge table optimizer, e.g.
1174 : select * from a where x between 1 and 2 union all select * from b where x between 1 and 2
1175 : -> select * from b where x between 1 and 2 [assuming a has no rows with 1<=x<=2]
1176 : */
1177 : static inline sql_rel *
1178 3295981 : rel_remove_union_partitions(visitor *v, sql_rel *rel)
1179 : {
1180 3295981 : if (!is_union(rel->op) || rel_is_ref(rel))
1181 : return rel;
1182 208090 : int left_zero_rows = !rel_is_ref(rel->l) && exp_is_zero_rows(v, rel->l, NULL);
1183 208090 : int right_zero_rows = !rel_is_ref(rel->r) && exp_is_zero_rows(v, rel->r, NULL);
1184 :
1185 208090 : if (left_zero_rows && right_zero_rows) {
1186 : /* generate dummy relation */
1187 6 : list *converted = sa_list(v->sql->sa);
1188 6 : sql_rel *nrel = rel_project_exp(v->sql, exp_atom_bool(v->sql->sa, 1));
1189 6 : nrel = rel_select(v->sql->sa, nrel, exp_atom_bool(v->sql->sa, 0));
1190 6 : set_processed(nrel);
1191 :
1192 39 : for (node *n = rel->exps->h ; n ; n = n->next) {
1193 33 : sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL));
1194 33 : exp_prop_alias(v->sql->sa, a, e);
1195 33 : list_append(converted, a);
1196 : }
1197 6 : rel_destroy(rel);
1198 6 : v->changes++;
1199 6 : return rel_project(v->sql->sa, nrel, converted);
1200 208084 : } else if (left_zero_rows) {
1201 15 : sql_rel *r = rel->r;
1202 15 : if (!is_project(r->op))
1203 0 : r = rel_project(v->sql->sa, r, rel_projections(v->sql, r, NULL, 1, 1));
1204 15 : rel_rename_exps(v->sql, rel->exps, r->exps);
1205 15 : rel->r = NULL;
1206 15 : rel_destroy(rel);
1207 15 : v->changes++;
1208 15 : return r;
1209 208069 : } else if (right_zero_rows) {
1210 26 : sql_rel *l = rel->l;
1211 26 : if (!is_project(l->op))
1212 0 : l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
1213 26 : rel_rename_exps(v->sql, rel->exps, l->exps);
1214 26 : rel->l = NULL;
1215 26 : rel_destroy(rel);
1216 26 : v->changes++;
1217 26 : return l;
1218 : }
1219 : return rel;
1220 : }
1221 :
1222 : static int
1223 25 : rel_match_projections(sql_rel *l, sql_rel *r)
1224 : {
1225 25 : node *n, *m;
1226 25 : list *le = l->exps;
1227 25 : list *re = r->exps;
1228 :
1229 25 : if (!le || !re)
1230 : return 0;
1231 25 : if (list_length(le) != list_length(re))
1232 : return 0;
1233 :
1234 64 : for (n = le->h, m = re->h; n && m; n = n->next, m = m->next)
1235 61 : if (!exp_match(n->data, m->data))
1236 : return 0;
1237 : return 1;
1238 : }
1239 :
1240 : static int
1241 0 : exps_has_predicate( list *l )
1242 : {
1243 0 : node *n;
1244 :
1245 0 : for( n = l->h; n; n = n->next){
1246 0 : sql_exp *e = n->data;
1247 :
1248 0 : if (e->card <= CARD_ATOM)
1249 : return 1;
1250 : }
1251 : return 0;
1252 : }
1253 :
1254 : static sql_rel *
1255 1520 : rel_find_select( sql_rel *r)
1256 : {
1257 5760 : while (!is_select(r->op) && r->l && is_project(r->op))
1258 : r = r->l;
1259 1520 : if (is_select(r->op))
1260 50 : return r;
1261 : return NULL;
1262 : }
1263 :
1264 : static inline sql_rel *
1265 3295981 : rel_merge_union(visitor *v, sql_rel *rel)
1266 : {
1267 3295981 : sql_rel *l = rel->l;
1268 3295981 : sql_rel *r = rel->r;
1269 3295981 : sql_rel *ref = NULL;
1270 :
1271 3295981 : if (is_union(rel->op) &&
1272 212318 : l && is_project(l->op) && !project_unsafe(l,0) &&
1273 130785 : r && is_project(r->op) && !project_unsafe(r,0) &&
1274 126765 : (ref = rel_find_ref(l)) != NULL && ref == rel_find_ref(r)) {
1275 : /* Find selects and try to merge */
1276 760 : sql_rel *ls = rel_find_select(l);
1277 760 : sql_rel *rs = rel_find_select(r);
1278 :
1279 : /* can we merge ? */
1280 760 : if (!ls || !rs)
1281 : return rel;
1282 :
1283 : /* merge any extra projects */
1284 25 : if (l->l != ls)
1285 22 : rel->l = l = rel_merge_projects_(v, l);
1286 25 : if (r->l != rs)
1287 22 : rel->r = r = rel_merge_projects_(v, r);
1288 :
1289 25 : if (!rel_match_projections(l,r))
1290 : return rel;
1291 :
1292 : /* for now only union(project*(select(R),project*(select(R))) */
1293 3 : if (ls != l->l || rs != r->l ||
1294 0 : ls->l != rs->l || !rel_is_ref(ls->l))
1295 : return rel;
1296 :
1297 0 : if (!ls->exps || !rs->exps ||
1298 0 : exps_has_predicate(ls->exps) ||
1299 0 : exps_has_predicate(rs->exps))
1300 : return rel;
1301 :
1302 : /* merge, ie. add 'or exp' */
1303 0 : v->changes++;
1304 0 : ls->exps = append(new_exp_list(v->sql->sa), exp_or(v->sql->sa, ls->exps, rs->exps, 0));
1305 0 : rs->exps = NULL;
1306 0 : rel = rel_inplace_project(v->sql->sa, rel, rel_dup(rel->l), rel->exps);
1307 0 : set_processed(rel);
1308 0 : return rel;
1309 : }
1310 : return rel;
1311 : }
1312 :
1313 : static sql_rel *
1314 3295981 : rel_optimize_unions_bottomup_(visitor *v, sql_rel *rel)
1315 : {
1316 3295981 : rel = rel_remove_union_partitions(v, rel);
1317 3295981 : rel = rel_merge_union(v, rel);
1318 3295981 : return rel;
1319 : }
1320 :
1321 : static sql_rel *
1322 25316 : rel_optimize_unions_bottomup(visitor *v, global_props *gp, sql_rel *rel)
1323 : {
1324 25316 : (void) gp;
1325 25316 : return rel_visitor_bottomup(v, rel, &rel_optimize_unions_bottomup_);
1326 : }
1327 :
1328 : run_optimizer
1329 596802 : bind_optimize_unions_bottomup(visitor *v, global_props *gp)
1330 : {
1331 596802 : int flag = v->sql->sql_optimizer;
1332 596614 : return gp->opt_level == 1 && gp->cnt[op_union] && (flag & optimize_unions_bottomup)
1333 596802 : ? rel_optimize_unions_bottomup : NULL;
1334 : }
1335 :
1336 :
1337 : static inline sql_rel *
1338 4393003 : rel_project_cse(visitor *v, sql_rel *rel)
1339 : {
1340 4393003 : if (is_project(rel->op) && rel->exps) {
1341 1774577 : node *n, *m;
1342 1774577 : list *nexps;
1343 1774577 : int needed = 0;
1344 :
1345 10191593 : for (n=rel->exps->h; n && !needed; n = n->next) {
1346 8417014 : sql_exp *e1 = n->data;
1347 :
1348 8417014 : if (e1->type != e_column && !exp_is_atom(e1) && exp_name(e1)) {
1349 18912524 : for (m=n->next; m; m = m->next){
1350 17991728 : sql_exp *e2 = m->data;
1351 :
1352 17991728 : if (exp_name(e2) && exp_match_exp(e1, e2))
1353 17991728 : needed = 1;
1354 : }
1355 : }
1356 : }
1357 :
1358 1774579 : if (!needed)
1359 : return rel;
1360 :
1361 429 : nexps = new_exp_list(v->sql->sa);
1362 5457 : for (n=rel->exps->h; n; n = n->next) {
1363 5028 : sql_exp *e1 = n->data;
1364 :
1365 5028 : if (e1->type != e_column && !exp_is_atom(e1) && exp_name(e1)) {
1366 28584 : for (m=nexps->h; m; m = m->next){
1367 25496 : sql_exp *e2 = m->data;
1368 :
1369 25496 : if (exp_name(e2) && exp_match_exp(e1, e2) && (e1->type != e_column || exps_bind_column2(nexps, exp_relname(e1), exp_name(e1), NULL) == e1)) {
1370 457 : sql_exp *ne = exp_alias(v->sql->sa, exp_relname(e1), exp_name(e1), exp_relname(e2), exp_name(e2), exp_subtype(e2), e2->card, has_nil(e2), is_unique(e2), is_intern(e1));
1371 :
1372 457 : ne = exp_propagate(v->sql->sa, ne, e1);
1373 457 : set_selfref(ne);
1374 457 : exp_prop_alias(v->sql->sa, ne, e1);
1375 457 : e1 = ne;
1376 457 : break;
1377 : }
1378 : }
1379 : }
1380 5028 : append(nexps, e1);
1381 : }
1382 429 : rel->exps = nexps;
1383 : }
1384 : return rel;
1385 : }
1386 :
1387 : static int exp_is_const_op(sql_exp *exp, sql_exp *tope, sql_rel *expr);
1388 :
1389 : static int
1390 76 : exps_are_const_op(list *exps, sql_exp *tope, sql_rel *expr)
1391 : {
1392 76 : int ok = 1;
1393 :
1394 76 : if (list_empty(exps))
1395 : return 1;
1396 152 : for (node *n = exps->h; n && ok; n = n->next)
1397 76 : ok &= exp_is_const_op(n->data, tope, expr);
1398 : return ok;
1399 : }
1400 :
1401 : static int
1402 518 : exp_is_const_op(sql_exp *exp, sql_exp *tope, sql_rel *expr)
1403 : {
1404 944 : switch (exp->type) {
1405 28 : case e_atom:
1406 28 : return exp->f ? 0 : 1;
1407 213 : case e_convert:
1408 213 : return exp_is_const_op(exp->l, tope, expr);
1409 76 : case e_func:
1410 : case e_aggr: {
1411 76 : sql_subfunc *f = exp->f;
1412 76 : if (f->func->side_effect || IS_ANALYTIC(f->func))
1413 : return 0;
1414 76 : return exps_are_const_op(exp->l, tope, expr);
1415 : }
1416 0 : case e_cmp:
1417 0 : if (exp->flag == cmp_or || exp->flag == cmp_filter)
1418 0 : return exps_are_const_op(exp->l, tope, expr) && exps_are_const_op(exp->r, tope, expr);
1419 0 : if (exp->flag == cmp_in || exp->flag == cmp_notin)
1420 0 : return exp_is_const_op(exp->l, tope, expr) && exps_are_const_op(exp->r, tope, expr);
1421 0 : return exp_is_const_op(exp->l, tope, expr) && exp_is_const_op(exp->r, tope, expr) && (!exp->f || exp_is_const_op(exp->f, tope, expr));
1422 627 : case e_column: {
1423 627 : if (is_simple_project(expr->op) || is_groupby(expr->op)) {
1424 : /* in a simple projection, self-references may occur */
1425 627 : sql_exp *nexp = (exp->l ? exps_bind_column2(expr->exps, exp->l, exp->r, NULL) : exps_bind_column(expr->exps, exp->r, NULL, NULL, 0));
1426 627 : if (nexp && list_position(expr->exps, nexp) < list_position(expr->exps, tope))
1427 : return exp_is_const_op(nexp, exp, expr);
1428 : }
1429 : return 0;
1430 : }
1431 : default:
1432 : return 0;
1433 : }
1434 : }
1435 :
1436 : static sql_exp *
1437 23 : rel_groupby_add_count_star(mvc *sql, sql_rel *rel, sql_exp *count_star_exp, bool *count_added)
1438 : {
1439 23 : if (count_star_exp)
1440 : return count_star_exp;
1441 16 : if (!list_empty(rel->exps)) {
1442 44 : for (node *n=rel->exps->h; n ; n = n->next) {
1443 32 : sql_exp *e = n->data;
1444 :
1445 32 : if (exp_aggr_is_count(e) && !need_distinct(e) && list_length(e->l) == 0)
1446 4 : return e;
1447 : }
1448 : }
1449 12 : sql_subfunc *cf = sql_bind_func(sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true);
1450 12 : *count_added = true;
1451 12 : return rel_groupby_add_aggr(sql, rel, exp_aggr(sql->sa, NULL, cf, 0, 0, rel->card, 0));
1452 : }
1453 :
1454 : /* optimize sum(x + 12) into sum(x) + 12*count(*) */
1455 : static inline sql_rel *
1456 43989 : rel_simplify_sum(visitor *v, sql_rel *rel)
1457 : {
1458 43989 : if (is_groupby(rel->op) && !list_empty(rel->exps)) {
1459 43909 : sql_rel *upper = NULL, *groupby = rel, *l = groupby->l;
1460 43909 : sql_exp *count_star_exp = NULL;
1461 43909 : bool count_added = false;
1462 :
1463 136991 : for (node *n=groupby->exps->h; n ; n = n->next) {
1464 93082 : sql_exp *e = n->data;
1465 93082 : list *el = e->l;
1466 93082 : sql_subfunc *sf = e->f;
1467 :
1468 93082 : if (e->type == e_aggr && !need_distinct(e) && sf->func->type == F_AGGR && !sf->func->s && !strcmp(sf->func->base.name, "sum")) {
1469 8939 : sql_rel *expr = groupby;
1470 8939 : sql_exp *exp = (sql_exp*) el->h->data, *oexp = exp;
1471 :
1472 8939 : while (is_numeric_upcast(exp))
1473 0 : exp = exp->l;
1474 : /* we want to find a +/- call, so expect them to appear only on simple projections */
1475 19176 : while (exp && exp->type == e_column && (is_simple_project(expr->op) || is_groupby(expr->op)) && expr->l) {
1476 11923 : sql_rel *nexpr = NULL;
1477 11923 : sql_exp *nexp = rel_find_exp_and_corresponding_rel(l, exp, false, &nexpr, NULL);
1478 :
1479 : /* break when it loops on the same relation */
1480 11923 : if (nexpr == expr && list_position(expr->exps, nexp) >= list_position(expr->exps, exp))
1481 : break;
1482 10237 : expr = nexpr;
1483 10237 : exp = oexp = nexp;
1484 10265 : while (exp && is_numeric_upcast(exp))
1485 28 : exp = exp->l;
1486 : }
1487 :
1488 8939 : list *expl = exp ? exp->l : NULL;
1489 93082 : sql_subfunc *expf = exp ? exp->f : NULL;
1490 : /* found a candidate function */
1491 7583 : if (exp && exp->type == e_func && expf->func->type == F_FUNC && !expf->func->s &&
1492 1224 : (!strcmp(expf->func->base.name, "sql_sub") || !strcmp(expf->func->base.name, "sql_add"))) {
1493 221 : sql_exp *e1 = (sql_exp*) expl->h->data, *e2 = (sql_exp*) expl->h->next->data;
1494 221 : int e1ok = exp_is_const_op(e1, oexp, expr), e2ok = exp_is_const_op(e2, oexp, expr);
1495 :
1496 221 : if ((!e1ok && e2ok) || (e1ok && !e2ok)) {
1497 24 : sql_exp *ocol = e1ok ? e2 : e1, *constant = e1ok ? e1 : e2, *mul, *colref, *naggr, *newop, *col = ocol, *match;
1498 24 : bool add_col = true, prepend = false;
1499 :
1500 : /* if 'col' is a projection from the under relation, then use it */
1501 24 : while (is_numeric_upcast(col))
1502 0 : col = col->l;
1503 24 : if (col->type == e_column) {
1504 22 : sql_exp *colf = exps_find_exp(l->exps, col);
1505 :
1506 : /* col is already found in the inner relation. Also look for a new reference for col, eg sql_add(col, 1), 1 as col */
1507 22 : if (colf && list_position(l->exps, colf) < list_position(l->exps, oexp)) {
1508 : add_col = false;
1509 4 : } else if (!colf && is_simple_project(l->op) && list_empty(l->r) && !rel_is_ref(l) && !need_distinct(l)) {
1510 : prepend = true;
1511 : add_col = false;
1512 1 : } else if (!colf && (is_simple_project(l->op) || is_groupby(l->op))) {
1513 : /* on these scenarios the new column expression will be ordered/(grouped for distinct) or create potential ambiguity (multiple ref), so skip */
1514 1 : continue;
1515 : }
1516 2 : } else if ((is_simple_project(l->op) && (!list_empty(l->r) || rel_is_ref(l) || need_distinct(l))) || is_groupby(l->op)) {
1517 0 : continue;
1518 : }
1519 :
1520 : /* add count star */
1521 23 : count_star_exp = rel_groupby_add_count_star(v->sql, groupby, count_star_exp, &count_added);
1522 : /* multiply constant by count star */
1523 23 : if (!(mul = rel_binop_(v->sql, NULL, constant, exp_ref(v->sql, count_star_exp), "sys", "sql_mul", card_value))) {
1524 0 : v->sql->session->status = 0;
1525 0 : v->sql->errstr[0] = '\0';
1526 0 : continue;
1527 : }
1528 23 : if (!has_label(mul))
1529 23 : exp_label(v->sql->sa, mul, ++v->sql->label);
1530 :
1531 23 : colref = exp_ref(v->sql, ocol);
1532 23 : if (add_col) /* if 'col' will be added, then make sure it has an unique label */
1533 3 : exp_label(v->sql->sa, colref, ++v->sql->label);
1534 :
1535 : /* 'oexp' contains the type for the input for the 'sum' aggregate */
1536 23 : if (!(colref = exp_check_type(v->sql, exp_subtype(oexp), groupby, colref, type_equal))) {
1537 0 : v->sql->session->status = 0;
1538 0 : v->sql->errstr[0] = '\0';
1539 0 : continue;
1540 : }
1541 : /* update sum to use the column side only */
1542 23 : sql_subfunc *a = sql_bind_func(v->sql, "sys", "sum", exp_subtype(colref), NULL, F_AGGR, true);
1543 23 : if (!a)
1544 0 : continue;
1545 23 : naggr = exp_aggr(v->sql->sa, list_append(sa_list(v->sql->sa), colref), a, need_distinct(e), need_no_nil(e), groupby->card, has_nil(e));
1546 23 : if ((match = exps_any_match(groupby->exps, naggr)) && list_position(groupby->exps, match) < list_position(groupby->exps, e)) { /* found a matching aggregate, use it */
1547 9 : naggr = exp_ref(v->sql, match);
1548 9 : exp_label(v->sql->sa, naggr, ++v->sql->label);
1549 14 : } else if (!has_label(naggr)) { /* otherwise use the new one */
1550 14 : exp_label(v->sql->sa, naggr, ++v->sql->label);
1551 : }
1552 :
1553 : /* generate addition/subtraction. subtraction is not commutative, so keep original order! */
1554 23 : if (!(newop = rel_binop_(v->sql, NULL, e1 == constant ? mul : exp_ref(v->sql, naggr), e1 == constant ? exp_ref(v->sql, naggr) : mul, "sys", expf->func->base.name, card_value))) {
1555 0 : v->sql->session->status = 0;
1556 0 : v->sql->errstr[0] = '\0';
1557 0 : continue;
1558 : }
1559 23 : if (!(newop = exp_check_type(v->sql, exp_subtype(e), groupby, newop, type_equal))) {
1560 0 : v->sql->session->status = 0;
1561 0 : v->sql->errstr[0] = '\0';
1562 0 : continue;
1563 : }
1564 :
1565 : /* a column reference can be prepended to the inner relation, add it after all the check type calls succeed */
1566 23 : if (prepend)
1567 3 : list_prepend(l->exps, exp_ref(v->sql, col));
1568 :
1569 : /* the new generate function calls are valid, update relations */
1570 : /* we need a new relation for the multiplication and addition/subtraction */
1571 23 : if (!upper) {
1572 : /* be careful with relations with more than 1 reference, so do in-place replacement */
1573 16 : list *projs = rel_projections(v->sql, rel, NULL, 1, 1);
1574 16 : sql_rel *nrel = rel_groupby(v->sql, rel->l, NULL);
1575 16 : nrel->exps = rel->exps;
1576 16 : nrel->r = rel->r;
1577 16 : nrel->card = rel->card;
1578 16 : nrel->nrcols = list_length(rel->exps);
1579 16 : set_processed(nrel);
1580 16 : rel_dup(rel->l);
1581 16 : upper = rel = rel_inplace_project(v->sql->sa, rel, nrel, projs);
1582 16 : rel->card = exps_card(projs);
1583 16 : groupby = nrel; /* update pointers :) */
1584 16 : l = groupby->l;
1585 : }
1586 91 : for (node *n = upper->exps->h ; n ; ) {
1587 68 : node *next = n->next;
1588 68 : sql_exp *re = n->data;
1589 :
1590 : /* remove the old reference to the aggregate because we will use the addition/subtraction instead,
1591 : as well as the count star if count_added */
1592 68 : if (exp_refers(e, re) || (count_added && exp_refers(count_star_exp, re)))
1593 35 : list_remove_node(upper->exps, NULL, n);
1594 : n = next;
1595 : }
1596 :
1597 : /* update sum aggregate with new aggregate or reference to an existing one */
1598 23 : n->data = naggr;
1599 23 : list_hash_clear(groupby->exps);
1600 :
1601 : /* add column reference with new label, if 'col' was not found */
1602 23 : if (add_col) {
1603 3 : if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l))
1604 0 : groupby->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
1605 3 : list_append(l->exps, ocol);
1606 : }
1607 :
1608 : /* propagate alias and add new expression */
1609 23 : exp_prop_alias(v->sql->sa, newop, e);
1610 23 : list_append(upper->exps, newop);
1611 23 : v->changes++;
1612 : }
1613 : }
1614 : }
1615 : }
1616 : }
1617 43989 : return rel;
1618 : }
1619 :
1620 : /* optimize group by x+1,(y-2)*3,2-z into group by x,y,z */
1621 : static inline sql_rel *
1622 43989 : rel_simplify_groupby_columns(visitor *v, sql_rel *rel)
1623 : {
1624 43989 : if (is_groupby(rel->op) && !list_empty(rel->r)) {
1625 33363 : sql_rel *l = rel->l;
1626 :
1627 85816 : for (node *n=((list*)rel->r)->h; n ; n = n->next) {
1628 52453 : sql_exp *e = n->data;
1629 52453 : e->used = 0; /* we need to use this flag, clean it first */
1630 : }
1631 85816 : for (node *n=((list*)rel->r)->h; n ; n = n->next) {
1632 52453 : sql_exp *e = n->data;
1633 :
1634 52453 : if (e->type == e_column) {
1635 52375 : bool searching = true;
1636 52375 : sql_rel *efrel = NULL;
1637 52375 : sql_exp *exp = rel_find_exp_and_corresponding_rel(l, e, false, &efrel, NULL), *col = NULL, *tope = exp;
1638 :
1639 104769 : while (searching && !col) {
1640 52394 : sql_exp *exp_col = exp;
1641 :
1642 52394 : if (exp && is_numeric_upcast(exp))
1643 6 : exp = exp->l;
1644 52394 : if (exp && exp->type == e_func) {
1645 526 : list *el = exp->l;
1646 526 : sql_subfunc *sf = exp->f;
1647 : /* At the moment look only at injective math functions */
1648 526 : if (sf->func->type == F_FUNC && !sf->func->s &&
1649 513 : (!strcmp(sf->func->base.name, "sql_sub") || !strcmp(sf->func->base.name, "sql_add") || !strcmp(sf->func->base.name, "sql_mul"))) {
1650 100 : sql_exp *e1 = (sql_exp*) el->h->data, *e2 = (sql_exp*) el->h->next->data;
1651 : /* the optimization cannot be done if side-effect calls (e.g. rand()) are present */
1652 192 : int e1ok = exp_is_atom(e1) && !exp_unsafe(e1, 1) && !exp_has_sideeffect(e1), e2ok = exp_is_atom(e2) && !exp_unsafe(e2, 1) && !exp_has_sideeffect(e2);
1653 :
1654 100 : if ((!e1ok && e2ok) || (e1ok && !e2ok)) {
1655 50 : sql_exp *c = e1ok ? e2 : e1;
1656 50 : bool done = false;
1657 50 : exp_col = exps_find_exp(efrel->exps, c);
1658 50 : if (exp_col)
1659 36 : c = exp_col;
1660 :
1661 108 : while (!done) {
1662 58 : if (is_numeric_upcast(c))
1663 22 : c = c->l;
1664 58 : if (c->type == e_column) {
1665 39 : if (is_simple_project(efrel->op) || is_groupby(efrel->op)) {
1666 : /* in a simple projection, self-references may occur */
1667 39 : sql_exp *nc = exps_find_exp(efrel->exps, c);
1668 39 : if (nc && list_position(efrel->exps, nc) < list_position(efrel->exps, exp_col)) {
1669 8 : exp_col = c;
1670 8 : c = nc;
1671 8 : continue;
1672 : }
1673 : }
1674 : col = c; /* 'c' is a column reference from the left relation */
1675 : done = true;
1676 : } else {
1677 : exp = c; /* maybe a nested function call, let's continue searching */
1678 : done = true;
1679 : }
1680 : }
1681 : } else {
1682 : searching = false;
1683 : }
1684 : } else {
1685 : searching = false;
1686 : }
1687 : } else {
1688 : searching = false;
1689 : }
1690 : }
1691 52375 : if (col) { /* a column reference was found */
1692 31 : const char *rname = exp_relname(e), *name = exp_name(e);
1693 :
1694 : /* the grouping column has an alias, we have to keep it */
1695 31 : if ((rname && name && (strcmp(rname, e->l) != 0 || strcmp(name, e->r) != 0)) || (!rname && name && strcmp(name, e->r) != 0)) {
1696 2 : if (!has_label(e)) /* dangerous to merge, skip it */
1697 0 : continue;
1698 2 : if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l))
1699 0 : rel->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
1700 2 : list_append(l->exps, e);
1701 2 : n->data = e = exp_ref(v->sql, e);
1702 2 : list_hash_clear(rel->r);
1703 : }
1704 :
1705 31 : sql_exp *f = exps_find_exp(rel->r, col);
1706 :
1707 31 : if (f && list_position(rel->r, f) < list_position(rel->r, e)) { /* if already present, remove it */
1708 7 : e->used = 1;
1709 : } else {
1710 : /* Use an unique reference to the column found. If there's another grouping column label pointing into it,
1711 : rel_groupby_cse will hopefully remove it */
1712 24 : sql_exp *colf = exps_find_exp(l->exps, col);
1713 :
1714 : /* col is already found in the inner relation. Also look for a new reference for col, eg sql_add(col, 1), 1 as col */
1715 24 : if (colf && list_position(l->exps, colf) < list_position(l->exps, tope)) {
1716 4 : n->data = exp_ref(v->sql, col);
1717 20 : } else if (!colf && is_simple_project(l->op) && list_empty(l->r) && !rel_is_ref(l) && !need_distinct(l)) { /* trivial case, it can be added */
1718 20 : sql_exp *ne = exp_ref(v->sql, col);
1719 20 : list_prepend(l->exps, ne);
1720 20 : n->data = exp_ref(v->sql, ne);
1721 0 : } else if (!colf && (is_simple_project(l->op) || is_groupby(l->op))) {
1722 : /* on these scenarios the new column expression will be ordered/(grouped for distinct) or create potential ambiguity (multiple ref), so skip */
1723 0 : continue;
1724 : } else {
1725 0 : sql_exp *ne = exp_ref(v->sql, col);
1726 :
1727 0 : if (colf) /* a col reference is already there, add a new label */
1728 0 : exp_label(v->sql->sa, ne, ++v->sql->label);
1729 0 : if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l))
1730 0 : rel->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
1731 0 : list_append(l->exps, ne);
1732 0 : n->data = exp_ref(v->sql, ne);
1733 : }
1734 24 : list_hash_clear(rel->r);
1735 : }
1736 31 : v->changes++;
1737 : }
1738 : }
1739 : }
1740 85816 : for (node *n=((list*)rel->r)->h; n ; ) {
1741 52453 : node *next = n->next;
1742 52453 : sql_exp *e = n->data;
1743 :
1744 52453 : if (e->used) /* remove unecessary grouping columns */
1745 7 : list_remove_node(rel->r, NULL, n);
1746 : n = next;
1747 : }
1748 : }
1749 43989 : return rel;
1750 : }
1751 :
1752 : /* remove identical grouping columns */
1753 : static inline sql_rel *
1754 136151 : rel_groupby_cse(visitor *v, sql_rel *rel)
1755 : {
1756 136151 : if (is_groupby(rel->op) && !list_empty(rel->r)) {
1757 95133 : sql_rel *l = rel->l;
1758 :
1759 : /* for every group expression e1 */
1760 262399 : for (node *n=((list*)rel->r)->h; n ; n = n->next) {
1761 167266 : sql_exp *e1 = n->data;
1762 : /* it's good to examine the same expression in the subrelation e.g. in case it's an alias */
1763 : /* TODO maybe cover more cases? Here I only look at the left relation */
1764 167266 : sql_exp *e1_sub = e1->type == e_column ? exps_find_exp(l->exps, e1) : NULL;
1765 :
1766 : /* for every other group expression */
1767 273658 : for (node *m=n->next; m; m = m->next) {
1768 106392 : sql_exp *e2 = m->data;
1769 106392 : sql_exp *e2_sub = e2->type == e_column ? exps_find_exp(l->exps, e2) : NULL;
1770 :
1771 : /* check if the expression are the same */
1772 106392 : if (exp_match_exp(e1, e2) || exp_refers(e1, e2) || (e1_sub && e2_sub && (exp_match_exp(e1_sub, e2_sub) || exp_refers(e1_sub, e2_sub)))) {
1773 :
1774 : /* use e2 from rel->exps instead of e2 from the rel->r as it can have an alias from the higher rel */
1775 37 : sql_exp *e2_in_exps = (e2->l && e2->alias.rname == e2->l && e2->alias.name == e2->r) ?
1776 52 : exps_bind_column2(rel->exps, e2->l, e2->r, NULL) :
1777 28 : exps_bind_column(rel->exps, e2->alias.name, NULL, NULL, 0);
1778 40 : assert(e2_in_exps);
1779 :
1780 : /* same as e2 */
1781 36 : sql_exp *e1_in_exps = (e1->l && e1->alias.rname == e1->l && e1->alias.name == e1->r) ?
1782 54 : exps_bind_column2(rel->exps, e1->l, e1->r, NULL) :
1783 26 : exps_bind_column(rel->exps, e1->alias.name, NULL, NULL, 0);
1784 40 : assert(e1_in_exps);
1785 :
1786 : /* write e2 as an e1 alias since the expressions are the same */
1787 40 : sql_exp* e2_as_e1_alias = exp_copy(v->sql, e1_in_exps);
1788 : /* NOTE: it is important to get the rname (exp->l) and name (exp->r) from e2 IN the exps
1789 : * (e2_in_exps), and not from e2, since it could carry an alias from the higher rel */
1790 40 : exp_setalias(e2_as_e1_alias, e2_in_exps->l, e2_in_exps->r);
1791 :
1792 : /* replace e2 with e2_as_e1_alias in expressions list */
1793 40 : node *e2_exps_node = list_find(rel->exps, e2_in_exps, NULL);
1794 40 : list_append_before(rel->exps, e2_exps_node, e2_as_e1_alias);
1795 40 : list_remove_node(rel->exps, NULL, e2_exps_node);
1796 :
1797 : /* finally remove e2 from the groups' list (->r) since it's redundant */
1798 40 : node *e2_r_node = list_find(rel->r, e2, NULL);
1799 40 : list_remove_node(rel->r, NULL, e2_r_node);
1800 :
1801 40 : v->changes++;
1802 : }
1803 : }
1804 : }
1805 : }
1806 136151 : return rel;
1807 : }
1808 :
1809 : sql_exp *list_exps_uses_exp(list *exps, const char *rname, const char *name);
1810 :
1811 : static sql_exp*
1812 9940 : exp_uses_exp(sql_exp *e, const char *rname, const char *name)
1813 : {
1814 9973 : sql_exp *res = NULL;
1815 :
1816 9973 : switch (e->type) {
1817 : case e_psm:
1818 : break;
1819 62 : case e_atom: {
1820 62 : if (e->f)
1821 0 : return list_exps_uses_exp(e->f, rname, name);
1822 : } break;
1823 32 : case e_convert:
1824 32 : return exp_uses_exp(e->l, rname, name);
1825 7967 : case e_column: {
1826 7967 : if (e->l && rname && strcmp(e->l, rname) == 0 &&
1827 2867 : e->r && name && strcmp(e->r, name) == 0)
1828 : return e;
1829 5280 : if (!e->l && !rname &&
1830 255 : e->r && name && strcmp(e->r, name) == 0)
1831 : return e;
1832 : } break;
1833 1813 : case e_func:
1834 : case e_aggr: {
1835 1813 : if (e->l)
1836 1244 : return list_exps_uses_exp(e->l, rname, name);
1837 : } break;
1838 99 : case e_cmp: {
1839 99 : if (e->flag == cmp_in || e->flag == cmp_notin) {
1840 0 : if ((res = exp_uses_exp(e->l, rname, name)))
1841 : return res;
1842 0 : return list_exps_uses_exp(e->r, rname, name);
1843 99 : } else if (e->flag == cmp_or || e->flag == cmp_filter) {
1844 0 : if ((res = list_exps_uses_exp(e->l, rname, name)))
1845 : return res;
1846 0 : return list_exps_uses_exp(e->r, rname, name);
1847 : } else {
1848 99 : if ((res = exp_uses_exp(e->l, rname, name)))
1849 : return res;
1850 99 : if ((res = exp_uses_exp(e->r, rname, name)))
1851 : return res;
1852 98 : if (e->f)
1853 : return exp_uses_exp(e->f, rname, name);
1854 : }
1855 : } break;
1856 : }
1857 : return NULL;
1858 : }
1859 :
1860 : sql_exp *
1861 4583 : list_exps_uses_exp(list *exps, const char *rname, const char *name)
1862 : {
1863 4583 : sql_exp *res = NULL;
1864 :
1865 4583 : if (!exps)
1866 : return NULL;
1867 14325 : for (node *n = exps->h; n && !res; n = n->next) {
1868 9742 : sql_exp *e = n->data;
1869 9742 : res = exp_uses_exp(e, rname, name);
1870 : }
1871 : return res;
1872 : }
1873 :
1874 : /* find in the list of expression an expression which uses e */
1875 : sql_exp *
1876 929 : exps_uses_exp(list *exps, sql_exp *e)
1877 : {
1878 929 : return list_exps_uses_exp(exps, exp_relname(e), exp_name(e));
1879 : }
1880 :
1881 : /*
1882 : * Rewrite aggregations over union all.
1883 : * groupby ([ union all (a, b) ], [gbe], [ count, sum ] )
1884 : *
1885 : * into
1886 : * groupby ( [ union all( groupby( a, [gbe], [ count, sum] ), [ groupby( b, [gbe], [ count, sum] )) , [gbe], [sum, sum] )
1887 : */
1888 : static inline sql_rel *
1889 136151 : rel_push_aggr_down(visitor *v, sql_rel *rel)
1890 : {
1891 136151 : if (rel->op == op_groupby && rel->l) {
1892 136065 : sql_rel *u = rel->l, *ou = u;
1893 136065 : sql_rel *g = rel;
1894 136065 : sql_rel *ul = u->l;
1895 136065 : sql_rel *ur = u->r;
1896 136065 : node *n, *m;
1897 136065 : list *lgbe = NULL, *rgbe = NULL, *gbe = NULL, *exps = NULL;
1898 :
1899 136065 : if (u->op == op_project && !need_distinct(u))
1900 61769 : u = u->l;
1901 :
1902 136065 : if (!u || !is_union(u->op) || need_distinct(u) || is_single(u) || !u->exps || rel_is_ref(u))
1903 : return rel;
1904 :
1905 1785 : ul = u->l;
1906 1785 : ur = u->r;
1907 :
1908 : /* make sure we don't create group by on group by's */
1909 1785 : if (ul->op == op_groupby || ur->op == op_groupby)
1910 : return rel;
1911 :
1912 : /* distinct should be done over the full result */
1913 1643 : for (n = g->exps->h; n; n = n->next) {
1914 1143 : sql_exp *e = n->data;
1915 1143 : sql_subfunc *af = e->f;
1916 :
1917 1143 : if (e->type == e_atom ||
1918 1143 : e->type == e_func ||
1919 571 : (e->type == e_aggr &&
1920 571 : ((strcmp(af->func->base.name, "sum") &&
1921 432 : strcmp(af->func->base.name, "count") &&
1922 184 : strcmp(af->func->base.name, "min") &&
1923 571 : strcmp(af->func->base.name, "max")) ||
1924 : need_distinct(e))))
1925 : return rel;
1926 : }
1927 :
1928 500 : ul = rel_dup(ul);
1929 500 : ur = rel_dup(ur);
1930 500 : if (!is_project(ul->op))
1931 18 : ul = rel_project(v->sql->sa, ul,
1932 : rel_projections(v->sql, ul, NULL, 1, 1));
1933 500 : if (!is_project(ur->op))
1934 21 : ur = rel_project(v->sql->sa, ur,
1935 : rel_projections(v->sql, ur, NULL, 1, 1));
1936 500 : rel_rename_exps(v->sql, u->exps, ul->exps);
1937 500 : rel_rename_exps(v->sql, u->exps, ur->exps);
1938 500 : if (u != ou) {
1939 234 : ul = rel_project(v->sql->sa, ul, NULL);
1940 234 : ul->exps = exps_copy(v->sql, ou->exps);
1941 234 : rel_rename_exps(v->sql, ou->exps, ul->exps);
1942 234 : set_processed(ul);
1943 234 : ur = rel_project(v->sql->sa, ur, NULL);
1944 234 : ur->exps = exps_copy(v->sql, ou->exps);
1945 234 : rel_rename_exps(v->sql, ou->exps, ur->exps);
1946 234 : set_processed(ur);
1947 : }
1948 :
1949 500 : if (g->r && list_length(g->r) > 0) {
1950 374 : list *gbe = g->r;
1951 :
1952 374 : lgbe = exps_copy(v->sql, gbe);
1953 374 : rgbe = exps_copy(v->sql, gbe);
1954 : }
1955 500 : ul = rel_groupby(v->sql, ul, NULL);
1956 500 : ul->r = lgbe;
1957 500 : ul->nrcols = g->nrcols;
1958 500 : ul->card = g->card;
1959 500 : ul->exps = exps_copy(v->sql, g->exps);
1960 500 : ul->nrcols = list_length(ul->exps);
1961 500 : set_processed(ul);
1962 :
1963 500 : ur = rel_groupby(v->sql, ur, NULL);
1964 500 : ur->r = rgbe;
1965 500 : ur->nrcols = g->nrcols;
1966 500 : ur->card = g->card;
1967 500 : ur->exps = exps_copy(v->sql, g->exps);
1968 500 : ur->nrcols = list_length(ur->exps);
1969 500 : set_processed(ur);
1970 :
1971 : /* group by on primary keys which define the partioning scheme
1972 : * don't need a finalizing group by */
1973 : /* how to check if a partition is based on some primary key ?
1974 : * */
1975 500 : if (!list_empty(rel->r)) {
1976 938 : for (node *n = ((list*)rel->r)->h; n; n = n->next) {
1977 564 : sql_exp *e = n->data;
1978 564 : sql_column *c = NULL;
1979 :
1980 564 : if ((c = exp_is_pkey(rel, e)) && partition_find_part(v->sql->session->tr, c->t, NULL)) {
1981 : /* check if key is partition key */
1982 0 : v->changes++;
1983 0 : return rel_inplace_setop(v->sql, rel, ul, ur, op_union,
1984 : rel_projections(v->sql, rel, NULL, 1, 1));
1985 : }
1986 : }
1987 : }
1988 :
1989 500 : if (!list_empty(rel->r)) {
1990 374 : list *ogbe = rel->r;
1991 :
1992 374 : gbe = new_exp_list(v->sql->sa);
1993 938 : for (n = ogbe->h; n; n = n->next) {
1994 564 : sql_exp *e = n->data, *ne;
1995 :
1996 : /* group by in aggreation list */
1997 564 : ne = exps_uses_exp( rel->exps, e);
1998 564 : if (ne)
1999 558 : ne = list_find_exp( ul->exps, ne);
2000 558 : if (!ne) {
2001 : /* e only in the ul/ur->r (group by list) */
2002 8 : ne = exp_ref(v->sql, e);
2003 8 : list_append(ul->exps, ne);
2004 8 : ne = exp_ref(v->sql, e);
2005 8 : list_append(ur->exps, ne);
2006 : }
2007 8 : assert(ne);
2008 564 : ne = exp_ref(v->sql, ne);
2009 564 : append(gbe, ne);
2010 : }
2011 : }
2012 :
2013 500 : u = rel_setop(v->sql->sa, ul, ur, op_union);
2014 500 : rel_setop_set_exps(v->sql, u, rel_projections(v->sql, ul, NULL, 1, 1), false);
2015 500 : set_processed(u);
2016 :
2017 500 : exps = new_exp_list(v->sql->sa);
2018 1520 : for (n = u->exps->h, m = rel->exps->h; n && m; n = n->next, m = m->next) {
2019 1020 : sql_exp *ne, *e = n->data, *oa = m->data;
2020 :
2021 1020 : if (oa->type == e_aggr) {
2022 451 : sql_subfunc *f = oa->f;
2023 451 : int cnt = exp_aggr_is_count(oa);
2024 451 : sql_subfunc *a = sql_bind_func(v->sql, "sys", (cnt)?"sum":f->func->base.name, exp_subtype(e), NULL, F_AGGR, true);
2025 :
2026 451 : assert(a);
2027 : /* union of aggr result may have nils
2028 : * because sum/count of empty set */
2029 451 : set_has_nil(e);
2030 451 : e = exp_ref(v->sql, e);
2031 451 : ne = exp_aggr1(v->sql->sa, e, a, need_distinct(e), 1, e->card, 1);
2032 : } else {
2033 569 : ne = exp_copy(v->sql, oa);
2034 : }
2035 1020 : exp_setname(v->sql->sa, ne, exp_find_rel_name(oa), exp_name(oa));
2036 1020 : append(exps, ne);
2037 : }
2038 500 : v->changes++;
2039 500 : return rel_inplace_groupby( rel, u, gbe, exps);
2040 : }
2041 : return rel;
2042 : }
2043 :
2044 : /*
2045 : * More general
2046 : * groupby(
2047 : * [ outer ] join(
2048 : * project(
2049 : * table(A) [ c1, c2, .. ]
2050 : * ) [ c1, c2, identity(c2) as I, .. ],
2051 : * table(B) [ c1, c2, .. ]
2052 : * ) [ A.c1 = B.c1 ]
2053 : * ) [ I ] [ a1, a2, .. ]
2054 : *
2055 : * ->
2056 : *
2057 : * [ outer ] join(
2058 : * project(
2059 : * table(A) [ c1, c2, .. ]
2060 : * ) [ c1, c2, .. ],
2061 : * groupby (
2062 : * table(B) [ c1, c2, .. ]
2063 : * ) [ B.c1 ] [ a1, a2, .. ]
2064 : * ) [ A.c1 = B.c1 ]
2065 : */
2066 : static sql_rel *
2067 65080 : gen_push_groupby_down(mvc *sql, sql_rel *rel, int *changes)
2068 : {
2069 65080 : sql_rel *j = rel->l;
2070 65080 : list *gbe = rel->r;
2071 :
2072 65080 : if (rel->op == op_groupby && list_length(gbe) == 1 && j->op == op_join){
2073 6147 : sql_rel *jl = j->l, *jr = j->r, *cr, *cl;
2074 6147 : sql_exp *gb = gbe->h->data, *e;
2075 6147 : node *n;
2076 6147 : int left = 1;
2077 6147 : list *aggrs, *aliases, *gbe;
2078 :
2079 6147 : if (!is_identity(gb, jl) && !is_identity(gb, jr))
2080 : return rel;
2081 6 : if (jl->op == op_project &&
2082 4 : (e = list_find_exp( jl->exps, gb)) != NULL &&
2083 2 : find_prop(e->p, PROP_HASHCOL) != NULL) {
2084 : left = 0;
2085 : cr = jr;
2086 : cl = jl;
2087 4 : } else if (jr->op == op_project &&
2088 8 : (e = list_find_exp( jr->exps, gb)) != NULL &&
2089 4 : find_prop(e->p, PROP_HASHCOL) != NULL) {
2090 : left = 1;
2091 : cr = jl;
2092 : cl = jr;
2093 : } else {
2094 0 : return rel;
2095 : }
2096 :
2097 6 : if ((left && is_base(jl->op)) || (!left && is_base(jr->op))||
2098 0 : (left && is_select(jl->op)) || (!left && is_select(jr->op))
2099 0 : || rel_is_join_on_pkey(j, false))
2100 6 : return rel;
2101 :
2102 : /* only add aggr (based on left/right), and repeat the group by column */
2103 0 : aggrs = sa_list(sql->sa);
2104 0 : aliases = sa_list(sql->sa);
2105 0 : if (rel->exps) for (n = rel->exps->h; n; n = n->next) {
2106 0 : sql_exp *ce = n->data;
2107 :
2108 0 : if (exp_is_atom(ce))
2109 0 : list_append(aliases, ce);
2110 0 : else if (ce->type == e_column) {
2111 0 : if (rel_has_exp(cl, ce, false) == 0) /* collect aliases outside groupby */
2112 0 : list_append(aliases, ce);
2113 : else
2114 0 : list_append(aggrs, ce);
2115 0 : } else if (ce->type == e_aggr) {
2116 0 : list *args = ce->l;
2117 :
2118 : /* check args are part of left/right */
2119 0 : if (!list_empty(args) && rel_has_exps(cl, args, false) == 0)
2120 : return rel;
2121 0 : if (rel->op != op_join && exp_aggr_is_count(ce))
2122 0 : ce->p = prop_create(sql->sa, PROP_COUNT, ce->p);
2123 0 : list_append(aggrs, ce);
2124 : }
2125 : }
2126 : /* TODO move any column expressions (aliases) into the project list */
2127 :
2128 : /* find gb in left or right and should be unique */
2129 0 : gbe = sa_list(sql->sa);
2130 : /* push groupby to right, group on join exps */
2131 0 : if (j->exps) for (n = j->exps->h; n; n = n->next) {
2132 0 : sql_exp *ce = n->data, *l = ce->l, *r = ce->r, *e;
2133 :
2134 : /* get left/right hand of e_cmp */
2135 0 : assert(ce->type == e_cmp);
2136 0 : if (ce->flag == cmp_equal && is_alias(l->type) && is_alias(r->type) &&
2137 0 : (((e = rel_find_exp(cr, l)) && rel_find_exp(cl, r)) ||
2138 0 : ((e = rel_find_exp(cr, r)) && rel_find_exp(cl, l)))) {
2139 0 : e = exp_ref(sql, e);
2140 0 : list_append(gbe, e);
2141 : } else {
2142 0 : return rel;
2143 : }
2144 : }
2145 0 : if (!left)
2146 0 : cr = j->r = rel_groupby(sql, cr, gbe);
2147 : else
2148 0 : cr = j->l = rel_groupby(sql, cr, gbe);
2149 0 : cr->exps = list_merge(cr->exps, aggrs, (fdup)NULL);
2150 0 : set_processed(cr);
2151 0 : if (!is_project(cl->op))
2152 0 : cl = rel_project(sql->sa, cl,
2153 : rel_projections(sql, cl, NULL, 1, 1));
2154 0 : cl->exps = list_merge(cl->exps, aliases, (fdup)NULL);
2155 0 : set_processed(cl);
2156 0 : if (!left)
2157 0 : j->l = cl;
2158 : else
2159 0 : j->r = cl;
2160 0 : rel -> l = NULL;
2161 0 : rel_destroy(rel);
2162 :
2163 0 : if (list_empty(cr->exps) && list_empty(j->exps)) { /* remove crossproduct */
2164 0 : sql_rel *r = cl;
2165 0 : if (!left)
2166 0 : j->l = NULL;
2167 : else
2168 0 : j->r = NULL;
2169 0 : rel_destroy(j);
2170 0 : j = r;
2171 : }
2172 0 : (*changes)++;
2173 0 : return j;
2174 : }
2175 : return rel;
2176 : }
2177 :
2178 : /*
2179 : * Rewrite group(project(join(A,Dict)[a.i==dict.i])[...dict.n])[dict.n][ ... dict.n ]
2180 : * into
2181 : * project(join(groupby (A)[a.i],[a.i]), Dict)[a.i==dict.i])[dict.n]
2182 : *
2183 : */
2184 : static inline sql_rel *
2185 136151 : rel_push_groupby_down(visitor *v, sql_rel *rel)
2186 : {
2187 136151 : sql_rel *p = rel->l;
2188 136151 : list *gbe = rel->r;
2189 :
2190 136151 : if (rel->op == op_groupby && gbe && p && is_join(p->op))
2191 9739 : return gen_push_groupby_down(v->sql, rel, &v->changes);
2192 126412 : if (rel->op == op_groupby && gbe && p && p->op == op_project) {
2193 55341 : sql_rel *j = p->l;
2194 55341 : sql_rel *jl, *jr;
2195 55341 : node *n;
2196 :
2197 55341 : if (!j || j->op != op_join || list_length(j->exps) != 1)
2198 53933 : return gen_push_groupby_down(v->sql, rel, &v->changes);
2199 1408 : jl = j->l;
2200 1408 : jr = j->r;
2201 :
2202 : /* check if jr is a dict with index and var still used */
2203 1408 : if (jr->op != op_basetable || jr->l || !jr->r || list_length(jr->exps) != 2)
2204 1408 : return gen_push_groupby_down(v->sql, rel, &v->changes);
2205 :
2206 : /* check if group by is done on dict column */
2207 0 : for(n = gbe->h; n; n = n->next) {
2208 0 : sql_exp *ge = n->data, *pe = NULL, *e = NULL;
2209 :
2210 : /* find group by exp in project, then in dict */
2211 0 : pe = rel_find_exp(p, ge);
2212 0 : if (pe) /* find project exp in right hand of join, ie dict */
2213 0 : e = rel_find_exp(jr, pe);
2214 0 : if (pe && e) { /* Rewrite: join with dict after the group by */
2215 0 : list *pexps = rel_projections(v->sql, rel, NULL, 1, 1), *npexps;
2216 0 : node *m;
2217 0 : sql_exp *ne = j->exps->h->data; /* join exp */
2218 0 : p->l = jl; /* Project now only on the left side of the join */
2219 :
2220 0 : ne = ne->l; /* The left side of the compare is the index of the left */
2221 :
2222 : /* find ge reference in new projection list */
2223 0 : npexps = sa_list(v->sql->sa);
2224 0 : for (m = pexps->h; m; m = m->next) {
2225 0 : sql_exp *a = m->data;
2226 :
2227 0 : if (exp_refers(ge, a)) {
2228 0 : sql_exp *sc = jr->exps->t->data;
2229 0 : sql_exp *e = exp_ref(v->sql, sc);
2230 0 : if (exp_name(a))
2231 0 : exp_prop_alias(v->sql->sa, e, a);
2232 : a = e;
2233 : }
2234 0 : append(npexps, a);
2235 : }
2236 :
2237 : /* find ge in aggr list */
2238 0 : for (m = rel->exps->h; m; m = m->next) {
2239 0 : sql_exp *a = m->data;
2240 :
2241 0 : if (exp_match_exp(a, ge) || exp_refers(ge, a)) {
2242 0 : a = exp_ref(v->sql, ne);
2243 0 : if (exp_name(ne))
2244 0 : exp_prop_alias(v->sql->sa, a, ne);
2245 0 : m->data = a;
2246 : }
2247 : }
2248 :
2249 : /* change alias pe, ie project out the index */
2250 0 : pe->l = (void*)exp_relname(ne);
2251 0 : pe->r = (void*)exp_name(ne);
2252 0 : if (exp_name(ne))
2253 0 : exp_prop_alias(v->sql->sa, pe, ne);
2254 :
2255 : /* change alias ge */
2256 0 : ge->l = (void*)exp_relname(pe);
2257 0 : ge->r = (void*)exp_name(pe);
2258 0 : if (exp_name(pe))
2259 0 : exp_prop_alias(v->sql->sa, ge, pe);
2260 :
2261 : /* zap both project and groupby name hash tables (as we changed names above) */
2262 0 : list_hash_clear(rel->exps);
2263 0 : list_hash_clear((list*)rel->r);
2264 0 : list_hash_clear(p->exps);
2265 :
2266 : /* add join */
2267 0 : j->l = rel;
2268 0 : rel = rel_project(v->sql->sa, j, npexps);
2269 0 : v->changes++;
2270 : }
2271 : }
2272 : }
2273 : return rel;
2274 : }
2275 :
2276 : /* reduce group by expressions based on pkey info
2277 : *
2278 : * The reduced group by and (derived) aggr expressions are restored via
2279 : * extra (new) aggregate columns.
2280 : */
2281 : static inline sql_rel *
2282 136151 : rel_reduce_groupby_exps(visitor *v, sql_rel *rel)
2283 : {
2284 136151 : list *gbe = rel->r;
2285 :
2286 136151 : if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel) && list_length(gbe)) {
2287 51577 : node *n, *m;
2288 51577 : int k, j, i, ngbe = list_length(gbe);
2289 51577 : int8_t *scores = SA_NEW_ARRAY(v->sql->ta, int8_t, ngbe);
2290 51577 : sql_column *c;
2291 51577 : sql_table **tbls = SA_NEW_ARRAY(v->sql->ta, sql_table*, ngbe);
2292 51577 : sql_rel **bts = SA_NEW_ARRAY(v->sql->ta, sql_rel*, ngbe), *bt = NULL;
2293 :
2294 51577 : gbe = rel->r;
2295 125649 : for (k = 0, i = 0, n = gbe->h; n; n = n->next, k++) {
2296 74072 : sql_exp *e = n->data;
2297 :
2298 74072 : c = exp_find_column_(rel, e, -2, &bt);
2299 74072 : if (c) {
2300 79506 : for(j = 0; j < i; j++)
2301 25366 : if (c->t == tbls[j] && bts[j] == bt)
2302 : break;
2303 67593 : tbls[j] = c->t;
2304 67593 : bts[j] = bt;
2305 67593 : i += (j == i);
2306 : }
2307 : }
2308 51577 : if (i) { /* forall tables find pkey and
2309 : remove useless other columns */
2310 : /* TODO also remove group by columns which are related to
2311 : * the other columns using a foreign-key join (n->1), ie 1
2312 : * on the to be removed side.
2313 : */
2314 102326 : for(j = 0; j < i; j++) {
2315 54128 : int l, nr = 0, cnr = 0;
2316 :
2317 54128 : k = list_length(gbe);
2318 54128 : memset(scores, 0, list_length(gbe));
2319 54128 : if (tbls[j]->pkey) {
2320 14282 : for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
2321 10585 : fcmp cmp = (fcmp)&kc_column_cmp;
2322 10585 : sql_exp *e = n->data;
2323 :
2324 10585 : c = exp_find_column_(rel, e, -2, &bt);
2325 16527 : if (c && c->t == tbls[j] && bts[j] == bt &&
2326 5942 : list_find(tbls[j]->pkey->k.columns, c, cmp) != NULL) {
2327 800 : scores[l] = 1;
2328 800 : nr ++;
2329 9322 : } else if (c && c->t == tbls[j] && bts[j] == bt) {
2330 : /* Okay we can cleanup a group by column */
2331 5142 : scores[l] = -1;
2332 5142 : cnr ++;
2333 : }
2334 : }
2335 : }
2336 3697 : if (nr) {
2337 789 : int all = (list_length(tbls[j]->pkey->k.columns) == nr);
2338 789 : sql_kc *kc = tbls[j]->pkey->k.columns->h->data;
2339 :
2340 789 : c = kc->c;
2341 2119 : for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
2342 1330 : sql_exp *e = n->data;
2343 :
2344 : /* pkey based group by */
2345 1493 : if (scores[l] == 1 && ((all ||
2346 : /* first of key */
2347 935 : (c == exp_find_column(rel, e, -2))) && !find_prop(e->p, PROP_HASHCOL)))
2348 12 : e->p = prop_create(v->sql->sa, PROP_HASHCOL, e->p);
2349 : }
2350 2998 : for (m = rel->exps->h; m; m = m->next ){
2351 2209 : sql_exp *e = m->data;
2352 :
2353 7788 : for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
2354 5591 : sql_exp *gb = n->data;
2355 :
2356 : /* pkey based group by */
2357 5591 : if (scores[l] == 1 && exp_match_exp(e,gb) && find_prop(gb->p, PROP_HASHCOL) && !find_prop(e->p, PROP_HASHCOL)) {
2358 12 : e->p = prop_create(v->sql->sa, PROP_HASHCOL, e->p);
2359 12 : break;
2360 : }
2361 :
2362 : }
2363 : }
2364 : }
2365 54128 : if (cnr && nr && list_length(tbls[j]->pkey->k.columns) == nr) {
2366 108 : list *ngbe = new_exp_list(v->sql->sa);
2367 :
2368 392 : for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
2369 284 : sql_exp *e = n->data;
2370 :
2371 : /* keep the group by columns which form a primary key
2372 : * of this table. And those unrelated to this table. */
2373 284 : if (scores[l] != -1)
2374 139 : append(ngbe, e);
2375 : }
2376 108 : rel->r = ngbe;
2377 : /* rewrite gbe and aggr, in the aggr list */
2378 410 : for (m = rel->exps->h; m; m = m->next ){
2379 302 : sql_exp *e = m->data;
2380 302 : int fnd = 0;
2381 :
2382 1276 : for (l = 0, n = gbe->h; l < k && n && !fnd; l++, n = n->next) {
2383 974 : sql_exp *gb = n->data;
2384 :
2385 974 : if (scores[l] == -1 && exp_refers(gb, e)) {
2386 145 : sql_exp *rs = exp_column(v->sql->sa, gb->l?gb->l:exp_relname(gb), gb->r?gb->r:exp_name(gb), exp_subtype(gb), rel->card, has_nil(gb), is_unique(gb), is_intern(gb));
2387 145 : exp_setname(v->sql->sa, rs, exp_find_rel_name(e), exp_name(e));
2388 145 : e = rs;
2389 145 : fnd = 1;
2390 : }
2391 : }
2392 302 : m->data = e;
2393 : }
2394 : /* new reduced aggr expression list */
2395 108 : assert(list_length(rel->exps)>0);
2396 : /* only one reduction at a time */
2397 108 : list_hash_clear(rel->exps);
2398 108 : v->changes++;
2399 108 : return rel;
2400 : }
2401 54020 : gbe = rel->r;
2402 : }
2403 : }
2404 : }
2405 : /* remove constants from group by list */
2406 136043 : if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel)) {
2407 51957 : int i;
2408 51957 : node *n;
2409 :
2410 125745 : for (i = 0, n = gbe->h; n; n = n->next) {
2411 73788 : sql_exp *e = n->data;
2412 :
2413 73788 : if (exp_is_atom(e))
2414 72 : i++;
2415 : }
2416 51957 : if (i) {
2417 68 : list *ngbe = new_exp_list(v->sql->sa);
2418 68 : list *dgbe = new_exp_list(v->sql->sa);
2419 :
2420 153 : for (n = gbe->h; n; n = n->next) {
2421 85 : sql_exp *e = n->data;
2422 :
2423 85 : if (!exp_is_atom(e))
2424 13 : append(ngbe, e);
2425 : /* we need at least one gbe */
2426 72 : else if (!n->next && list_empty(ngbe))
2427 56 : append(ngbe, e);
2428 : else
2429 16 : append(dgbe, e);
2430 : }
2431 68 : rel->r = ngbe;
2432 68 : if (!list_empty(dgbe)) {
2433 : /* use atom's directly in the aggr expr list */
2434 :
2435 54 : for (n = rel->exps->h; n; n = n->next) {
2436 38 : sql_exp *e = n->data, *ne = NULL;
2437 :
2438 38 : if (e->type == e_column) {
2439 33 : if (e->l)
2440 33 : ne = exps_bind_column2(dgbe, e->l, e->r, NULL);
2441 : else
2442 0 : ne = exps_bind_column(dgbe, e->r, NULL, NULL, 1);
2443 33 : if (ne) {
2444 16 : ne = exp_copy(v->sql, ne);
2445 16 : exp_prop_alias(v->sql->sa, ne, e);
2446 16 : e = ne;
2447 : }
2448 : }
2449 38 : n->data = e;
2450 : }
2451 16 : list_hash_clear(rel->exps);
2452 16 : v->changes++;
2453 : }
2454 : }
2455 : }
2456 : return rel;
2457 : }
2458 :
2459 : /* if all arguments to a distinct aggregate are unique, remove 'distinct' property */
2460 : static inline sql_rel *
2461 136151 : rel_distinct_aggregate_on_unique_values(visitor *v, sql_rel *rel)
2462 : {
2463 136151 : if (is_groupby(rel->op) && !list_empty(rel->exps)) {
2464 400248 : for (node *n = rel->exps->h; n; n = n->next) {
2465 264201 : sql_exp *exp = (sql_exp*) n->data;
2466 :
2467 264201 : if (exp->type == e_aggr && need_distinct(exp)) {
2468 1337 : bool all_unique = true;
2469 1337 : list *l = exp->l;
2470 :
2471 2674 : for (node *m = l->h; m && all_unique; m = m->next) {
2472 1337 : sql_exp *arg = (sql_exp*) m->data;
2473 :
2474 1434 : all_unique &= arg->type == e_column && is_unique(arg) && (!is_semantics(exp) || !has_nil(arg));
2475 : }
2476 1337 : if (!all_unique && exps_card(l) > CARD_ATOM)
2477 1067 : all_unique = exps_unique(v->sql, rel, l) && (!is_semantics(exp) || !have_nil(l));
2478 : if (all_unique) {
2479 97 : set_nodistinct(exp);
2480 97 : v->changes++;
2481 : }
2482 : }
2483 : }
2484 : }
2485 136151 : return rel;
2486 : }
2487 :
2488 : static inline sql_rel *
2489 136151 : rel_remove_const_aggr(visitor *v, sql_rel *rel)
2490 : {
2491 136151 : if (!rel)
2492 : return rel;
2493 136151 : if (rel && is_groupby(rel->op) && list_length(rel->exps) >= 1 && !rel_is_ref(rel)) {
2494 92397 : int needed = 0;
2495 262748 : for (node *n = rel->exps->h; n; n = n->next) {
2496 170351 : sql_exp *exp = (sql_exp*) n->data;
2497 :
2498 170351 : if (exp_is_atom(exp) && exp->type != e_aggr)
2499 72 : needed++;
2500 : }
2501 92397 : if (needed) {
2502 70 : if (!list_empty(rel->r)) {
2503 10 : int atoms = 0;
2504 : /* corner case, all grouping columns are atoms */
2505 21 : for (node *n = ((list*)rel->r)->h; n; n = n->next) {
2506 11 : sql_exp *exp = (sql_exp*) n->data;
2507 :
2508 11 : if (exp_is_atom(exp))
2509 2 : atoms++;
2510 : }
2511 10 : if (atoms == list_length(rel->r)) {
2512 2 : list *nexps = sa_list(v->sql->sa);
2513 5 : for (node *n = rel->exps->h; n; ) {
2514 3 : node *next = n->next;
2515 3 : sql_exp *e = (sql_exp*) n->data;
2516 :
2517 : /* remove references to constant group by columns */
2518 3 : if (e->type == e_column) {
2519 0 : sql_exp *found = NULL;
2520 0 : const char *nrname = (const char*) e->l, *nename = (const char*) e->r;
2521 0 : if (nrname && nename) {
2522 0 : found = exps_bind_column2(rel->r, nrname, nename, NULL);
2523 0 : } else if (nename) {
2524 0 : found = exps_bind_column(rel->r, nename, NULL, NULL, 1);
2525 : }
2526 0 : if (found) {
2527 0 : list_append(nexps, found);
2528 0 : list_remove_node(rel->exps, NULL, n);
2529 : }
2530 : }
2531 : n = next;
2532 : }
2533 2 : rel->r = NULL; /* transform it into a global aggregate */
2534 2 : rel->exps = list_merge(nexps, rel->exps, (fdup) NULL); /* add grouping columns back as projections */
2535 : /* global aggregates may return 1 row, so filter it based on the count */
2536 2 : sql_subfunc *cf = sql_bind_func(v->sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true);
2537 2 : sql_exp *count = exp_aggr(v->sql->sa, NULL, cf, 0, 1, CARD_ATOM, 0);
2538 2 : count = rel_groupby_add_aggr(v->sql, rel, count);
2539 2 : sql_exp *cp = exp_compare(v->sql->sa, exp_ref(v->sql, count), exp_atom(v->sql->sa, atom_int(v->sql->sa, exp_subtype(count), 0)), cmp_notequal);
2540 2 : rel = rel_select(v->sql->sa, rel, cp);
2541 2 : set_processed(rel);
2542 2 : return rel;
2543 : }
2544 60 : } else if (list_length(rel->exps) == needed) { /* all are const */
2545 56 : sql_rel *ll = rel->l;
2546 56 : rel->op = op_project;
2547 : /* TODO check if l->l == const, else change that */
2548 56 : if (ll && ll->l) {
2549 52 : rel_destroy(ll);
2550 52 : rel->l = rel_project_exp(v->sql, exp_atom_bool(v->sql->sa, 1));
2551 : }
2552 56 : return rel;
2553 : }
2554 12 : sql_rel *nrel = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
2555 36 : for (node *n = nrel->exps->h; n; n = n->next) {
2556 24 : sql_exp *exp = (sql_exp*) n->data;
2557 24 : if (exp->type == e_column) {
2558 24 : sql_exp *e = rel_find_exp(rel, exp);
2559 :
2560 24 : if (e && exp_is_atom(e) && e->type == e_atom) {
2561 14 : sql_exp *ne = exp_copy(v->sql, e);
2562 14 : exp_setname(v->sql->sa, ne, exp_find_rel_name(exp), exp_name(exp));
2563 14 : n->data = ne;
2564 14 : v->changes++;
2565 : }
2566 : }
2567 : }
2568 12 : list *nl = sa_list(v->sql->sa);
2569 36 : for (node *n = rel->exps->h; n; n = n->next) {
2570 24 : sql_exp *exp = (sql_exp*) n->data;
2571 :
2572 24 : if (!exp_is_atom(exp) || exp->type != e_atom)
2573 10 : append(nl, exp);
2574 : }
2575 12 : rel->exps = nl;
2576 12 : return nrel;
2577 : }
2578 : }
2579 : return rel;
2580 : }
2581 :
2582 : #if 0
2583 : static sql_rel *
2584 : rel_groupby_distinct2(visitor *v, sql_rel *rel)
2585 : {
2586 : list *ngbes = sa_list(v->sql->sa), *gbes, *naggrs = sa_list(v->sql->sa), *aggrs = sa_list(v->sql->sa);
2587 : sql_rel *l;
2588 : node *n;
2589 :
2590 : gbes = rel->r;
2591 : if (!gbes)
2592 : return rel;
2593 :
2594 : /* check if each aggr is, rewritable (max,min,sum,count)
2595 : * and only has one argument */
2596 : for (n = rel->exps->h; n; n = n->next) {
2597 : sql_exp *e = n->data;
2598 : sql_subfunc *af = e->f;
2599 :
2600 : if (e->type == e_aggr &&
2601 : (strcmp(af->func->base.name, "sum") &&
2602 : strcmp(af->func->base.name, "count") &&
2603 : strcmp(af->func->base.name, "min") &&
2604 : strcmp(af->func->base.name, "max")))
2605 : return rel;
2606 : }
2607 :
2608 : for (n = gbes->h; n; n = n->next) {
2609 : sql_exp *e = n->data;
2610 :
2611 : e = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
2612 : append(ngbes, e);
2613 : }
2614 :
2615 : /* 1 for each aggr(distinct v) add the attribute expression v to gbes and aggrs list
2616 : * 2 for each aggr(z) add aggr_phase2('z') to the naggrs list
2617 : * 3 for each group by col, add also to the naggrs list
2618 : * */
2619 : for (n = rel->exps->h; n; n = n->next) {
2620 : sql_exp *e = n->data;
2621 :
2622 : if (e->type == e_aggr && need_distinct(e)) { /* 1 */
2623 : /* need column expression */
2624 : list *args = e->l;
2625 : sql_exp *v = args->h->data;
2626 : append(gbes, v);
2627 : if (!exp_name(v))
2628 : exp_label(v->sql->sa, v, ++v->sql->label);
2629 : v = exp_column(v->sql->sa, exp_find_rel_name(v), exp_name(v), exp_subtype(v), v->card, has_nil(v), is_unique(v), is_intern(v));
2630 : append(aggrs, v);
2631 : v = exp_aggr1(v->sql->sa, v, e->f, need_distinct(e), 1, e->card, 1);
2632 : exp_setname(v->sql->sa, v, exp_find_rel_name(e), exp_name(e));
2633 : append(naggrs, v);
2634 : } else if (e->type == e_aggr && !need_distinct(e)) {
2635 : sql_exp *v;
2636 : sql_subfunc *f = e->f;
2637 : int cnt = exp_aggr_is_count(e);
2638 : sql_subfunc *a = sql_bind_func(v->sql, "sys", (cnt)?"sum":f->func->base.name, exp_subtype(e), NULL, F_AGGR, true);
2639 :
2640 : append(aggrs, e);
2641 : if (!exp_name(e))
2642 : exp_label(v->sql->sa, e, ++v->sql->label);
2643 : set_has_nil(e);
2644 : v = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
2645 : v = exp_aggr1(v->sql->sa, v, a, 0, 1, e->card, 1);
2646 : if (cnt)
2647 : set_zero_if_empty(v);
2648 : exp_setname(v->sql->sa, v, exp_find_rel_name(e), exp_name(e));
2649 : append(naggrs, v);
2650 : } else { /* group by col */
2651 : if (list_find_exp(gbes, e) || !list_find_exp(naggrs, e)) {
2652 : append(aggrs, e);
2653 :
2654 : e = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
2655 : }
2656 : append(naggrs, e);
2657 : }
2658 : }
2659 :
2660 : l = rel->l = rel_groupby(v->sql, rel->l, gbes);
2661 : l->exps = aggrs;
2662 : rel->r = ngbes;
2663 : rel->exps = naggrs;
2664 : v->changes++;
2665 : return rel;
2666 : }
2667 : #endif
2668 :
2669 : /* Rewrite group by expressions with distinct
2670 : *
2671 : * ie select a, count(distinct b) from c where ... groupby a;
2672 : * No other aggregations should be present
2673 : *
2674 : * Rewrite the more general case, good for parallel execution
2675 : *
2676 : * groupby(R) [e,f] [ aggr1 a distinct, aggr2 b distinct, aggr3 c, aggr4 d]
2677 : *
2678 : * into
2679 : *
2680 : * groupby(
2681 : * groupby(R) [e,f,a,b] [ a, b, aggr3 c, aggr4 d]
2682 : * ) [e,f]( aggr1 a distinct, aggr2 b distinct, aggr3_phase2 c, aggr4_phase2 d)
2683 : */
2684 : static inline sql_rel *
2685 136151 : rel_groupby_distinct(visitor *v, sql_rel *rel)
2686 : {
2687 136151 : node *n;
2688 :
2689 136151 : if (is_groupby(rel->op)) {
2690 136065 : sql_rel *l = rel->l;
2691 136065 : if (!l || is_groupby(l->op))
2692 : return rel;
2693 : }
2694 135922 : if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel)) {
2695 51919 : int nr = 0, anr = 0;
2696 51919 : list *gbe, *ngbe, *arg, *exps, *nexps;
2697 51919 : sql_exp *distinct = NULL, *darg, *found;
2698 51919 : sql_rel *l = NULL;
2699 :
2700 180381 : for (n=rel->exps->h; n && nr <= 2; n = n->next) {
2701 128462 : sql_exp *e = n->data;
2702 128462 : if (need_distinct(e)) {
2703 1063 : distinct = n->data;
2704 1063 : nr++;
2705 : }
2706 128462 : anr += is_aggr(e->type);
2707 : }
2708 51919 : if (nr < 1 || distinct->type != e_aggr)
2709 : return rel;
2710 670 : if (nr > 1 || anr > nr)
2711 : return rel;//rel_groupby_distinct2(v, rel);
2712 122 : arg = distinct->l;
2713 122 : if (list_length(arg) != 1 || list_length(rel->r) + nr != list_length(rel->exps))
2714 2 : return rel;
2715 :
2716 120 : gbe = rel->r;
2717 120 : ngbe = sa_list(v->sql->sa);
2718 120 : exps = sa_list(v->sql->sa);
2719 120 : nexps = sa_list(v->sql->sa);
2720 382 : for (n=rel->exps->h; n; n = n->next) {
2721 262 : sql_exp *e = n->data;
2722 262 : if (e != distinct) {
2723 142 : if (e->type == e_aggr) { /* copy the arguments to the aggregate */
2724 0 : list *args = e->l;
2725 0 : if (args) {
2726 0 : for (node *n = args->h ; n ; n = n->next) {
2727 0 : sql_exp *e = n->data;
2728 0 : list_append(ngbe, exp_copy(v->sql, e));
2729 0 : list_append(exps, exp_copy(v->sql, e));
2730 : }
2731 : }
2732 : } else {
2733 142 : e = exp_ref(v->sql, e);
2734 142 : append(ngbe, e);
2735 142 : append(exps, e);
2736 : }
2737 142 : if (e->type == e_aggr) /* aggregates must be copied */
2738 0 : e = exp_copy(v->sql, e);
2739 : else
2740 142 : e = exp_ref(v->sql, e);
2741 142 : append(nexps, e);
2742 : }
2743 : }
2744 :
2745 120 : darg = arg->h->data;
2746 120 : if ((found = exps_find_exp(exps, darg)) == NULL) { /* not already in the groups projection list */
2747 114 : if ((found = exps_find_exp(gbe, darg))) { /* first find if the aggregate argument already exists in the grouping list */
2748 0 : darg = exp_ref(v->sql, found);
2749 : } else {
2750 114 : list_append(gbe, darg = exp_copy(v->sql, darg));
2751 114 : exp_label(v->sql->sa, darg, ++v->sql->label);
2752 114 : darg = exp_ref(v->sql, darg);
2753 : }
2754 114 : list_append(exps, darg);
2755 114 : darg = exp_ref(v->sql, darg);
2756 : } else {
2757 6 : darg = exp_ref(v->sql, found);
2758 : }
2759 120 : arg->h->data = darg;
2760 120 : l = rel->l = rel_groupby(v->sql, rel->l, gbe);
2761 120 : l->exps = exps;
2762 120 : set_processed(l);
2763 120 : rel->r = ngbe;
2764 120 : rel->exps = nexps;
2765 120 : set_nodistinct(distinct);
2766 120 : append(nexps, distinct);
2767 120 : v->changes++;
2768 : }
2769 : return rel;
2770 : }
2771 :
2772 : /*
2773 : * Push Count inside crossjoin down, and multiply the results
2774 : *
2775 : * project ( project(
2776 : * group by ( crossproduct (
2777 : * crossproduct( project (
2778 : * L, => group by (
2779 : * R L
2780 : * ) [ ] [ count NOT NULL ] ) [ ] [ count NOT NULL ]
2781 : * ) ),
2782 : * ) [ NOT NULL ] project (
2783 : * group by (
2784 : * R
2785 : * ) [ ] [ count NOT NULL ]
2786 : * )
2787 : * ) [ sql_mul(.., .. NOT NULL) ]
2788 : * )
2789 : */
2790 : static inline sql_rel *
2791 136151 : rel_push_count_down(visitor *v, sql_rel *rel)
2792 : {
2793 136151 : sql_rel *r = rel->l;
2794 :
2795 136151 : if (is_groupby(rel->op) && !rel_is_ref(rel) && list_empty(rel->r) &&
2796 40786 : r && !r->exps && r->op == op_join && !(rel_is_ref(r)) &&
2797 : /* currently only single count aggregation is handled, no other projects or aggregation */
2798 62 : list_length(rel->exps) == 1 && exp_aggr_is_count(rel->exps->h->data)) {
2799 12 : sql_exp *nce, *oce, *cnt1 = NULL, *cnt2 = NULL;
2800 12 : sql_rel *gbl = NULL, *gbr = NULL; /* Group By */
2801 12 : sql_rel *cp = NULL; /* Cross Product */
2802 12 : sql_rel *srel;
2803 :
2804 12 : oce = rel->exps->h->data;
2805 12 : if (oce->l) /* we only handle COUNT(*) */
2806 : return rel;
2807 :
2808 10 : srel = r->l;
2809 : {
2810 10 : sql_subfunc *cf = sql_bind_func(v->sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true);
2811 10 : sql_exp *e = exp_aggr(v->sql->sa, NULL, cf, need_distinct(oce), need_no_nil(oce), oce->card, 0);
2812 :
2813 10 : exp_label(v->sql->sa, e, ++v->sql->label);
2814 10 : cnt1 = exp_ref(v->sql, e);
2815 10 : gbl = rel_groupby(v->sql, rel_dup(srel), NULL);
2816 10 : set_processed(gbl);
2817 10 : rel_groupby_add_aggr(v->sql, gbl, e);
2818 : }
2819 :
2820 10 : srel = r->r;
2821 : {
2822 10 : sql_subfunc *cf = sql_bind_func(v->sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true);
2823 10 : sql_exp *e = exp_aggr(v->sql->sa, NULL, cf, need_distinct(oce), need_no_nil(oce), oce->card, 0);
2824 :
2825 10 : exp_label(v->sql->sa, e, ++v->sql->label);
2826 10 : cnt2 = exp_ref(v->sql, e);
2827 10 : gbr = rel_groupby(v->sql, rel_dup(srel), NULL);
2828 10 : set_processed(gbr);
2829 10 : rel_groupby_add_aggr(v->sql, gbr, e);
2830 : }
2831 :
2832 10 : cp = rel_crossproduct(v->sql->sa, gbl, gbr, op_join);
2833 10 : set_processed(cp);
2834 :
2835 10 : if (!(nce = rel_binop_(v->sql, NULL, cnt1, cnt2, "sys", "sql_mul", card_value))) {
2836 0 : v->sql->session->status = 0;
2837 0 : v->sql->errstr[0] = '\0';
2838 0 : return rel; /* error, fallback to original expression */
2839 : }
2840 : /* because of remote plans, make sure "sql_mul" returns bigint. The cardinality is atomic, so no major performance penalty */
2841 10 : if (subtype_cmp(exp_subtype(oce), exp_subtype(nce)) != 0)
2842 10 : nce = exp_convert(v->sql->sa, nce, exp_subtype(nce), exp_subtype(oce));
2843 10 : if (exp_name(oce))
2844 10 : exp_prop_alias(v->sql->sa, nce, oce);
2845 :
2846 10 : rel_destroy(rel);
2847 10 : rel = rel_project(v->sql->sa, cp, append(new_exp_list(v->sql->sa), nce));
2848 10 : set_processed(rel);
2849 :
2850 10 : v->changes++;
2851 : }
2852 :
2853 : return rel;
2854 : }
2855 :
2856 : static inline sql_rel *
2857 43989 : rel_basecount(visitor *v, sql_rel *rel)
2858 : {
2859 54338 : if (is_groupby(rel->op) && !rel_is_ref(rel) && rel->l && list_empty(rel->r) &&
2860 19500 : list_length(rel->exps) == 1 && exp_aggr_is_count(rel->exps->h->data)) {
2861 3896 : sql_rel *bt = rel->l;
2862 3896 : sql_exp *e = rel->exps->h->data;
2863 3896 : if (is_basetable(bt->op) && list_empty(e->l)) { /* count(*) */
2864 : /* change into select cnt('schema','table') */
2865 628 : sql_table *t = bt->l;
2866 : /* I need to get the declared table's frame number to make this work correctly for those */
2867 628 : if (!isTable(t) || isDeclaredTable(t))
2868 : return rel;
2869 533 : sql_subfunc *cf = sql_bind_func(v->sql, "sys", "cnt", sql_bind_localtype("str"), sql_bind_localtype("str"), F_FUNC, true);
2870 533 : list *exps = sa_list(v->sql->sa);
2871 533 : append(exps, exp_atom_str(v->sql->sa, t->s->base.name, sql_bind_localtype("str")));
2872 533 : append(exps, exp_atom_str(v->sql->sa, t->base.name, sql_bind_localtype("str")));
2873 533 : sql_exp *ne = exp_op(v->sql->sa, exps, cf);
2874 :
2875 533 : ne = exp_propagate(v->sql->sa, ne, e);
2876 533 : exp_setname(v->sql->sa, ne, exp_find_rel_name(e), exp_name(e));
2877 533 : rel_destroy(rel);
2878 533 : rel = rel_project(v->sql->sa, NULL, append(sa_list(v->sql->sa), ne));
2879 533 : v->changes++;
2880 : }
2881 : }
2882 : return rel;
2883 : }
2884 :
2885 : static inline sql_rel *
2886 43989 : rel_simplify_count(visitor *v, sql_rel *rel)
2887 : {
2888 43989 : if (is_groupby(rel->op) && !list_empty(rel->exps)) {
2889 43883 : mvc *sql = v->sql;
2890 43883 : int ncountstar = 0;
2891 :
2892 : /* Convert count(no null) into count(*) */
2893 136911 : for (node *n = rel->exps->h; n ; n = n->next) {
2894 93028 : sql_exp *e = n->data;
2895 :
2896 93028 : if (exp_aggr_is_count(e) && !need_distinct(e)) {
2897 13359 : if (list_length(e->l) == 0) {
2898 10493 : ncountstar++;
2899 2866 : } else if (list_length(e->l) == 1 && !has_nil((sql_exp*)((list*)e->l)->h->data)) {
2900 2413 : sql_subfunc *cf = sql_bind_func(sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true);
2901 2413 : sql_exp *ne = exp_aggr(sql->sa, NULL, cf, 0, 0, e->card, 0);
2902 2413 : if (exp_name(e))
2903 2413 : exp_prop_alias(sql->sa, ne, e);
2904 2413 : n->data = ne;
2905 2413 : ncountstar++;
2906 2413 : v->changes++;
2907 : }
2908 : }
2909 : }
2910 : /* With multiple count(*), use exp_ref to reduce the number of calls to this aggregate */
2911 43883 : if (ncountstar > 1) {
2912 9 : sql_exp *count_star = NULL;
2913 9 : sql_rel *nrel = rel_project(v->sql->sa, rel, NULL);
2914 9 : list *aexps = sa_list(v->sql->sa), *nexps = sa_list(v->sql->sa);
2915 9 : nrel->exps = nexps;
2916 44 : for (node *n = rel->exps->h; n ; n = n->next) {
2917 35 : sql_exp *e = n->data;
2918 :
2919 35 : if (exp_aggr_is_count(e) && !need_distinct(e) && list_length(e->l) == 0) {
2920 20 : if (!count_star) {
2921 9 : count_star = e;
2922 9 : append(aexps, e);
2923 9 : append(nexps, exp_ref(sql, e));
2924 : } else {
2925 11 : sql_exp *ne = exp_ref(sql, count_star);
2926 :
2927 11 : if (exp_name(e))
2928 11 : exp_prop_alias(sql->sa, ne, e);
2929 11 : v->changes++;
2930 11 : append(nexps, ne);
2931 : }
2932 : } else {
2933 15 : append(aexps, e);
2934 15 : append(nexps, exp_ref(sql, e));
2935 : }
2936 : }
2937 9 : rel->exps = aexps;
2938 9 : return nrel;
2939 : }
2940 : }
2941 : return rel;
2942 : }
2943 :
2944 : static sql_rel *
2945 43989 : rel_groupjoin(visitor *v, sql_rel *rel)
2946 : {
2947 43989 : if (!rel || rel_is_ref(rel) || !is_groupby(rel->op) || list_empty(rel->r))
2948 15824 : return rel;
2949 :
2950 28165 : sql_rel *j = rel->l;
2951 28165 : if (!j || rel_is_ref(j) /*|| !is_left(j->op)*/ || j->op != op_join || list_length(rel->exps) > 1 /* only join because left joins aren't optimized jet (TODO), only length 1 as implementation of groupjoins is missing */ || !list_empty(rel->attr))
2952 27452 : return rel;
2953 : /* check group by exps == equi join exps */
2954 713 : list *gbes = rel->r;
2955 713 : if (list_length(gbes) != list_length(j->exps))
2956 : return rel;
2957 618 : int nr = 0;
2958 1233 : for(node *n = gbes->h; n; n = n->next) {
2959 618 : sql_exp *gbe = n->data;
2960 1233 : for(node *m = j->exps->h; m; m = m->next) {
2961 618 : sql_exp *je = m->data;
2962 618 : if (je->type != e_cmp || je->flag != cmp_equal)
2963 : return rel;
2964 : /* check if its a join exp (ie not a selection) */
2965 619 : if (!( (!rel_has_exp(j->l, je->l, false) && !rel_has_exp(j->r, je->r, false)) ||
2966 8 : (!rel_has_exp(j->l, je->r, false) && !rel_has_exp(j->r, je->l, false))))
2967 0 : return rel;
2968 615 : if (exp_match(je->l, gbe)) {
2969 78 : nr++;
2970 537 : } else if (exp_match(je->r, gbe)) {
2971 10 : nr++;
2972 : }
2973 : }
2974 : }
2975 615 : if (nr == list_length(gbes)) {
2976 : // printf("#group by converted\n");
2977 88 : j = rel_dup(j);
2978 88 : j->attr = rel->exps;
2979 88 : v->changes++;
2980 88 : rel_destroy(rel);
2981 88 : return j;
2982 : }
2983 : return rel;
2984 : }
2985 :
2986 : static sql_rel *
2987 4393001 : rel_optimize_projections_(visitor *v, sql_rel *rel)
2988 : {
2989 4393001 : rel = rel_project_cse(v, rel);
2990 :
2991 4393001 : if (!rel || !is_groupby(rel->op))
2992 : return rel;
2993 :
2994 136151 : rel = rel_remove_const_aggr(v, rel);
2995 136151 : if (v->value_based_opt) {
2996 43989 : rel = rel_simplify_sum(v, rel);
2997 43989 : rel = rel_simplify_groupby_columns(v, rel);
2998 : }
2999 136151 : rel = rel_groupby_cse(v, rel);
3000 136151 : rel = rel_push_aggr_down(v, rel);
3001 136151 : rel = rel_push_groupby_down(v, rel);
3002 136151 : rel = rel_reduce_groupby_exps(v, rel);
3003 136151 : rel = rel_distinct_aggregate_on_unique_values(v, rel);
3004 136151 : rel = rel_groupby_distinct(v, rel);
3005 136151 : rel = rel_push_count_down(v, rel);
3006 : /* only when value_based_opt is on, ie not for dependency resolution */
3007 136151 : if (v->value_based_opt) {
3008 43989 : rel = rel_simplify_count(v, rel);
3009 43989 : rel = rel_basecount(v, rel);
3010 :
3011 43989 : rel = rel_groupjoin(v, rel);
3012 : }
3013 : return rel;
3014 : }
3015 :
3016 : static sql_rel *
3017 166419 : rel_optimize_projections(visitor *v, global_props *gp, sql_rel *rel)
3018 : {
3019 166419 : (void) gp;
3020 166419 : return rel_visitor_topdown(v, rel, &rel_optimize_projections_);
3021 : }
3022 :
3023 : run_optimizer
3024 596801 : bind_optimize_projections(visitor *v, global_props *gp)
3025 : {
3026 596801 : int flag = v->sql->sql_optimizer;
3027 596613 : return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_project] || gp->cnt[op_union]
3028 1193414 : || gp->cnt[op_inter] || gp->cnt[op_except]) && (flag & optimize_projections) ? rel_optimize_projections : NULL;
3029 : }
3030 :
3031 :
3032 : static inline sql_rel *
3033 3276759 : rel_push_project_down_union(visitor *v, sql_rel *rel)
3034 : {
3035 : /* first remove distinct if already unique */
3036 3276759 : if (rel->op == op_project && need_distinct(rel) && rel->exps && exps_unique(v->sql, rel, rel->exps) && !have_nil(rel->exps)) {
3037 13 : set_nodistinct(rel);
3038 13 : if (exps_card(rel->exps) <= CARD_ATOM && rel->card > CARD_ATOM) { /* if the projection just contains constants, then no topN is needed */
3039 0 : sql_rel *nl = rel->l = rel_topn(v->sql->sa, rel->l, append(sa_list(v->sql->sa), exp_atom_lng(v->sql->sa, 1)));
3040 0 : set_processed(nl);
3041 : }
3042 13 : v->changes++;
3043 : }
3044 :
3045 3276759 : if (rel->op == op_project && rel->l && rel->exps && list_empty(rel->r)) {
3046 973190 : int need_distinct = need_distinct(rel);
3047 973190 : sql_rel *u = rel->l;
3048 973190 : sql_rel *p = rel;
3049 973190 : sql_rel *ul = u->l;
3050 973190 : sql_rel *ur = u->r;
3051 :
3052 973190 : if (!u || !is_union(u->op) || need_distinct(u) || !u->exps || rel_is_ref(u) || project_unsafe(rel,0))
3053 947086 : return rel;
3054 : /* don't push project down union of single values */
3055 26104 : if ((is_project(ul->op) && !ul->l) || (is_project(ur->op) && !ur->l))
3056 : return rel;
3057 :
3058 26058 : ul = rel_dup(ul);
3059 26058 : ur = rel_dup(ur);
3060 :
3061 26058 : if (!is_project(ul->op))
3062 196 : ul = rel_project(v->sql->sa, ul,
3063 : rel_projections(v->sql, ul, NULL, 1, 1));
3064 26058 : if (!is_project(ur->op))
3065 442 : ur = rel_project(v->sql->sa, ur,
3066 : rel_projections(v->sql, ur, NULL, 1, 1));
3067 26058 : need_distinct = (need_distinct &&
3068 0 : (!exps_unique(v->sql, ul, ul->exps) || have_nil(ul->exps) ||
3069 0 : !exps_unique(v->sql, ur, ur->exps) || have_nil(ur->exps)));
3070 26058 : rel_rename_exps(v->sql, u->exps, ul->exps);
3071 26058 : rel_rename_exps(v->sql, u->exps, ur->exps);
3072 :
3073 : /* introduce projects under the set */
3074 26058 : ul = rel_project(v->sql->sa, ul, NULL);
3075 26058 : if (need_distinct)
3076 0 : set_distinct(ul);
3077 26058 : ur = rel_project(v->sql->sa, ur, NULL);
3078 26058 : if (need_distinct)
3079 0 : set_distinct(ur);
3080 :
3081 26058 : ul->exps = exps_copy(v->sql, p->exps);
3082 26058 : set_processed(ul);
3083 26058 : ur->exps = exps_copy(v->sql, p->exps);
3084 26058 : set_processed(ur);
3085 :
3086 26058 : rel = rel_inplace_setop(v->sql, rel, ul, ur, op_union,
3087 : rel_projections(v->sql, rel, NULL, 1, 1));
3088 26058 : if (need_distinct)
3089 0 : set_distinct(rel);
3090 26058 : if (is_single(u))
3091 2 : set_single(rel);
3092 26058 : v->changes++;
3093 26058 : rel->l = rel_merge_projects_(v, rel->l);
3094 26058 : rel->r = rel_merge_projects_(v, rel->r);
3095 26058 : return rel;
3096 : }
3097 : return rel;
3098 : }
3099 :
3100 : /*
3101 : * Push (semi)joins down unions, this is basically for merge tables, where
3102 : * we know that the fk-indices are split over two clustered merge tables.
3103 : */
3104 : static inline sql_rel *
3105 3276759 : rel_push_join_down_union(visitor *v, sql_rel *rel)
3106 : {
3107 3276759 : if ((is_join(rel->op) && !is_outerjoin(rel->op) && !is_single(rel)) || is_semi(rel->op)) {
3108 406748 : sql_rel *l = rel->l, *r = rel->r, *ol = l, *or = r;
3109 406748 : list *exps = rel->exps, *attr = rel->attr;
3110 406748 : sql_exp *je = NULL;
3111 :
3112 : /* we would like to optimize in place reference rels which point
3113 : * to replica tables and let the replica optimizer handle those
3114 : * later. otherwise we miss the push join down optimization due
3115 : * to the rel_is_ref bailout
3116 : */
3117 406748 : if (rel_is_ref(l) && is_basetable(l->op) && l->l && isReplicaTable((sql_table*)l->l)) {
3118 1 : rel->l = rel_copy(v->sql, l, true);
3119 1 : rel_destroy(l);
3120 : }
3121 406748 : if (rel_is_ref(r) && is_basetable(r->op) && r->l && isReplicaTable((sql_table*)r->l)) {
3122 6 : rel->r = rel_copy(v->sql, r, true);
3123 6 : rel_destroy(r);
3124 : }
3125 :
3126 406748 : if (!l || !r || need_distinct(l) || need_distinct(r) || rel_is_ref(l) || rel_is_ref(r))
3127 : return rel;
3128 361985 : if (l->op == op_project)
3129 31213 : l = l->l;
3130 361985 : if (r->op == op_project)
3131 38157 : r = r->l;
3132 :
3133 : /* both sides only if we have a join index */
3134 361985 : if (!l || !r || (is_union(l->op) && is_union(r->op) &&
3135 145 : !(je = rel_is_join_on_pkey(rel, true)))) /* aligned PKEY-FKEY JOIN */
3136 825 : return rel;
3137 361160 : if (is_semi(rel->op) && is_union(l->op) && !je)
3138 : return rel;
3139 :
3140 360390 : if ((is_union(l->op) && !need_distinct(l) && !is_single(l)) && !is_union(r->op)){
3141 4096 : sql_rel *nl, *nr;
3142 4096 : sql_rel *ll = rel_dup(l->l), *lr = rel_dup(l->r);
3143 :
3144 : /* join(union(a,b), c) -> union(join(a,c), join(b,c)) */
3145 4096 : if (!is_project(ll->op))
3146 5 : ll = rel_project(v->sql->sa, ll,
3147 : rel_projections(v->sql, ll, NULL, 1, 1));
3148 4096 : if (!is_project(lr->op))
3149 8 : lr = rel_project(v->sql->sa, lr,
3150 : rel_projections(v->sql, lr, NULL, 1, 1));
3151 4096 : rel_rename_exps(v->sql, l->exps, ll->exps);
3152 4096 : rel_rename_exps(v->sql, l->exps, lr->exps);
3153 4096 : if (l != ol) {
3154 1 : ll = rel_project(v->sql->sa, ll, NULL);
3155 1 : ll->exps = exps_copy(v->sql, ol->exps);
3156 1 : set_processed(ll);
3157 1 : lr = rel_project(v->sql->sa, lr, NULL);
3158 1 : lr->exps = exps_copy(v->sql, ol->exps);
3159 1 : set_processed(lr);
3160 : }
3161 4096 : nl = rel_crossproduct(v->sql->sa, ll, rel_dup(or), rel->op);
3162 4096 : nr = rel_crossproduct(v->sql->sa, lr, rel_dup(or), rel->op);
3163 4096 : nl->exps = exps_copy(v->sql, exps);
3164 4096 : nl->attr = exps_copy(v->sql, attr);
3165 4096 : nr->exps = exps_copy(v->sql, exps);
3166 4096 : nr->attr = exps_copy(v->sql, attr);
3167 4096 : set_processed(nl);
3168 4096 : set_processed(nr);
3169 4096 : nl = rel_project(v->sql->sa, nl, rel_projections(v->sql, nl, NULL, 1, 1));
3170 4096 : nr = rel_project(v->sql->sa, nr, rel_projections(v->sql, nr, NULL, 1, 1));
3171 4096 : v->changes++;
3172 4096 : return rel_inplace_setop(v->sql, rel, nl, nr, op_union, rel_projections(v->sql, rel, NULL, 1, 1));
3173 356294 : } else if (is_union(l->op) && !need_distinct(l) && !is_single(l) &&
3174 0 : is_union(r->op) && !need_distinct(r) && !is_single(r) && je) {
3175 0 : sql_rel *nl, *nr;
3176 0 : sql_rel *ll = rel_dup(l->l), *lr = rel_dup(l->r);
3177 0 : sql_rel *rl = rel_dup(r->l), *rr = rel_dup(r->r);
3178 :
3179 : /* join(union(a,b), union(c,d)) -> union(join(a,c), join(b,d)) */
3180 0 : if (!is_project(ll->op))
3181 0 : ll = rel_project(v->sql->sa, ll,
3182 : rel_projections(v->sql, ll, NULL, 1, 1));
3183 0 : if (!is_project(lr->op))
3184 0 : lr = rel_project(v->sql->sa, lr,
3185 : rel_projections(v->sql, lr, NULL, 1, 1));
3186 0 : rel_rename_exps(v->sql, l->exps, ll->exps);
3187 0 : rel_rename_exps(v->sql, l->exps, lr->exps);
3188 0 : if (l != ol) {
3189 0 : ll = rel_project(v->sql->sa, ll, NULL);
3190 0 : ll->exps = exps_copy(v->sql, ol->exps);
3191 0 : set_processed(ll);
3192 0 : lr = rel_project(v->sql->sa, lr, NULL);
3193 0 : lr->exps = exps_copy(v->sql, ol->exps);
3194 0 : set_processed(lr);
3195 : }
3196 0 : if (!is_project(rl->op))
3197 0 : rl = rel_project(v->sql->sa, rl,
3198 : rel_projections(v->sql, rl, NULL, 1, 1));
3199 0 : if (!is_project(rr->op))
3200 0 : rr = rel_project(v->sql->sa, rr,
3201 : rel_projections(v->sql, rr, NULL, 1, 1));
3202 0 : rel_rename_exps(v->sql, r->exps, rl->exps);
3203 0 : rel_rename_exps(v->sql, r->exps, rr->exps);
3204 0 : if (r != or) {
3205 0 : rl = rel_project(v->sql->sa, rl, NULL);
3206 0 : rl->exps = exps_copy(v->sql, or->exps);
3207 0 : set_processed(rl);
3208 0 : rr = rel_project(v->sql->sa, rr, NULL);
3209 0 : rr->exps = exps_copy(v->sql, or->exps);
3210 0 : set_processed(rr);
3211 : }
3212 0 : nl = rel_crossproduct(v->sql->sa, ll, rl, rel->op);
3213 0 : nr = rel_crossproduct(v->sql->sa, lr, rr, rel->op);
3214 0 : nl->exps = exps_copy(v->sql, exps);
3215 0 : nl->attr = exps_copy(v->sql, attr);
3216 0 : nr->exps = exps_copy(v->sql, exps);
3217 0 : nr->attr = exps_copy(v->sql, attr);
3218 0 : set_processed(nl);
3219 0 : set_processed(nr);
3220 0 : nl = rel_project(v->sql->sa, nl, rel_projections(v->sql, nl, NULL, 1, 1));
3221 0 : nr = rel_project(v->sql->sa, nr, rel_projections(v->sql, nr, NULL, 1, 1));
3222 0 : v->changes++;
3223 0 : return rel_inplace_setop(v->sql, rel, nl, nr, op_union, rel_projections(v->sql, rel, NULL, 1, 1));
3224 356294 : } else if (!is_union(l->op) &&
3225 356286 : is_union(r->op) && !need_distinct(r) && !is_single(r) &&
3226 : !is_semi(rel->op)) {
3227 1237 : sql_rel *nl, *nr;
3228 1237 : sql_rel *rl = rel_dup(r->l), *rr = rel_dup(r->r);
3229 :
3230 : /* join(a, union(b,c)) -> union(join(a,b), join(a,c)) */
3231 1237 : if (!is_project(rl->op))
3232 7 : rl = rel_project(v->sql->sa, rl,
3233 : rel_projections(v->sql, rl, NULL, 1, 1));
3234 1237 : if (!is_project(rr->op))
3235 14 : rr = rel_project(v->sql->sa, rr,
3236 : rel_projections(v->sql, rr, NULL, 1, 1));
3237 1237 : rel_rename_exps(v->sql, r->exps, rl->exps);
3238 1237 : rel_rename_exps(v->sql, r->exps, rr->exps);
3239 1237 : if (r != or) {
3240 55 : rl = rel_project(v->sql->sa, rl, NULL);
3241 55 : rl->exps = exps_copy(v->sql, or->exps);
3242 55 : set_processed(rl);
3243 55 : rr = rel_project(v->sql->sa, rr, NULL);
3244 55 : rr->exps = exps_copy(v->sql, or->exps);
3245 55 : set_processed(rr);
3246 : }
3247 1237 : nl = rel_crossproduct(v->sql->sa, rel_dup(ol), rl, rel->op);
3248 1237 : nr = rel_crossproduct(v->sql->sa, rel_dup(ol), rr, rel->op);
3249 1237 : nl->exps = exps_copy(v->sql, exps);
3250 1237 : nl->attr = exps_copy(v->sql, attr);
3251 1237 : nr->exps = exps_copy(v->sql, exps);
3252 1237 : nr->attr = exps_copy(v->sql, attr);
3253 1237 : set_processed(nl);
3254 1237 : set_processed(nr);
3255 1237 : nl = rel_project(v->sql->sa, nl, rel_projections(v->sql, nl, NULL, 1, 1));
3256 1237 : nr = rel_project(v->sql->sa, nr, rel_projections(v->sql, nr, NULL, 1, 1));
3257 1237 : v->changes++;
3258 1237 : return rel_inplace_setop(v->sql, rel, nl, nr, op_union, rel_projections(v->sql, rel, NULL, 1, 1));
3259 : /* {semi}join ( A1, union (A2, B)) [A1.partkey = A2.partkey] ->
3260 : * {semi}join ( A1, A2 )
3261 : * and
3262 : * {semi}join ( A1, union (B, A2)) [A1.partkey = A2.partkey] ->
3263 : * {semi}join ( A1, A2 )
3264 : * (ie a single part on the left)
3265 : *
3266 : * Howto detect that a relation isn't matching.
3267 : *
3268 : * partitioning is currently done only on pkey/fkey's
3269 : * ie only matching per part if join is on pkey/fkey (parts)
3270 : *
3271 : * and part numbers should match.
3272 : *
3273 : * */
3274 355057 : } else if (!is_union(l->op) &&
3275 355049 : is_union(r->op) && !need_distinct(r) && !is_single(r) &&
3276 982 : is_semi(rel->op) && (je = rel_is_join_on_pkey(rel, true))) {
3277 : /* use first join expression, to find part nr */
3278 0 : int lpnr = rel_part_nr(l, je);
3279 0 : sql_rel *rl = r->l;
3280 0 : sql_rel *rr = r->r;
3281 :
3282 0 : if (lpnr < 0)
3283 : return rel;
3284 : /* case 1: uses left not right */
3285 0 : if (rel_uses_part_nr(rl, je, lpnr) &&
3286 0 : !rel_uses_part_nr(rr, je, lpnr)) {
3287 0 : sql_rel *nl;
3288 :
3289 0 : rl = rel_dup(rl);
3290 0 : if (!is_project(rl->op))
3291 0 : rl = rel_project(v->sql->sa, rl,
3292 : rel_projections(v->sql, rl, NULL, 1, 1));
3293 0 : rel_rename_exps(v->sql, r->exps, rl->exps);
3294 0 : if (r != or) {
3295 0 : rl = rel_project(v->sql->sa, rl, NULL);
3296 0 : rl->exps = exps_copy(v->sql, or->exps);
3297 0 : set_processed(rl);
3298 : }
3299 0 : nl = rel_crossproduct(v->sql->sa, rel_dup(ol), rl, rel->op);
3300 0 : nl->exps = exps_copy(v->sql, exps);
3301 0 : nl->attr = exps_copy(v->sql, attr);
3302 0 : set_processed(nl);
3303 0 : v->changes++;
3304 0 : return rel_inplace_project(v->sql->sa, rel, nl, rel_projections(v->sql, rel, NULL, 1, 1));
3305 : /* case 2: uses right not left */
3306 0 : } else if (!rel_uses_part_nr(rl, je, lpnr) &&
3307 0 : rel_uses_part_nr(rr, je, lpnr)) {
3308 0 : sql_rel *nl;
3309 :
3310 0 : rr = rel_dup(rr);
3311 0 : if (!is_project(rr->op))
3312 0 : rr = rel_project(v->sql->sa, rr,
3313 : rel_projections(v->sql, rr, NULL, 1, 1));
3314 0 : rel_rename_exps(v->sql, r->exps, rr->exps);
3315 0 : if (r != or) {
3316 0 : rr = rel_project(v->sql->sa, rr, NULL);
3317 0 : rr->exps = exps_copy(v->sql, or->exps);
3318 0 : set_processed(rr);
3319 : }
3320 0 : nl = rel_crossproduct(v->sql->sa, rel_dup(ol), rr, rel->op);
3321 0 : nl->exps = exps_copy(v->sql, exps);
3322 0 : nl->attr = exps_copy(v->sql, attr);
3323 0 : set_processed(nl);
3324 0 : v->changes++;
3325 0 : return rel_inplace_project(v->sql->sa, rel, nl, rel_projections(v->sql, rel, NULL, 1, 1));
3326 : }
3327 : }
3328 : }
3329 : return rel;
3330 : }
3331 :
3332 : static sql_rel *
3333 3276759 : rel_optimize_unions_topdown_(visitor *v, sql_rel *rel)
3334 : {
3335 3276759 : rel = rel_push_project_down_union(v, rel);
3336 3276759 : rel = rel_push_join_down_union(v, rel);
3337 3276759 : return rel;
3338 : }
3339 :
3340 : static sql_rel *
3341 25316 : rel_optimize_unions_topdown(visitor *v, global_props *gp, sql_rel *rel)
3342 : {
3343 25316 : (void) gp;
3344 25316 : return rel_visitor_topdown(v, rel, &rel_optimize_unions_topdown_);
3345 : }
3346 :
3347 : run_optimizer
3348 596801 : bind_optimize_unions_topdown(visitor *v, global_props *gp)
3349 : {
3350 596801 : (void) v;
3351 596801 : return gp->opt_level == 1 && gp->cnt[op_union] ? rel_optimize_unions_topdown : NULL;
3352 : }
3353 :
3354 :
3355 : static sql_column *
3356 0 : is_fk_column_of_pk(mvc *sql, sql_rel *rel, sql_column *pkc, sql_exp *e) /* test if e is a foreing key column for the pk on pkc */
3357 : {
3358 0 : sql_trans *tr = sql->session->tr;
3359 0 : sql_column *c = exp_find_column(rel, e, -2);
3360 :
3361 0 : if (c) {
3362 0 : sql_table *t = c->t;
3363 :
3364 0 : for (node *n = ol_first_node(t->idxs); n; n = n->next) {
3365 0 : sql_idx *li = n->data;
3366 :
3367 0 : if (li->type == join_idx) {
3368 0 : for (node *m = li->columns->h ; m ; m = m->next) {
3369 0 : sql_kc *fkc = m->data;
3370 :
3371 0 : if (strcmp(fkc->c->base.name, c->base.name) == 0) { /* same fkey column */
3372 0 : sql_key *fkey = (sql_key*)os_find_id(tr->cat->objects, tr, ((sql_fkey*)li->key)->rkey);
3373 :
3374 0 : if (strcmp(fkey->t->base.name, pkc->t->base.name) == 0) { /* to same pk table */
3375 0 : for (node *o = fkey->columns->h ; o ; o = n->next) {
3376 0 : sql_kc *kc = m->data;
3377 :
3378 0 : if (strcmp(kc->c->base.name, pkc->base.name) == 0) /* to same pk table column */
3379 : return c;
3380 : }
3381 : }
3382 : }
3383 : }
3384 : }
3385 : }
3386 : }
3387 : return NULL;
3388 : }
3389 :
3390 : static bool
3391 0 : has_no_selectivity(mvc *sql, sql_rel *rel)
3392 : {
3393 0 : if (!rel)
3394 : return true;
3395 :
3396 0 : switch(rel->op){
3397 : case op_basetable:
3398 : case op_truncate:
3399 : case op_table:
3400 : return true;
3401 0 : case op_topn:
3402 : case op_sample:
3403 : case op_project:
3404 : case op_groupby:
3405 0 : return has_no_selectivity(sql, rel->l);
3406 : case op_ddl:
3407 : case op_insert:
3408 : case op_update:
3409 : case op_delete:
3410 : case op_merge:
3411 : case op_join:
3412 : case op_left:
3413 : case op_right:
3414 : case op_full:
3415 : case op_semi:
3416 : case op_anti:
3417 : case op_union:
3418 : case op_inter:
3419 : case op_except:
3420 : case op_select:
3421 : return false;
3422 : }
3423 : return true;
3424 : }
3425 :
3426 : static sql_rel *
3427 706967 : rel_distinct_project2groupby_(visitor *v, sql_rel *rel)
3428 : {
3429 706967 : sql_rel *l = rel->l;
3430 :
3431 : /* rewrite distinct project (table) [ constant ] -> project [ constant ] */
3432 714156 : if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
3433 7189 : exps_card(rel->exps) <= CARD_ATOM) {
3434 356 : set_nodistinct(rel);
3435 356 : if (rel->card > CARD_ATOM) { /* if the projection just contains constants, then no topN is needed */
3436 94 : sql_rel *nl = rel->l = rel_topn(v->sql->sa, rel->l, append(sa_list(v->sql->sa), exp_atom_lng(v->sql->sa, 1)));
3437 94 : set_processed(nl);
3438 : }
3439 356 : v->changes++;
3440 : }
3441 :
3442 : /* rewrite distinct project [ pk ] ( select ( table ) [ e op val ])
3443 : * into project [ pk ] ( select/semijoin ( table ) */
3444 706967 : if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
3445 6836 : (l->op == op_select || l->op == op_semi) && exps_unique(v->sql, rel, rel->exps) &&
3446 3 : (!have_semantics(l->exps) || !have_nil(rel->exps))) {
3447 3 : set_nodistinct(rel);
3448 3 : v->changes++;
3449 : }
3450 :
3451 : /* rewrite distinct project ( join(p,f) [ p.pk = f.fk ] ) [ p.pk ]
3452 : * into project( (semi)join(p,f) [ p.pk = f.fk ] ) [ p.pk ] */
3453 706967 : if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
3454 6830 : l && (is_select(l->op) || l->op == op_join) && rel_is_join_on_pkey(l, true) /* [ pk == fk ] */) {
3455 0 : sql_exp *found = NULL, *pk = NULL, *fk = NULL;
3456 0 : bool all_exps_atoms = true;
3457 0 : sql_column *pkc = NULL;
3458 :
3459 0 : for (node *m = l->exps->h ; m ; m = m->next) { /* find a primary key join */
3460 0 : sql_exp *je = (sql_exp *) m->data;
3461 0 : sql_exp *le = je->l, *re = je->r;
3462 :
3463 0 : if (!find_prop(je->p, PROP_JOINIDX)) /* must be a pk-fk join expression */
3464 0 : continue;
3465 :
3466 0 : if ((pkc = exp_is_pkey(l, le))) { /* le is the primary key */
3467 0 : all_exps_atoms = true;
3468 :
3469 0 : for (node *n = rel->exps->h; n && all_exps_atoms; n = n->next) {
3470 0 : sql_exp *e = (sql_exp *) n->data;
3471 :
3472 0 : if (exp_match(e, le) || exp_refers(e, le))
3473 : found = e;
3474 0 : else if (e->card > CARD_ATOM)
3475 0 : all_exps_atoms = false;
3476 : }
3477 : pk = le;
3478 : fk = re;
3479 : }
3480 0 : if (!found && (pkc = exp_is_pkey(l, re))) { /* re is the primary key */
3481 0 : all_exps_atoms = true;
3482 :
3483 0 : for (node *n = rel->exps->h; n && all_exps_atoms; n = n->next) {
3484 0 : sql_exp *e = (sql_exp *) n->data;
3485 :
3486 0 : if (exp_match(e, re) || exp_refers(e, re))
3487 : found = e;
3488 0 : else if (e->card > CARD_ATOM)
3489 0 : all_exps_atoms = false;
3490 : }
3491 : pk = re;
3492 : fk = le;
3493 : }
3494 : }
3495 :
3496 0 : if (all_exps_atoms && found) { /* rel must have the same primary key on the projection list */
3497 : /* if the foreign key has no selectivity, the join can be removed */
3498 0 : if (!(rel_is_ref(l)) && ((rel_find_exp(l->l, fk) && is_fk_column_of_pk(v->sql, l->l, pkc, fk) && has_no_selectivity(v->sql, l->l)) ||
3499 0 : (l->r && rel_find_exp(l->r, fk) && is_fk_column_of_pk(v->sql, l->r, pkc, fk) && has_no_selectivity(v->sql, l->r)))) {
3500 0 : sql_rel *side = (rel_find_exp(l->l, pk) != NULL)?l->l:l->r;
3501 :
3502 0 : rel->l = rel_dup(side);
3503 0 : rel_destroy(l);
3504 0 : v->changes++;
3505 0 : set_nodistinct(rel);
3506 0 : return rel;
3507 : }
3508 : /* if the join has no multiple references it can be re-written into a semijoin */
3509 0 : if (l->op == op_join && !(rel_is_ref(l)) && list_length(rel->exps) == 1) { /* other expressions may come from the other side */
3510 0 : if (l->r && rel_find_exp(l->r, pk)) {
3511 0 : sql_rel *temp = l->l;
3512 0 : l->l = l->r;
3513 0 : l->r = temp;
3514 :
3515 0 : l->op = op_semi;
3516 0 : } else if (rel_find_exp(l->l, pk)) {
3517 0 : l->op = op_semi;
3518 : }
3519 : }
3520 0 : v->changes++;
3521 0 : set_nodistinct(rel);
3522 0 : return rel;
3523 : }
3524 : }
3525 : /* rewrite distinct project [ gbe ] ( select ( groupby [ gbe ] [ gbe, e ] )[ e op val ])
3526 : * into project [ gbe ] ( select ( group etc ) */
3527 706967 : if (rel->op == op_project && rel->l && !rel->r /* no order by */ &&
3528 6830 : need_distinct(rel) && l->op == op_select){
3529 2882 : sql_rel *g = l->l;
3530 2882 : if (is_groupby(g->op)) {
3531 5 : list *used = sa_list(v->sql->sa);
3532 5 : list *gbe = g->r;
3533 5 : node *n;
3534 5 : int fnd = 1;
3535 :
3536 10 : for (n = rel->exps->h; n && fnd; n = n->next) {
3537 5 : sql_exp *e = n->data;
3538 :
3539 5 : if (e->card > CARD_ATOM) {
3540 : /* find e in gbe */
3541 5 : sql_exp *ne = list_find_exp(g->exps, e);
3542 :
3543 5 : if (ne)
3544 4 : ne = list_find_exp( gbe, ne);
3545 4 : if (ne && !list_find_exp(used, ne)) {
3546 2 : fnd++;
3547 2 : list_append(used, ne);
3548 : }
3549 3 : if (!ne)
3550 : fnd = 0;
3551 : }
3552 : }
3553 5 : if (fnd == (list_length(gbe)+1)) {
3554 0 : v->changes++;
3555 0 : set_nodistinct(rel);
3556 : }
3557 : }
3558 : }
3559 706967 : if (rel->op == op_project && rel->l &&
3560 6830 : need_distinct(rel) && exps_card(rel->exps) > CARD_ATOM) {
3561 6830 : node *n;
3562 6830 : list *exps = new_exp_list(v->sql->sa), *gbe = new_exp_list(v->sql->sa);
3563 6830 : list *obe = rel->r; /* we need to read the ordering later */
3564 :
3565 6830 : if (obe) {
3566 0 : int fnd = 0;
3567 :
3568 0 : for(n = obe->h; n && !fnd; n = n->next) {
3569 0 : sql_exp *e = n->data;
3570 :
3571 0 : if (e->type != e_column)
3572 : fnd = 1;
3573 0 : else if (exps_bind_column2(rel->exps, e->l, e->r, NULL) == 0)
3574 0 : fnd = 1;
3575 : }
3576 0 : if (fnd)
3577 : return rel;
3578 : }
3579 6830 : rel->l = rel_project(v->sql->sa, rel->l, rel->exps);
3580 :
3581 20182 : for (n = rel->exps->h; n; n = n->next) {
3582 13352 : sql_exp *e = n->data, *ne;
3583 :
3584 13352 : set_nodistinct(e);
3585 13352 : ne = exp_ref(v->sql, e);
3586 13352 : if (e->card > CARD_ATOM && !list_find_exp(gbe, ne)) { /* no need to group by on constants, or the same column multiple times */
3587 12984 : append(gbe, ne);
3588 12984 : ne = exp_ref(v->sql, ne);
3589 : }
3590 13352 : append(exps, ne);
3591 : }
3592 6830 : rel->op = op_groupby;
3593 6830 : rel->exps = exps;
3594 6830 : rel->r = gbe;
3595 6830 : set_nodistinct(rel);
3596 6830 : if (obe) {
3597 : /* add order again */
3598 0 : rel = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
3599 0 : rel->r = obe;
3600 : }
3601 6830 : v->changes++;
3602 : }
3603 : return rel;
3604 : }
3605 :
3606 : static sql_rel *
3607 5320 : rel_distinct_project2groupby(visitor *v, global_props *gp, sql_rel *rel)
3608 : {
3609 5320 : (void) gp;
3610 5320 : return rel_visitor_bottomup(v, rel, &rel_distinct_project2groupby_);
3611 : }
3612 :
3613 : run_optimizer
3614 596795 : bind_distinct_project2groupby(visitor *v, global_props *gp)
3615 : {
3616 596795 : int flag = v->sql->sql_optimizer;
3617 530861 : return gp->opt_cycle == 0 && gp->opt_level == 1 && gp->needs_distinct && gp->cnt[op_project] &&
3618 602115 : (flag & distinct_project2groupby)? rel_distinct_project2groupby : NULL;
3619 : }
|