Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer_private.h"
15 : #include "rel_basetable.h"
16 : #include "rel_exp.h"
17 : #include "rel_select.h"
18 : #include "rel_rewriter.h"
19 :
20 : static int
21 3281798 : exp_is_rename(sql_exp *e)
22 : {
23 3281798 : return (e->type == e_column);
24 : }
25 :
26 : static int
27 1862834 : exp_is_useless_rename(sql_exp *e)
28 : {
29 1862834 : return (e->type == e_column && e->alias.label == e->nid);
30 : }
31 :
32 : static list *
33 20051 : rel_used_projections(mvc *sql, list *exps, list *users)
34 : {
35 20051 : list *nexps = sa_list(sql->sa);
36 20051 : bool *used = SA_ZNEW_ARRAY(sql->ta, bool, list_length(exps));
37 20051 : int i = 0;
38 :
39 132064 : for(node *n = users->h; n; n = n->next) {
40 112013 : sql_exp *e = n->data, *ne = NULL;
41 112013 : assert(e->nid && exps_bind_nid(exps, e->nid));
42 112013 : if (e->nid && (ne = exps_bind_nid(exps, e->nid))) {
43 112013 : used[list_position(exps, ne)] = 1;
44 : }
45 : }
46 141904 : for(node *n = exps->h; n; n = n->next, i++) {
47 121853 : sql_exp *e = n->data;
48 121853 : if (is_intern(e) || used[i])
49 121716 : append(nexps, e);
50 : }
51 20051 : return nexps;
52 : }
53 :
54 : /* move projects down with the goal op removing them completely (ie push renames/reduced lists into basetable)
55 : * for some cases we can directly remove iff renames rename into same alias
56 : * */
57 : static sql_rel *
58 3758358 : rel_push_project_down_(visitor *v, sql_rel *rel)
59 : {
60 : /* for now only push down renames */
61 3758358 : if (v->depth > 1 && is_simple_project(rel->op) && !need_distinct(rel) && !rel_is_ref(rel) && rel->l && !rel->r &&
62 1078938 : v->parent &&
63 1078930 : !is_modify(v->parent->op) && !is_topn(v->parent->op) && !is_sample(v->parent->op) &&
64 1696878 : !is_ddl(v->parent->op) && !is_set(v->parent->op) && !is_munion(v->parent->op) &&
65 653162 : list_check_prop_all(rel->exps, (prop_check_func)&exp_is_rename)) {
66 387594 : sql_rel *l = rel->l;
67 :
68 387594 : if (rel_is_ref(l))
69 : return rel;
70 357020 : if (is_basetable(l->op)) {
71 37818 : if (list_check_prop_all(rel->exps, (prop_check_func)&exp_is_useless_rename)) {
72 : /* TODO reduce list (those in the project + internal) */
73 20051 : rel->l = NULL;
74 20051 : l->exps = rel_used_projections(v->sql, l->exps, rel->exps);
75 20051 : rel_destroy(rel);
76 20051 : v->changes++;
77 20051 : return l;
78 : }
79 : return rel;
80 319202 : } else if (list_check_prop_all(rel->exps, (prop_check_func)&exp_is_useless_rename)) {
81 123479 : if ((is_project(l->op) && list_length(l->exps) == list_length(rel->exps)) ||
82 113989 : ((v->parent && is_project(v->parent->op)) &&
83 37784 : (is_mset(l->op) || is_set(l->op) || is_select(l->op) || is_join(l->op) || is_semi(l->op) || is_topn(l->op) || is_sample(l->op)))) {
84 46474 : rel->l = NULL;
85 46474 : rel_destroy(rel);
86 46474 : v->changes++;
87 46474 : return l;
88 : }
89 : }
90 : }
91 : /* ToDo handle useful renames, ie new relation name and unique set of attribute names (could reduce set of * attributes) */
92 : /* handle both useless and useful with project [ group by ] */
93 : return rel;
94 : }
95 :
96 : static sql_rel *
97 171303 : rel_push_project_down(visitor *v, global_props *gp, sql_rel *rel)
98 : {
99 171303 : (void) gp;
100 171303 : return rel_visitor_bottomup(v, rel, &rel_push_project_down_);
101 : }
102 :
103 : run_optimizer
104 611668 : bind_push_project_down(visitor *v, global_props *gp)
105 : {
106 611668 : int flag = v->sql->sql_optimizer;
107 611472 : return gp->opt_level == 1 && (flag & push_project_down) &&
108 1223140 : (gp->cnt[op_project] || gp->cnt[op_groupby]) ? rel_push_project_down : NULL;
109 : }
110 :
111 :
112 : static bool exp_shares_exps(sql_exp *e, list *shared, uint64_t *uses);
113 :
114 : static bool
115 112531 : exps_shares_exps(list *exps, list *shared, uint64_t *uses)
116 : {
117 112531 : if (!exps || !shared)
118 : return false;
119 390641 : for (node *n = exps->h; n; n = n->next) {
120 278516 : sql_exp *e = n->data;
121 :
122 278516 : if (exp_shares_exps(e, shared, uses))
123 : return true;
124 : }
125 : return false;
126 : }
127 :
128 : static bool
129 2706920 : exp_shares_exps(sql_exp *e, list *shared, uint64_t *uses)
130 : {
131 4935727 : switch(e->type) {
132 75 : case e_cmp:
133 75 : if (e->flag == cmp_or || e->flag == cmp_filter)
134 52 : return exps_shares_exps(e->l, shared, uses) || exps_shares_exps(e->r, shared, uses);
135 23 : else if (e->flag == cmp_in || e->flag == cmp_notin)
136 0 : return exp_shares_exps(e->l, shared, uses) || exps_shares_exps(e->r, shared, uses);
137 : else
138 46 : return exp_shares_exps(e->l, shared, uses) || exp_shares_exps(e->r, shared, uses) || (e->f && exp_shares_exps(e->f, shared, uses));
139 111636 : case e_atom:
140 111636 : if (e->f)
141 0 : return exps_shares_exps(e->f, shared, uses);
142 : return false;
143 4476734 : case e_column:
144 : {
145 4476734 : sql_exp *ne = NULL;
146 4476734 : assert(e->nid);
147 4476734 : if (e->nid)
148 4476734 : ne = exps_bind_nid(shared, e->nid);
149 4476734 : if (!ne)
150 : return false;
151 3458136 : if (ne->type != e_column) {
152 481487 : int i = list_position(shared, ne);
153 481487 : if (i < 0)
154 : return false;
155 481487 : uint64_t used = (uint64_t) 1 << i;
156 481487 : if (used & *uses)
157 : return true;
158 481084 : *uses |= used;
159 481084 : return false;
160 : }
161 2976649 : if (ne != e && (list_position(shared, e) < 0 || list_position(shared, e) > list_position(shared, ne)))
162 : /* maybe ne refers to a local complex exp */
163 1993952 : return exp_shares_exps(ne, shared, uses);
164 : return false;
165 : }
166 234855 : case e_convert:
167 234855 : return exp_shares_exps(e->l, shared, uses);
168 112427 : case e_aggr:
169 : case e_func:
170 112427 : return exps_shares_exps(e->l, shared, uses);
171 : case e_psm:
172 0 : assert(0); /* not in projection list */
173 : }
174 : return false;
175 : }
176 :
177 : static bool
178 321523 : exps_share_expensive_exp(list *exps, list *shared )
179 : {
180 321523 : uint64_t uses = 0;
181 :
182 321523 : if (list_empty(exps) || list_empty(shared))
183 0 : return false;
184 321523 : if (list_length(shared) > 64)
185 : return true;
186 2749043 : for (node *n = exps->h; n; n = n->next) {
187 2428342 : sql_exp *e = n->data;
188 :
189 2428342 : if (exp_shares_exps(e, shared, &uses))
190 : return true;
191 : }
192 : return false;
193 : }
194 :
195 : static bool ambigious_ref( list *exps, sql_exp *e);
196 : static bool
197 86354 : ambigious_refs( list *exps, list *refs)
198 : {
199 86354 : if (list_empty(refs))
200 : return false;
201 262011 : for(node *n=refs->h; n; n = n->next) {
202 196690 : if (ambigious_ref(exps, n->data))
203 : return true;
204 : }
205 : return false;
206 : }
207 :
208 : static bool
209 3674759 : ambigious_ref( list *exps, sql_exp *e)
210 : {
211 3674759 : sql_exp *ne = NULL;
212 :
213 3674759 : if (e->type == e_column) {
214 3162775 : assert(e->nid);
215 3162775 : if (e->nid)
216 3162775 : ne = exps_bind_nid(exps, e->nid);
217 3162775 : if (ne && e != ne)
218 : return true;
219 : }
220 3665838 : if (e->type == e_func)
221 86354 : return ambigious_refs(exps, e->l);
222 : return false;
223 : }
224 :
225 : /* merge 2 projects into the lower one */
226 : static sql_rel *
227 4008490 : rel_merge_projects_(visitor *v, sql_rel *rel)
228 : {
229 4067034 : list *exps = rel->exps;
230 4067034 : sql_rel *prj = rel->l;
231 4067034 : node *n;
232 :
233 4067034 : if (rel->op == op_project &&
234 1606189 : prj && prj->op == op_project && !(rel_is_ref(prj)) && list_empty(prj->r)) {
235 584125 : int all = 1;
236 :
237 584125 : if (project_unsafe(rel, false) || project_unsafe(prj, false) || exps_share_expensive_exp(rel->exps, prj->exps))
238 263423 : return rel;
239 :
240 : /* here we try to fix aliases */
241 320701 : list *nexps = NULL;
242 : /* for each exp check if we can rename it */
243 2181893 : for (n = exps->h; n && all; n = n->next) {
244 1870113 : sql_exp *e = n->data, *ne = NULL;
245 :
246 : /* We do not handle expressions pointing back in the list */
247 1870113 : if (ambigious_ref(exps, e)) {
248 : all = 0;
249 : break;
250 : }
251 1861192 : ne = exp_push_down_prj(v->sql, e, prj, prj->l);
252 : /* check if the referred alias name isn't used twice */
253 1861192 : if (ne && ambigious_ref(nexps, ne)) {
254 : all = 0;
255 : break;
256 : }
257 1607956 : if (ne) {
258 1607956 : if (exp_name(e))
259 1601590 : exp_prop_alias(v->sql->sa, ne, e);
260 1607956 : if (!nexps)
261 312167 : nexps = new_exp_list(v->sql->sa);
262 1607956 : list_append(nexps, ne);
263 : } else {
264 : all = 0;
265 : }
266 : }
267 320701 : if (all) {
268 58544 : rel->exps = nexps;
269 : /* we can now remove the intermediate project */
270 : /* push order by expressions */
271 58544 : if (!list_empty(rel->r)) {
272 0 : list *nr = new_exp_list(v->sql->sa), *res = rel->r;
273 0 : for (n = res->h; n; n = n->next) {
274 0 : sql_exp *e = n->data, *ne = NULL;
275 :
276 0 : ne = exp_push_down_prj(v->sql, e, prj, prj->l);
277 0 : if (ne) {
278 0 : if (exp_name(e))
279 0 : exp_prop_alias(v->sql->sa, ne, e);
280 0 : list_append(nr, ne);
281 : } else {
282 : all = 0;
283 : }
284 : }
285 0 : if (all) {
286 0 : rel->r = nr;
287 : } else {
288 : /* leave as is */
289 0 : rel->exps = exps;
290 0 : return rel;
291 : }
292 : }
293 58544 : rel->l = prj->l;
294 58544 : prj->l = NULL;
295 58544 : rel_destroy(prj);
296 58544 : v->changes++;
297 58544 : return rel_merge_projects_(v, rel);
298 : } else {
299 : /* leave as is */
300 262157 : rel->exps = exps;
301 : }
302 262157 : return rel;
303 : }
304 : return rel;
305 : }
306 :
307 : static sql_rel *
308 171303 : rel_merge_projects(visitor *v, global_props *gp, sql_rel *rel)
309 : {
310 171303 : (void) gp;
311 171303 : return rel_visitor_bottomup(v, rel, &rel_merge_projects_);
312 : }
313 :
314 : run_optimizer
315 611681 : bind_merge_projects(visitor *v, global_props *gp)
316 : {
317 611681 : int flag = v->sql->sql_optimizer;
318 611485 : return gp->opt_level == 1 && (flag & merge_projects) &&
319 1223158 : (gp->cnt[op_project] || gp->cnt[op_groupby]) ? rel_merge_projects : NULL;
320 : }
321 :
322 :
323 : static sql_exp *split_aggr_and_project(mvc *sql, list *aexps, sql_exp *e);
324 :
325 : static void
326 0 : list_split_aggr_and_project(mvc *sql, list *aexps, list *exps)
327 : {
328 0 : if (list_empty(exps))
329 : return ;
330 0 : for(node *n = exps->h; n; n = n->next)
331 0 : n->data = split_aggr_and_project(sql, aexps, n->data);
332 : }
333 :
334 : static sql_exp *
335 0 : split_aggr_and_project(mvc *sql, list *aexps, sql_exp *e)
336 : {
337 0 : switch(e->type) {
338 0 : case e_aggr:
339 : /* add to the aggrs */
340 0 : if (!exp_name(e))
341 0 : exp_label(sql->sa, e, ++sql->label);
342 0 : list_append(aexps, e);
343 0 : return exp_ref(sql, e);
344 : case e_cmp:
345 : /* e_cmp's shouldn't exist in an aggr expression list */
346 0 : assert(0);
347 : case e_convert:
348 0 : e->l = split_aggr_and_project(sql, aexps, e->l);
349 0 : return e;
350 0 : case e_func:
351 0 : list_split_aggr_and_project(sql, aexps, e->l);
352 0 : return e;
353 : case e_atom:
354 : case e_column: /* constants and columns shouldn't be rewritten */
355 : case e_psm:
356 : return e;
357 : }
358 : return NULL;
359 : }
360 :
361 : /* exp_rename */
362 : static sql_exp * exp_rename(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t);
363 :
364 : static list *
365 48988 : exps_rename(mvc *sql, list *l, sql_rel *f, sql_rel *t)
366 : {
367 48988 : if (list_empty(l))
368 : return l;
369 44248 : for (node *n=l->h; n; n=n->next)
370 28400 : n->data = exp_rename(sql, n->data, f, t);
371 : return l;
372 : }
373 :
374 : /* exp_rename */
375 : static sql_exp *
376 161192 : exp_rename(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t)
377 : {
378 161192 : sql_exp *ne = NULL;
379 :
380 161192 : assert(is_project(f->op));
381 :
382 161192 : switch(e->type) {
383 71180 : case e_column:
384 71180 : assert(e->nid);
385 71180 : ne = exps_bind_nid(f->exps, e->nid);
386 71180 : if (!ne)
387 : return e;
388 47084 : sql_exp *oe = e;
389 47084 : e = NULL;
390 47084 : if (ne && ne->nid)
391 46939 : e = rel_find_exp(t, ne);
392 46939 : if (!e) {
393 145 : sql->session->status = 0;
394 145 : sql->errstr[0] = 0;
395 145 : if (exp_is_atom(ne))
396 : return ne;
397 : return oe;
398 : }
399 46939 : ne = exp_ref(sql, e);
400 46939 : if (oe)
401 46939 : exp_propagate(sql->sa, ne, oe);
402 46939 : return ne;
403 49499 : case e_cmp:
404 49499 : if (e->flag == cmp_or || e->flag == cmp_filter) {
405 3450 : e->l = exps_rename(sql, e->l, f, t);
406 3450 : e->r = exps_rename(sql, e->r, f, t);
407 46049 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
408 6139 : e->l = exp_rename(sql, e->l, f, t);
409 6139 : e->r = exps_rename(sql, e->r, f, t);
410 : } else {
411 39910 : e->l = exp_rename(sql, e->l, f, t);
412 39910 : e->r = exp_rename(sql, e->r, f, t);
413 39910 : if (e->f)
414 232 : e->f = exp_rename(sql, e->f, f, t);
415 : }
416 : break;
417 4564 : case e_convert:
418 4564 : e->l = exp_rename(sql, e->l, f, t);
419 4564 : break;
420 2824 : case e_aggr:
421 : case e_func:
422 2824 : e->l = exps_rename(sql, e->l, f, t);
423 2824 : break;
424 33125 : case e_atom:
425 33125 : e->f = exps_rename(sql, e->f, f, t);
426 33125 : break;
427 : case e_psm:
428 : break;
429 : }
430 : return e;
431 : }
432 :
433 : static int
434 5513066 : exp_match_exp_cmp( sql_exp *e1, sql_exp *e2)
435 : {
436 5513066 : if (exp_match_exp(e1,e2))
437 55066 : return 0;
438 : return -1;
439 : }
440 :
441 : /* Pushing projects up the tree. Done very early in the optimizer.
442 : * Makes later steps easier.
443 : */
444 : extern void _rel_print(mvc *sql, sql_rel *rel);
445 : static sql_rel *
446 3660326 : rel_push_project_up_(visitor *v, sql_rel *rel)
447 : {
448 3660326 : if (is_simple_project(rel->op) && rel->l && !rel_is_ref(rel)) {
449 1166896 : sql_rel *l = rel->l;
450 1166896 : if (is_simple_project(l->op))
451 283540 : return rel_merge_projects_(v, rel);
452 : /* find equal column references, later cases are rewritten into references back to the first
453 : * project () [ i.i L1, i.i L2 ] -> project() [ i.i L1, L1 L2 ] */
454 883356 : if (list_length(rel->exps) > 1) {
455 746441 : list *exps = rel->exps;
456 746441 : node *n = exps->h;
457 746441 : bool needed = false;
458 3744158 : for(n = n->next; n && !needed; n = n->next) {
459 2997716 : sql_exp *e = n->data;
460 2997716 : if (e->type == e_column && !is_selfref(e)) {
461 19777777 : for(node *m = exps->h; m && m != n && !needed; m = m->next) {
462 17221298 : sql_exp *h = m->data;
463 17221298 : if (exp_match_exp(h,e))
464 5603 : needed = true;
465 : }
466 : }
467 : }
468 746442 : if (needed) {
469 5603 : rel->exps = sa_list(v->sql->sa);
470 5603 : node *n = exps->h;
471 5603 : list_append(rel->exps, n->data);
472 77863 : for(n = n->next; n; n = n->next) {
473 72260 : sql_exp *e = n->data;
474 72260 : if (e->type == e_column && !is_selfref(e)) {
475 31688 : node *m = list_find(rel->exps, e, (fcmp)&exp_match_exp_cmp);
476 31688 : if (m) {
477 7529 : sql_exp *me = m->data;
478 7529 : if (me->alias.label != me->nid) {
479 780 : sql_exp *ne = exp_ref(v->sql, m->data);
480 780 : exp_setalias(ne, e->alias.label, exp_relname(e), exp_name(e));
481 780 : exp_propagate(v->sql->sa, ne, e);
482 780 : set_selfref(ne);
483 780 : e = ne;
484 : }
485 : }
486 : }
487 72260 : list_append(rel->exps, e);
488 : }
489 : }
490 : }
491 : }
492 :
493 : /* project/project cleanup is done later */
494 3376787 : if (is_join(rel->op) || is_select(rel->op)) {
495 1089313 : node *n;
496 1089313 : list *exps = NULL, *l_exps, *r_exps;
497 1089313 : sql_rel *l = rel->l;
498 1089313 : sql_rel *r = rel->r;
499 1089313 : sql_rel *t;
500 1089313 : int nlexps = 0, i = 0;
501 :
502 : /* Don't rewrite refs, non projections or constant or
503 : order by projections */
504 1089313 : if (!l || rel_is_ref(l) || is_topn(l->op) || is_sample(l->op) ||
505 1003672 : (is_join(rel->op) && !list_empty(rel->attr)) ||
506 953358 : (is_join(rel->op) && (!r || rel_is_ref(r))) ||
507 930021 : (is_left(rel->op) && (rel->flag&MERGE_LEFT) /* can't push projections above merge statements left joins */) ||
508 930019 : (is_select(rel->op) && l->op != op_project) ||
509 452495 : (is_join(rel->op) && ((l->op != op_project && r->op != op_project) || is_topn(r->op) || is_sample(r->op))) ||
510 156442 : ((l->op == op_project && (!l->l || l->r || project_unsafe(l, is_select(rel->op)))) ||
511 88416 : (is_join(rel->op) && (r->op == op_project && (!r->l || r->r || project_unsafe(r, false))))))
512 1014034 : return rel;
513 :
514 75279 : if (l->op == op_project && l->l) {
515 910178 : for (n = l->exps->h; n; n = n->next) {
516 874243 : sql_exp *e = n->data;
517 874243 : if (!(is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) &&
518 873024 : !(e->type == e_column && !has_label(e)))
519 : return rel;
520 : }
521 : }
522 53931 : if (is_join(rel->op) && r->op == op_project && r->l) {
523 57796 : for (n = r->exps->h; n; n = n->next) {
524 53129 : sql_exp *e = n->data;
525 53129 : if (!(is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) &&
526 52722 : !(e->type == e_column && !has_label(e)))
527 : return rel;
528 : }
529 : }
530 :
531 38539 : if (l->op == op_project && l->l) {
532 : /* Go through the list of project expressions.
533 : Check if they can be pushed up, ie are they not
534 : changing or introducing any columns used
535 : by the upper operator. */
536 :
537 35133 : exps = new_exp_list(v->sql->sa);
538 799328 : for (n = l->exps->h; n; n = n->next) {
539 764195 : sql_exp *e = n->data;
540 :
541 : /* we cannot rewrite projection with atomic values from outer joins */
542 764195 : if (is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) {
543 661 : list_append(exps, e);
544 763534 : } else if (e->type == e_column) {
545 763534 : if (has_label(e))
546 : return rel;
547 763534 : list_append(exps, e);
548 : } else {
549 : return rel;
550 : }
551 : }
552 : } else {
553 3406 : exps = rel_projections(v->sql, l, NULL, 1, 1);
554 : }
555 38539 : nlexps = list_length(exps);
556 : /* also handle right hand of join */
557 38539 : if (is_join(rel->op) && r->op == op_project && r->l && list_empty(rel->attr)) {
558 : /* Here we also check all expressions of r like above
559 : but also we need to check for ambiguous names. */
560 :
561 39348 : for (n = r->exps->h; n; n = n->next) {
562 34930 : sql_exp *e = n->data;
563 :
564 : /* we cannot rewrite projection with atomic values from outer joins */
565 34930 : if (is_column(e->type) && exp_is_atom(e) && !(is_left(rel->op) || is_full(rel->op))) {
566 147 : list_append(exps, e);
567 34783 : } else if (e->type == e_column) {
568 34534 : if (has_label(e))
569 : return rel;
570 34534 : list_append(exps, e);
571 : } else {
572 : return rel;
573 : }
574 : }
575 33872 : } else if (is_join(rel->op) && list_empty(rel->attr)) {
576 18192 : list *r_exps = rel_projections(v->sql, r, NULL, 1, 1);
577 18192 : list_merge(exps, r_exps, (fdup)NULL);
578 : }
579 38290 : if (!list_empty(rel->attr))
580 0 : append(exps, exp_ref(v->sql, rel->attr->h->data));
581 : /* Here we should check for ambiguous names ? */
582 38290 : if (is_join(rel->op) && r && list_empty(rel->attr)) {
583 22610 : t = (l->op == op_project && l->l)?l->l:l;
584 22610 : l_exps = rel_projections(v->sql, t, NULL, 1, 1);
585 : /* conflict with old right expressions */
586 22610 : r_exps = rel_projections(v->sql, r, NULL, 1, 1);
587 22610 : if (rel->attr)
588 0 : append(r_exps, exp_ref(v->sql, rel->attr->h->data));
589 22610 : t = (r->op == op_project && r->l)?r->l:r;
590 22610 : r_exps = rel_projections(v->sql, t, NULL, 1, 1);
591 : /* conflict with new right expressions */
592 607950 : for(n = l_exps->h; n; n = n->next) {
593 585675 : sql_exp *e = n->data;
594 :
595 585675 : if (exp_is_atom(e))
596 0 : continue;
597 585675 : if (e->nid && exps_bind_nid(r_exps, e->nid))
598 : return rel;
599 585340 : if (e->alias.label && exps_bind_nid(r_exps, e->alias.label))
600 : return rel;
601 : }
602 : /* conflict with new left expressions */
603 155106 : for(n = r_exps->h; n; n = n->next) {
604 132831 : sql_exp *e = n->data;
605 :
606 132831 : if (exp_is_atom(e))
607 0 : continue;
608 132831 : if (e->nid && exps_bind_nid(l_exps, e->nid))
609 : return rel;
610 132831 : if (e->alias.label && exps_bind_nid(l_exps, e->alias.label))
611 : return rel;
612 : }
613 : }
614 :
615 : /* rename operator expressions */
616 37955 : if (l->op == op_project) {
617 : /* rewrite rel from rel->l into rel->l->l */
618 34769 : if (rel->exps) {
619 71731 : for (n = rel->exps->h; n; n = n->next) {
620 38026 : sql_exp *e = n->data, *ne;
621 :
622 38026 : ne = exp_rename(v->sql, e, l, l->l);
623 38026 : assert(ne);
624 38026 : if (ne != e && exp_name(e))
625 0 : exp_propagate(v->sql->sa, ne, e);
626 38026 : n->data = ne;
627 : }
628 : }
629 34769 : rel->l = l->l;
630 34769 : l->l = NULL;
631 34769 : rel_destroy(l);
632 : }
633 37955 : if (is_join(rel->op) && r->op == op_project && list_empty(rel->attr)) {
634 : /* rewrite rel from rel->r into rel->r->l */
635 4093 : if (rel->exps) {
636 7464 : for (n = rel->exps->h; n; n = n->next) {
637 4011 : sql_exp *e = n->data, *ne;
638 :
639 4011 : ne = exp_rename(v->sql, e, r, r->l);
640 4011 : assert(ne);
641 4011 : if (ne != e && exp_name(e))
642 0 : exp_propagate(v->sql->sa, ne, e);
643 4011 : n->data = ne;
644 : }
645 : }
646 4093 : rel->r = r->l;
647 4093 : r->l = NULL;
648 4093 : rel_destroy(r);
649 : }
650 : /* Done, ie introduce new project */
651 37955 : exps_fix_card(exps, rel->card);
652 : /* Fix nil flag */
653 37955 : if (!list_empty(exps)) {
654 819299 : for (n = exps->h ; n && i < nlexps ; n = n->next, i++) {
655 781344 : sql_exp *e = n->data;
656 :
657 781344 : if (is_right(rel->op) || is_full(rel->op))
658 448 : set_has_nil(e);
659 781344 : set_not_unique(e);
660 : }
661 164541 : for (; n ; n = n->next) {
662 126586 : sql_exp *e = n->data;
663 :
664 126586 : if (is_left(rel->op) || is_full(rel->op))
665 39442 : set_has_nil(e);
666 126586 : set_not_unique(e);
667 : }
668 : }
669 37955 : v->changes++;
670 37955 : return rel_inplace_project(v->sql->sa, rel, NULL, exps);
671 : }
672 2287474 : if (is_groupby(rel->op) && !rel_is_ref(rel) && rel->exps && list_length(rel->exps) > 1) {
673 39369 : node *n;
674 39369 : int fnd = 0;
675 39369 : list *aexps, *pexps;
676 :
677 : /* check if some are expressions aren't e_aggr */
678 154202 : for (n = rel->exps->h; n && !fnd; n = n->next) {
679 114833 : sql_exp *e = n->data;
680 :
681 114833 : if (e->type != e_aggr && e->type != e_column && e->type != e_atom && e->card > CARD_ATOM) {
682 114833 : fnd = 1;
683 : }
684 : }
685 : /* only aggr, no rewrite needed */
686 39369 : if (!fnd)
687 : return rel;
688 :
689 0 : aexps = sa_list(v->sql->sa);
690 0 : pexps = sa_list(v->sql->sa);
691 0 : for (n = rel->exps->h; n; n = n->next) {
692 0 : sql_exp *e = n->data, *ne = NULL;
693 :
694 0 : switch (e->type) {
695 0 : case e_atom: /* move over to the projection */
696 0 : list_append(pexps, e);
697 0 : break;
698 0 : case e_func:
699 0 : list_append(pexps, e);
700 0 : list_split_aggr_and_project(v->sql, aexps, e->l);
701 0 : break;
702 0 : case e_convert:
703 0 : list_append(pexps, e);
704 0 : e->l = split_aggr_and_project(v->sql, aexps, e->l);
705 0 : break;
706 0 : default: /* simple alias */
707 0 : list_append(aexps, e);
708 0 : ne = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
709 0 : assert(0);
710 : list_append(pexps, ne);
711 : break;
712 : }
713 : }
714 0 : v->changes++;
715 0 : rel->exps = aexps;
716 0 : return rel_inplace_project( v->sql->sa, rel, NULL, pexps);
717 : }
718 : return rel;
719 : }
720 :
721 : static sql_rel *
722 170584 : rel_push_project_up(visitor *v, global_props *gp, sql_rel *rel)
723 : {
724 170584 : (void) gp;
725 170584 : return rel_visitor_bottomup(v, rel, &rel_push_project_up_);
726 : }
727 :
728 : run_optimizer
729 611615 : bind_push_project_up(visitor *v, global_props *gp)
730 : {
731 611615 : int flag = v->sql->sql_optimizer;
732 611419 : return gp->opt_level == 1 && (flag & push_project_up) &&
733 1223034 : gp->cnt[op_project] ? rel_push_project_up : NULL;
734 : }
735 :
736 :
737 : static void split_exps(mvc *sql, list *exps, sql_rel *rel);
738 :
739 : static int
740 3080682 : exp_refers_cmp( sql_exp *e1, sql_exp *e2)
741 : {
742 3080682 : if (exp_refers(e1,e2))
743 0 : return 0;
744 : return -1;
745 : }
746 :
747 : sql_exp *
748 130251 : add_exp_too_project(mvc *sql, sql_exp *e, sql_rel *rel)
749 : {
750 130251 : node *n = list_find(rel->exps, e, (fcmp)&exp_match_exp_cmp);
751 :
752 : /* if not matching we may refer to an older expression */
753 130251 : if (!n)
754 82714 : n = list_find(rel->exps, e, (fcmp)&exp_refers_cmp);
755 82714 : if (!n) {
756 82714 : exp_label(sql->sa, e, ++sql->label);
757 82714 : append(rel->exps, e);
758 : } else {
759 47537 : sql_exp *ne = n->data;
760 :
761 47537 : if (rel && rel->l) {
762 47537 : if (ne && ne->alias.label && rel_find_nid(rel->l, ne->alias.label)) {
763 0 : exp_label(sql->sa, e, ++sql->label);
764 0 : append(rel->exps, e);
765 0 : ne = e;
766 : }
767 : }
768 : e = ne;
769 : }
770 130251 : e = exp_ref(sql, e);
771 130251 : return e;
772 : }
773 :
774 : static void
775 150834 : add_exps_too_project(mvc *sql, list *exps, sql_rel *rel)
776 : {
777 150834 : node *n;
778 :
779 150834 : if (!exps)
780 : return;
781 466722 : for(n=exps->h; n; n = n->next) {
782 315888 : sql_exp *e = n->data;
783 :
784 315888 : if (e->type != e_column && !exp_is_atom(e))
785 129971 : n->data = add_exp_too_project(sql, e, rel);
786 : }
787 : }
788 :
789 : static sql_exp *
790 495679 : split_exp(mvc *sql, sql_exp *e, sql_rel *rel)
791 : {
792 495679 : if (exp_is_atom(e))
793 : return e;
794 367818 : switch(e->type) {
795 : case e_column:
796 : return e;
797 16653 : case e_convert:
798 16653 : e->l = split_exp(sql, e->l, rel);
799 16652 : return e;
800 159957 : case e_aggr:
801 : case e_func:
802 159957 : if (!is_analytic(e) && !exp_has_sideeffect(e)) {
803 158097 : sql_subfunc *f = e->f;
804 158097 : if (e->type == e_func && !f->func->s && is_caselike_func(f) /*is_ifthenelse_func(f)*/) {
805 : return e;
806 : } else {
807 150834 : split_exps(sql, e->l, rel);
808 150834 : add_exps_too_project(sql, e->l, rel);
809 : }
810 : }
811 : return e;
812 484 : case e_cmp:
813 484 : if (e->flag == cmp_or || e->flag == cmp_filter) {
814 461 : split_exps(sql, e->l, rel);
815 461 : split_exps(sql, e->r, rel);
816 23 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
817 0 : e->l = split_exp(sql, e->l, rel);
818 0 : split_exps(sql, e->r, rel);
819 : } else {
820 23 : e->l = split_exp(sql, e->l, rel);
821 23 : e->r = split_exp(sql, e->r, rel);
822 23 : if (e->f)
823 21 : e->f = split_exp(sql, e->f, rel);
824 : }
825 : return e;
826 : case e_atom:
827 : case e_psm:
828 : return e;
829 : }
830 : return e;
831 : }
832 :
833 : static void
834 151763 : split_exps(mvc *sql, list *exps, sql_rel *rel)
835 : {
836 151763 : if (list_empty(exps))
837 : return;
838 468594 : for(node *n=exps->h; n; n = n->next){
839 316831 : sql_exp *e = n->data;
840 :
841 316831 : e = split_exp(sql, e, rel);
842 316831 : n->data = e;
843 : }
844 : }
845 :
846 : sql_rel *
847 585133 : rel_split_project_(visitor *v, sql_rel *rel, int top)
848 : {
849 1170278 : if (mvc_highwater(v->sql))
850 : return rel;
851 :
852 585140 : if (!rel)
853 : return NULL;
854 585140 : if (is_project(rel->op) && list_length(rel->exps) && (is_groupby(rel->op) || rel->l) && !need_distinct(rel) && !is_single(rel)) {
855 164114 : list *exps = rel->exps;
856 164114 : node *n;
857 164114 : int funcs = 0;
858 164114 : sql_rel *nrel;
859 :
860 : /* are there functions */
861 1277021 : for (n=exps->h; n && !funcs; n = n->next) {
862 1112906 : sql_exp *e = n->data;
863 :
864 1112906 : funcs = exp_has_func(e);
865 : }
866 : /* introduce extra project */
867 164115 : if (funcs && rel->op != op_project) {
868 7 : nrel = rel_project(v->sql->sa, rel->l,
869 7 : rel_projections(v->sql, rel->l, NULL, 1, 1));
870 7 : rel->l = nrel;
871 : /* recursively split all functions and add those to the projection list */
872 7 : split_exps(v->sql, rel->exps, nrel);
873 14 : if (nrel->l && !(nrel->l = rel_split_project_(v, nrel->l, (is_topn(rel->op)||is_sample(rel->op))?top:0)))
874 : return NULL;
875 7 : return rel;
876 164108 : } else if (funcs && !top && list_empty(rel->r)) {
877 : /* projects can have columns point back into the expression list, ie
878 : * create a new list including the split expressions */
879 16564 : node *n;
880 16564 : list *exps = rel->exps;
881 :
882 16564 : rel->exps = sa_list(v->sql->sa);
883 178693 : for (n=exps->h; n; n = n->next)
884 162129 : append(rel->exps, split_exp(v->sql, n->data, rel));
885 147544 : } else if (funcs && top && rel_is_ref(rel) && list_empty(rel->r)) {
886 : /* inplace */
887 9 : list *exps = rel_projections(v->sql, rel, NULL, 1, 1);
888 9 : sql_rel *l = rel_project(v->sql->sa, rel->l, NULL);
889 9 : rel->l = l;
890 9 : l->exps = rel->exps;
891 9 : set_processed(l);
892 9 : rel->exps = exps;
893 : }
894 : }
895 585138 : if (is_mset(rel->op) || is_set(rel->op) || is_basetable(rel->op))
896 : return rel;
897 381984 : if (rel->l && (rel->op != op_table || rel->flag != TRIGGER_WRAPPER)) {
898 402311 : rel->l = rel_split_project_(v, rel->l, (is_topn(rel->op)||is_sample(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0);
899 372610 : if (!rel->l)
900 : return NULL;
901 : }
902 381990 : if ((is_join(rel->op) || is_semi(rel->op)) && rel->r) {
903 111279 : rel->r = rel_split_project_(v, rel->r, (is_topn(rel->op)||is_sample(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0);
904 111279 : if (!rel->r)
905 : return NULL;
906 : }
907 : return rel;
908 : }
909 :
910 : static sql_rel *
911 101246 : rel_split_project(visitor *v, global_props *gp, sql_rel *rel)
912 : {
913 101246 : (void) gp;
914 101246 : return rel_split_project_(v, rel, 1);
915 : }
916 :
917 : run_optimizer
918 611541 : bind_split_project(visitor *v, global_props *gp)
919 : {
920 611541 : int flag = v->sql->sql_optimizer;
921 541277 : return gp->opt_cycle == 0 && gp->opt_level == 1 && (flag & split_project) &&
922 1152810 : (gp->cnt[op_project] || gp->cnt[op_groupby]) ? rel_split_project : NULL;
923 : }
924 :
925 :
926 : static sql_rel *
927 170782 : rel_project_reduce_casts(visitor *v, global_props *gp, sql_rel *rel)
928 : {
929 170782 : if (!rel)
930 : return NULL;
931 170782 : (void) gp;
932 170782 : if (gp->opt_level == 1 && v->value_based_opt && is_simple_project(rel->op) && list_length(rel->exps)) {
933 86417 : list *exps = rel->exps;
934 86417 : node *n;
935 :
936 415046 : for (n=exps->h; n; n = n->next) {
937 328629 : sql_exp *e = n->data;
938 :
939 328629 : if (e && e->type == e_func) {
940 30546 : sql_subfunc *f = e->f;
941 30546 : sql_subtype *res = f->res->h->data;
942 :
943 30546 : if (!f->func->s && !strcmp(f->func->base.name, "sql_mul") && res->scale > 0) {
944 6 : list *args = e->l;
945 6 : sql_exp *h = args->h->data;
946 6 : sql_exp *t = args->t->data;
947 6 : atom *ha = exp_value(v->sql, h), *ta = exp_value(v->sql, t);
948 :
949 6 : if (ha || ta) {
950 5 : atom *a = ha ? ha : ta;
951 5 : atom *na = reduce_scale(v->sql, a);
952 :
953 5 : if (na && na != a) {
954 2 : int rs = a->tpe.scale - na->tpe.scale;
955 2 : res->scale -= rs;
956 2 : if (ha) {
957 0 : h->r = NULL;
958 0 : h->l = na;
959 : } else {
960 2 : t->r = NULL;
961 2 : t->l = na;
962 : }
963 2 : v->changes++;
964 : }
965 : }
966 : }
967 : }
968 : }
969 : }
970 : return rel;
971 : }
972 :
973 : run_optimizer
974 611736 : bind_project_reduce_casts(visitor *v, global_props *gp)
975 : {
976 611736 : int flag = v->sql->sql_optimizer;
977 611736 : return gp->cnt[op_project] && (flag & project_reduce_casts) ? rel_project_reduce_casts : NULL;
978 : }
979 :
980 : static sql_column *
981 94323 : exp_find_column_( sql_rel *rel, sql_exp *exp, int pnr, sql_rel **bt )
982 : {
983 94323 : if (exp->type == e_column)
984 94297 : return name_find_column(rel, exp->l, exp->r, pnr, bt);
985 : return NULL;
986 : }
987 :
988 : static int
989 0 : rel_part_nr( sql_rel *rel, sql_exp *e )
990 : {
991 0 : sql_column *c = NULL;
992 0 : sql_rel *bt = NULL;
993 0 : assert(e->type == e_cmp);
994 :
995 0 : c = exp_find_column_(rel, e->l, -1, &bt);
996 0 : if (!c)
997 0 : c = exp_find_column_(rel, e->r, -1, &bt);
998 0 : if (!c && e->f)
999 0 : c = exp_find_column_(rel, e->f, -1, &bt);
1000 0 : if (!c || !bt || !rel_base_get_mergetable(bt))
1001 0 : return -1;
1002 0 : sql_table *pp = c->t;
1003 0 : sql_table *mt = rel_base_get_mergetable(bt);
1004 0 : return find_member_pos(mt->members, pp);
1005 : }
1006 :
1007 : static int
1008 0 : rel_uses_part_nr( sql_rel *rel, sql_exp *e, int pnr )
1009 : {
1010 0 : sql_column *c = NULL;
1011 0 : sql_rel *bt = NULL;
1012 0 : assert(e->type == e_cmp);
1013 :
1014 : /*
1015 : * following case fails.
1016 : *
1017 : * semijoin( A1, union [A1, A2] )
1018 : * The union will never return proper column (from A2).
1019 : * ie need different solution (probably pass pnr).
1020 : */
1021 0 : c = exp_find_column_(rel, e->l, pnr, &bt);
1022 0 : if (!c)
1023 0 : c = exp_find_column_(rel, e->r, pnr, &bt);
1024 0 : if (c && bt && rel_base_get_mergetable(bt)) {
1025 0 : sql_table *pp = c->t;
1026 0 : sql_table *mt = rel_base_get_mergetable(bt);
1027 0 : if (find_member_pos(mt->members, pp) == pnr)
1028 : return 1;
1029 : }
1030 : /* for projects we may need to do a rename! */
1031 0 : if (is_project(rel->op) || is_topn(rel->op) || is_sample(rel->op))
1032 0 : return rel_uses_part_nr( rel->l, e, pnr);
1033 :
1034 0 : if (is_munion(rel->op)) {
1035 : list *l = rel->l;
1036 : if (rel_uses_part_nr( l->h->data, e, pnr))
1037 : return 1;
1038 0 : } else if (is_union(rel->op) || is_join(rel->op) || is_semi(rel->op)) {
1039 0 : if (rel_uses_part_nr( rel->l, e, pnr))
1040 : return 1;
1041 0 : if (!is_semi(rel->op) && rel_uses_part_nr( rel->r, e, pnr))
1042 : return 1;
1043 : }
1044 : return 0;
1045 : }
1046 :
1047 : static sql_column *
1048 2733 : exp_is_pkey(sql_rel *rel, sql_exp *e)
1049 : {
1050 2733 : if (find_prop(e->p, PROP_HASHCOL)) { /* aligned PKEY JOIN */
1051 238 : fcmp cmp = (fcmp)&kc_column_cmp;
1052 238 : sql_column *c = exp_find_column(rel, e, -2);
1053 :
1054 238 : if (c && c->t->pkey && list_find(c->t->pkey->k.columns, c, cmp) != NULL)
1055 : return c;
1056 : }
1057 : return NULL;
1058 : }
1059 :
1060 : static sql_exp *
1061 3464 : rel_is_join_on_pkey(sql_rel *rel, bool pk_fk) /* pk_fk is used to verify is a join on pk-fk */
1062 : {
1063 3464 : if (!rel || !rel->exps)
1064 : return NULL;
1065 4751 : for (node *n = rel->exps->h; n; n = n->next) {
1066 2519 : sql_exp *je = n->data;
1067 :
1068 3778 : if (je->type == e_cmp && je->flag == cmp_equal &&
1069 2518 : (exp_is_pkey(rel, je->l) || exp_is_pkey(rel, je->r)) &&
1070 237 : (!pk_fk || find_prop(je->p, PROP_JOINIDX)))
1071 0 : return je;
1072 : }
1073 : return NULL;
1074 : }
1075 :
1076 : static int
1077 0 : exps_has_predicate( list *l )
1078 : {
1079 0 : node *n;
1080 :
1081 0 : for( n = l->h; n; n = n->next){
1082 0 : sql_exp *e = n->data;
1083 :
1084 0 : if (e->card <= CARD_ATOM)
1085 : return 1;
1086 : }
1087 : return 0;
1088 : }
1089 :
1090 : static sql_rel *
1091 656 : rel_find_select( sql_rel *r)
1092 : {
1093 2248 : while (!is_select(r->op) && r->l && is_project(r->op))
1094 : r = r->l;
1095 656 : if (is_select(r->op))
1096 15 : return r;
1097 : return NULL;
1098 : }
1099 :
1100 : static bool
1101 134843 : rels_share_rel(list *l)
1102 : {
1103 134843 : sql_rel *ref = NULL;
1104 207655 : for (node *n = l->h; n; n = n->next) {
1105 206999 : sql_rel *r = n->data;
1106 206999 : if(!ref) {
1107 134843 : if ((ref = rel_find_ref(r)) == NULL)
1108 : return false;
1109 72156 : } else if (ref != rel_find_ref(r)) {
1110 : return false;
1111 : }
1112 : }
1113 : return true;
1114 : }
1115 :
1116 : static inline sql_rel *
1117 2659227 : rel_merge_munion(visitor *v, sql_rel *rel)
1118 : {
1119 2659227 : if (is_munion(rel->op) && rels_share_rel(rel->l)) {
1120 656 : list *rels = rel->l, *nrels = NULL;
1121 656 : sql_rel *cur = NULL, *curs = NULL;
1122 :
1123 : /* Find selects and try to merge */
1124 656 : for(node *n = rels->h; n; n = n->next) {
1125 656 : sql_rel *r = n->data;
1126 656 : sql_rel *s = rel_find_select(r);
1127 :
1128 656 : if (!s)
1129 : return rel;
1130 15 : if (cur) {
1131 0 : if (s != r->l || curs->l != s->l || !rel_is_ref(s->l) ||
1132 : /* for now only union(project*(select(R),project*(select(R))) */
1133 0 : !s->exps || !curs->exps || exps_has_predicate(s->exps) || exps_has_predicate(curs->exps)) {
1134 0 : if (nrels)
1135 0 : append(nrels, r);
1136 : else
1137 : return rel;
1138 : }
1139 :
1140 : /* merge, ie. add 'or exp' */
1141 0 : curs->exps = append(new_exp_list(v->sql->sa), exp_or(v->sql->sa, cur->exps, s->exps, 0));
1142 0 : if (!nrels) {
1143 0 : nrels = sa_list(v->sql->sa);
1144 0 : append(nrels, cur);
1145 : }
1146 : } else {
1147 15 : if (s != r->l || !rel_is_ref(s->l)) {
1148 15 : if (nrels) {
1149 0 : append(nrels, r);
1150 : } else {
1151 : return rel;
1152 : }
1153 : }
1154 : cur = r;
1155 : curs = s;
1156 : }
1157 : }
1158 0 : if (nrels) {
1159 0 : v->changes++;
1160 0 : rel->l = nrels;
1161 : }
1162 0 : return rel;
1163 : }
1164 : return rel;
1165 : }
1166 :
1167 : static sql_rel *
1168 2659227 : rel_optimize_munions_bottomup_(visitor *v, sql_rel *rel)
1169 : {
1170 2659227 : rel = rel_merge_munion(v, rel);
1171 2659227 : return rel;
1172 : }
1173 :
1174 : static sql_rel *
1175 25763 : rel_optimize_munions_bottomup(visitor *v, global_props *gp, sql_rel *rel)
1176 : {
1177 25763 : (void) gp;
1178 25763 : return rel_visitor_bottomup(v, rel, &rel_optimize_munions_bottomup_);
1179 : }
1180 :
1181 : run_optimizer
1182 611627 : bind_optimize_unions_bottomup(visitor *v, global_props *gp)
1183 : {
1184 611627 : int flag = v->sql->sql_optimizer;
1185 611431 : return gp->opt_level == 1 && gp->cnt[op_munion] && (flag & optimize_unions_bottomup)
1186 611627 : ? rel_optimize_munions_bottomup : NULL;
1187 : }
1188 :
1189 :
1190 : static inline sql_rel *
1191 3638715 : rel_project_cse(visitor *v, sql_rel *rel)
1192 : {
1193 3638715 : if (is_project(rel->op) && rel->exps) {
1194 1467565 : node *n, *m;
1195 1467565 : list *nexps;
1196 1467565 : int needed = 0;
1197 :
1198 8518307 : for (n=rel->exps->h; n && !needed; n = n->next) {
1199 7050750 : sql_exp *e1 = n->data;
1200 :
1201 7050750 : if (e1->type != e_column && !exp_is_atom(e1) && exp_name(e1)) {
1202 16044464 : for (m=n->next; m && !needed; m = m->next){
1203 15320008 : sql_exp *e2 = m->data;
1204 :
1205 15320008 : if (exp_name(e2) && exp_match_exp(e1, e2))
1206 15320008 : needed = 1;
1207 : }
1208 : }
1209 : }
1210 :
1211 1467557 : if (!needed)
1212 : return rel;
1213 :
1214 759 : nexps = new_exp_list(v->sql->sa);
1215 8737 : for (n=rel->exps->h; n; n = n->next) {
1216 7978 : sql_exp *e1 = n->data;
1217 :
1218 7978 : if (e1->type != e_column && !exp_is_atom(e1) && exp_name(e1)) {
1219 34095 : for (m=nexps->h; m; m = m->next){
1220 29489 : sql_exp *e2 = m->data;
1221 :
1222 29489 : if (exp_name(e2) && exp_match_exp(e1, e2)) {
1223 799 : assert(e2->alias.label);
1224 799 : sql_exp *ne = exp_ref(v->sql, e2);
1225 :
1226 799 : ne = exp_propagate(v->sql->sa, ne, e1);
1227 799 : assert(!e1->ref);
1228 799 : exp_setalias(ne, e1->alias.label, exp_relname(e1), exp_name(e1));
1229 799 : set_selfref(ne);
1230 799 : assert(ne->nid);
1231 : e1 = ne;
1232 : break;
1233 : }
1234 : }
1235 : }
1236 7978 : append(nexps, e1);
1237 : }
1238 759 : rel->exps = nexps;
1239 : }
1240 : return rel;
1241 : }
1242 :
1243 : static int exp_is_const_op(sql_exp *exp, sql_exp *tope, sql_rel *expr);
1244 :
1245 : static int
1246 76 : exps_are_const_op(list *exps, sql_exp *tope, sql_rel *expr)
1247 : {
1248 76 : int ok = 1;
1249 :
1250 76 : if (list_empty(exps))
1251 : return 1;
1252 152 : for (node *n = exps->h; n && ok; n = n->next)
1253 76 : ok &= exp_is_const_op(n->data, tope, expr);
1254 : return ok;
1255 : }
1256 :
1257 : static int
1258 522 : exp_is_const_op(sql_exp *exp, sql_exp *tope, sql_rel *expr)
1259 : {
1260 661 : switch (exp->type) {
1261 28 : case e_atom:
1262 28 : return exp->f ? 0 : 1;
1263 2 : case e_convert:
1264 2 : return exp_is_const_op(exp->l, tope, expr);
1265 76 : case e_func:
1266 : case e_aggr: {
1267 76 : sql_subfunc *f = exp->f;
1268 76 : if (f->func->side_effect || IS_ANALYTIC(f->func))
1269 : return 0;
1270 76 : return exps_are_const_op(exp->l, tope, expr);
1271 : }
1272 0 : case e_cmp:
1273 0 : if (exp->flag == cmp_or || exp->flag == cmp_filter)
1274 0 : return exps_are_const_op(exp->l, tope, expr) && exps_are_const_op(exp->r, tope, expr);
1275 0 : if (exp->flag == cmp_in || exp->flag == cmp_notin)
1276 0 : return exp_is_const_op(exp->l, tope, expr) && exps_are_const_op(exp->r, tope, expr);
1277 0 : return exp_is_const_op(exp->l, tope, expr) && exp_is_const_op(exp->r, tope, expr) && (!exp->f || exp_is_const_op(exp->f, tope, expr));
1278 555 : case e_column: {
1279 555 : if (is_simple_project(expr->op) || is_groupby(expr->op)) {
1280 : /* in a simple projection, self-references may occur */
1281 555 : sql_exp *nexp = exps_bind_nid(expr->exps, exp->nid);
1282 555 : if (nexp && list_position(expr->exps, nexp) < list_position(expr->exps, tope))
1283 : return exp_is_const_op(nexp, exp, expr);
1284 : }
1285 : return 0;
1286 : }
1287 : default:
1288 : return 0;
1289 : }
1290 : }
1291 :
1292 : static sql_exp *
1293 23 : rel_groupby_add_count_star(mvc *sql, sql_rel *rel, sql_exp *count_star_exp, bool *count_added)
1294 : {
1295 23 : if (count_star_exp)
1296 : return count_star_exp;
1297 16 : if (!list_empty(rel->exps)) {
1298 44 : for (node *n=rel->exps->h; n ; n = n->next) {
1299 32 : sql_exp *e = n->data;
1300 :
1301 32 : if (exp_aggr_is_count(e) && !need_distinct(e) && list_length(e->l) == 0)
1302 4 : return e;
1303 : }
1304 : }
1305 12 : sql_subfunc *cf = sql_bind_func(sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
1306 12 : *count_added = true;
1307 12 : return rel_groupby_add_aggr(sql, rel, exp_aggr(sql->sa, NULL, cf, 0, 0, rel->card, 0));
1308 : }
1309 :
1310 : /* optimize sum(x + 12) into sum(x) + 12*count(*) */
1311 : static inline sql_rel *
1312 40482 : rel_simplify_sum(visitor *v, sql_rel *rel)
1313 : {
1314 40482 : if (is_groupby(rel->op) && !list_empty(rel->exps)) {
1315 40427 : sql_rel *upper = NULL, *groupby = rel, *l = groupby->l;
1316 40427 : sql_exp *count_star_exp = NULL;
1317 40427 : bool count_added = false;
1318 :
1319 125684 : for (node *n=groupby->exps->h; n ; n = n->next) {
1320 85257 : sql_exp *e = n->data;
1321 85257 : list *el = e->l;
1322 85257 : sql_subfunc *sf = e->f;
1323 :
1324 85257 : if (e->type == e_aggr && !need_distinct(e) && sf->func->type == F_AGGR && !sf->func->s && !strcmp(sf->func->base.name, "sum")) {
1325 7924 : sql_rel *expr = groupby;
1326 7924 : sql_exp *exp = (sql_exp*) el->h->data, *oexp = exp;
1327 :
1328 7924 : while (is_numeric_upcast(exp))
1329 0 : exp = exp->l;
1330 : /* we want to find a +/- call, so expect them to appear only on simple projections */
1331 16875 : while (exp && exp->type == e_column && (is_simple_project(expr->op) || is_groupby(expr->op)) && expr->l) {
1332 10531 : sql_rel *nexpr = NULL;
1333 10531 : sql_exp *nexp = rel_find_exp_and_corresponding_rel(l, exp, false, &nexpr, NULL);
1334 :
1335 : /* break when it loops on the same relation */
1336 10531 : if (nexpr == expr && list_position(expr->exps, nexp) >= list_position(expr->exps, exp))
1337 : break;
1338 8951 : expr = nexpr;
1339 8951 : exp = oexp = nexp;
1340 8953 : while (exp && is_numeric_upcast(exp))
1341 2 : exp = exp->l;
1342 : }
1343 :
1344 7924 : list *expl = exp ? exp->l : NULL;
1345 85257 : sql_subfunc *expf = exp ? exp->f : NULL;
1346 : /* found a candidate function */
1347 6820 : if (exp && exp->type == e_func && expf->func->type == F_FUNC && !expf->func->s &&
1348 1141 : (!strcmp(expf->func->base.name, "sql_sub") || !strcmp(expf->func->base.name, "sql_add"))) {
1349 223 : sql_exp *e1 = (sql_exp*) expl->h->data, *e2 = (sql_exp*) expl->h->next->data;
1350 223 : int e1ok = exp_is_const_op(e1, oexp, expr), e2ok = exp_is_const_op(e2, oexp, expr);
1351 :
1352 223 : if ((!e1ok && e2ok) || (e1ok && !e2ok)) {
1353 24 : sql_exp *ocol = e1ok ? e2 : e1, *constant = e1ok ? e1 : e2, *mul, *colref, *naggr, *newop, *col = ocol, *match;
1354 24 : bool add_col = true, prepend = false;
1355 :
1356 : /* if 'col' is a projection from the under relation, then use it */
1357 24 : while (is_numeric_upcast(col))
1358 0 : col = col->l;
1359 24 : if (col->type == e_column) {
1360 22 : sql_exp *colf = exps_find_exp(l->exps, col);
1361 :
1362 : /* col is already found in the inner relation. Also look for a new reference for col, eg sql_add(col, 1), 1 as col */
1363 22 : if (colf && list_position(l->exps, colf) < list_position(l->exps, oexp)) {
1364 : add_col = false;
1365 10 : } else if (!colf && is_simple_project(l->op) && list_empty(l->r) && !rel_is_ref(l) && !need_distinct(l)) {
1366 : prepend = true;
1367 : add_col = false;
1368 1 : } else if (!colf && (is_simple_project(l->op) || is_groupby(l->op))) {
1369 : /* on these scenarios the new column expression will be ordered/(grouped for distinct) or create potential ambiguity (multiple ref), so skip */
1370 1 : continue;
1371 : }
1372 2 : } else if ((is_simple_project(l->op) && (!list_empty(l->r) || rel_is_ref(l) || need_distinct(l))) || is_groupby(l->op)) {
1373 0 : continue;
1374 : }
1375 :
1376 : /* add count star */
1377 23 : count_star_exp = rel_groupby_add_count_star(v->sql, groupby, count_star_exp, &count_added);
1378 : /* multiply constant by count star */
1379 23 : if (!(mul = rel_binop_(v->sql, NULL, constant, exp_ref(v->sql, count_star_exp), "sys", "sql_mul", card_value, true))) {
1380 0 : v->sql->session->status = 0;
1381 0 : v->sql->errstr[0] = '\0';
1382 0 : continue;
1383 : }
1384 23 : if (!has_label(mul))
1385 23 : exp_label(v->sql->sa, mul, ++v->sql->label);
1386 :
1387 23 : colref = exp_ref(v->sql, ocol);
1388 23 : if (add_col) /* if 'col' will be added, then make sure it has an unique label */
1389 3 : exp_label(v->sql->sa, colref, ++v->sql->label);
1390 :
1391 : /* 'oexp' contains the type for the input for the 'sum' aggregate */
1392 23 : if (!(colref = exp_check_type(v->sql, exp_subtype(oexp), groupby, colref, type_equal))) {
1393 0 : v->sql->session->status = 0;
1394 0 : v->sql->errstr[0] = '\0';
1395 0 : continue;
1396 : }
1397 : /* update sum to use the column side only */
1398 23 : sql_subfunc *a = sql_bind_func(v->sql, "sys", "sum", exp_subtype(colref), NULL, F_AGGR, true, true);
1399 23 : if (!a)
1400 0 : continue;
1401 23 : naggr = exp_aggr(v->sql->sa, list_append(sa_list(v->sql->sa), colref), a, need_distinct(e), need_no_nil(e), groupby->card, has_nil(e));
1402 23 : if ((match = exps_any_match(groupby->exps, naggr)) && list_position(groupby->exps, match) < list_position(groupby->exps, e)) { /* found a matching aggregate, use it */
1403 9 : naggr = exp_ref(v->sql, match);
1404 9 : exp_label(v->sql->sa, naggr, ++v->sql->label);
1405 14 : } else if (!has_label(naggr)) { /* otherwise use the new one */
1406 14 : exp_label(v->sql->sa, naggr, ++v->sql->label);
1407 : }
1408 :
1409 : /* generate addition/subtraction. subtraction is not commutative, so keep original order! */
1410 23 : if (!(newop = rel_binop_(v->sql, NULL, e1 == constant ? mul : exp_ref(v->sql, naggr), e1 == constant ? exp_ref(v->sql, naggr) : mul, "sys", expf->func->base.name, card_value, true))) {
1411 0 : v->sql->session->status = 0;
1412 0 : v->sql->errstr[0] = '\0';
1413 0 : continue;
1414 : }
1415 23 : if (!(newop = exp_check_type(v->sql, exp_subtype(e), groupby, newop, type_equal))) {
1416 0 : v->sql->session->status = 0;
1417 0 : v->sql->errstr[0] = '\0';
1418 0 : continue;
1419 : }
1420 :
1421 : /* a column reference can be prepended to the inner relation, add it after all the check type calls succeed */
1422 23 : if (prepend)
1423 9 : list_prepend(l->exps, col);
1424 :
1425 : /* the new generate function calls are valid, update relations */
1426 : /* we need a new relation for the multiplication and addition/subtraction */
1427 23 : if (!upper) {
1428 : /* be careful with relations with more than 1 reference, so do in-place replacement */
1429 16 : list *projs = rel_projections(v->sql, rel, NULL, 1, 1);
1430 16 : sql_rel *nrel = rel_groupby(v->sql, rel->l, NULL);
1431 16 : nrel->exps = rel->exps;
1432 16 : nrel->r = rel->r;
1433 16 : nrel->card = rel->card;
1434 16 : nrel->nrcols = list_length(rel->exps);
1435 16 : set_processed(nrel);
1436 16 : rel_dup(rel->l);
1437 16 : upper = rel = rel_inplace_project(v->sql->sa, rel, nrel, projs);
1438 16 : rel->card = exps_card(projs);
1439 16 : groupby = nrel; /* update pointers :) */
1440 16 : l = groupby->l;
1441 : }
1442 91 : for (node *n = upper->exps->h ; n ; ) {
1443 68 : node *next = n->next;
1444 68 : sql_exp *re = n->data;
1445 :
1446 : /* remove the old reference to the aggregate because we will use the addition/subtraction instead,
1447 : as well as the count star if count_added */
1448 68 : if (exp_refers(e, re) || (count_added && exp_refers(count_star_exp, re)))
1449 35 : list_remove_node(upper->exps, NULL, n);
1450 : n = next;
1451 : }
1452 :
1453 : /* update sum aggregate with new aggregate or reference to an existing one */
1454 23 : n->data = naggr;
1455 23 : list_hash_clear(groupby->exps);
1456 :
1457 : /* add column reference with new label, if 'col' was not found */
1458 23 : if (add_col) {
1459 3 : if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l))
1460 0 : groupby->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
1461 3 : list_append(l->exps, ocol);
1462 : }
1463 :
1464 : /* propagate alias and add new expression */
1465 23 : exp_prop_alias(v->sql->sa, newop, e);
1466 23 : list_append(upper->exps, newop);
1467 23 : v->changes++;
1468 : }
1469 : }
1470 : }
1471 : }
1472 : }
1473 40482 : return rel;
1474 : }
1475 :
1476 : /* optimize group by x+1,(y-2)*3,2-z into group by x,y,z */
1477 : static inline sql_rel *
1478 40482 : rel_simplify_groupby_columns(visitor *v, sql_rel *rel)
1479 : {
1480 40482 : if (is_groupby(rel->op) && !list_empty(rel->r)) {
1481 29929 : sql_rel *l = rel->l;
1482 :
1483 77667 : for (node *n=((list*)rel->r)->h; n ; n = n->next) {
1484 47738 : sql_exp *e = n->data;
1485 47738 : e->used = 0; /* we need to use this flag, clean it first */
1486 : }
1487 77667 : for (node *n=((list*)rel->r)->h; n ; n = n->next) {
1488 47738 : sql_exp *e = n->data;
1489 :
1490 47738 : if (e->type == e_column) {
1491 47701 : bool searching = true;
1492 47701 : sql_rel *efrel = NULL;
1493 47701 : sql_exp *exp = rel_find_exp_and_corresponding_rel(l, e, false, &efrel, NULL), *col = NULL, *tope = exp;
1494 :
1495 95421 : while (searching && !col) {
1496 47720 : sql_exp *exp_col = exp;
1497 :
1498 47720 : if (exp && is_numeric_upcast(exp))
1499 54 : exp = exp->l;
1500 47720 : if (exp && exp->type == e_func) {
1501 658 : list *el = exp->l;
1502 658 : sql_subfunc *sf = exp->f;
1503 : /* At the moment look only at injective math functions */
1504 658 : if (sf->func->type == F_FUNC && !sf->func->s &&
1505 645 : (!strcmp(sf->func->base.name, "sql_sub") || !strcmp(sf->func->base.name, "sql_add") || !strcmp(sf->func->base.name, "sql_mul"))) {
1506 111 : sql_exp *e1 = (sql_exp*) el->h->data, *e2 = (sql_exp*) el->h->next->data;
1507 : /* the optimization cannot be done if side-effect calls (e.g. rand()) are present */
1508 215 : int e1ok = exp_is_atom(e1) && !exp_unsafe(e1, true, false) && !exp_has_sideeffect(e1), e2ok = exp_is_atom(e2) && !exp_unsafe(e2, true, false) && !exp_has_sideeffect(e2);
1509 :
1510 111 : if ((!e1ok && e2ok) || (e1ok && !e2ok)) {
1511 57 : sql_exp *c = e1ok ? e2 : e1;
1512 57 : bool done = false;
1513 57 : exp_col = exps_find_exp(efrel->exps, c);
1514 57 : if (exp_col)
1515 28 : c = exp_col;
1516 :
1517 116 : while (!done) {
1518 59 : if (is_numeric_upcast(c))
1519 0 : c = c->l;
1520 59 : if (c->type == e_column) {
1521 40 : if (is_simple_project(efrel->op) || is_groupby(efrel->op)) {
1522 : /* in a simple projection, self-references may occur */
1523 40 : sql_exp *nc = exps_find_exp(efrel->exps, c);
1524 40 : if (nc && list_position(efrel->exps, nc) < list_position(efrel->exps, exp_col)) {
1525 2 : exp_col = c;
1526 2 : c = nc;
1527 2 : continue;
1528 : }
1529 : }
1530 : col = c; /* 'c' is a column reference from the left relation */
1531 : done = true;
1532 : } else {
1533 : exp = c; /* maybe a nested function call, let's continue searching */
1534 : done = true;
1535 : }
1536 : }
1537 : } else {
1538 : searching = false;
1539 : }
1540 : } else {
1541 : searching = false;
1542 : }
1543 : } else {
1544 : searching = false;
1545 : }
1546 : }
1547 47701 : if (col) { /* a column reference was found */
1548 38 : const char *rname = exp_relname(e), *name = exp_name(e);
1549 :
1550 : /* the grouping column has an alias, we have to keep it */
1551 38 : if ((rname && name && (strcmp(rname, e->l) != 0 || strcmp(name, e->r) != 0)) || (!rname && name && strcmp(name, e->r) != 0)) {
1552 2 : if (!has_label(e)) /* dangerous to merge, skip it */
1553 0 : continue;
1554 2 : if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l))
1555 0 : rel->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
1556 2 : list_append(l->exps, e);
1557 2 : n->data = e = exp_ref(v->sql, e);
1558 2 : list_hash_clear(rel->r);
1559 : }
1560 :
1561 38 : sql_exp *f = exps_find_exp(rel->r, col);
1562 :
1563 38 : if (f && list_position(rel->r, f) < list_position(rel->r, e)) { /* if already present, remove it */
1564 10 : e->used = 1;
1565 : } else {
1566 : /* Use an unique reference to the column found. If there's another grouping column label pointing into it,
1567 : rel_groupby_cse will hopefully remove it */
1568 28 : sql_exp *colf = exps_find_exp(l->exps, col);
1569 :
1570 : /* col is already found in the inner relation. Also look for a new reference for col, eg sql_add(col, 1), 1 as col */
1571 28 : if (colf && list_position(l->exps, colf) < list_position(l->exps, tope)) {
1572 2 : n->data = exp_ref(v->sql, col);
1573 23 : } else if (!colf && is_simple_project(l->op) && list_empty(l->r) && !rel_is_ref(l) && !need_distinct(l)) { /* trivial case, it can be added */
1574 23 : sql_exp *ne = col;
1575 23 : list_prepend(l->exps, ne);
1576 23 : n->data = exp_ref(v->sql, ne);
1577 0 : } else if (!colf && (is_simple_project(l->op) || is_groupby(l->op))) {
1578 : /* on these scenarios the new column expression will be ordered/(grouped for distinct) or create potential ambiguity (multiple ref), so skip */
1579 0 : continue;
1580 : } else {
1581 3 : sql_exp *ne = exp_ref(v->sql, col);
1582 :
1583 3 : if (colf) /* a col reference is already there, add a new label */
1584 3 : exp_label(v->sql->sa, ne, ++v->sql->label);
1585 3 : if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l))
1586 0 : rel->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
1587 3 : list_append(l->exps, ne);
1588 3 : n->data = exp_ref(v->sql, ne);
1589 : }
1590 28 : list_hash_clear(rel->r);
1591 : }
1592 38 : v->changes++;
1593 : }
1594 : }
1595 : }
1596 77667 : for (node *n=((list*)rel->r)->h; n ; ) {
1597 47738 : node *next = n->next;
1598 47738 : sql_exp *e = n->data;
1599 :
1600 47738 : if (e->used) /* remove unnecessary grouping columns */
1601 10 : list_remove_node(rel->r, NULL, n);
1602 : n = next;
1603 : }
1604 : }
1605 40482 : return rel;
1606 : }
1607 :
1608 : /* remove identical grouping columns */
1609 : static inline sql_rel *
1610 93716 : rel_groupby_cse(visitor *v, sql_rel *rel)
1611 : {
1612 93716 : if (is_groupby(rel->op) && !list_empty(rel->r)) {
1613 69276 : sql_rel *l = rel->l;
1614 :
1615 : /* for every group expression e1 */
1616 193179 : for (node *n=((list*)rel->r)->h; n ; n = n->next) {
1617 123903 : sql_exp *e1 = n->data;
1618 : /* it's good to examine the same expression in the subrelation e.g. in case it's an alias */
1619 : /* TODO maybe cover more cases? Here I only look at the left relation */
1620 247806 : sql_exp *e1_sub = e1->type == e_column ?
1621 123903 : e1->nid ? exps_bind_nid(l->exps, e1->nid) : exps_find_exp(l->exps, e1) : NULL;
1622 :
1623 : /* for every other group expression */
1624 213017 : for (node *m=n->next; m; m = m->next) {
1625 89114 : sql_exp *e2 = m->data;
1626 178228 : sql_exp *e2_sub = e2->type == e_column ?
1627 89114 : e2->nid ? exps_bind_nid(l->exps, e2->nid) : exps_find_exp(l->exps, e2) : NULL;
1628 :
1629 : /* check if the expression are the same */
1630 89114 : if (exp_match_exp(e1, e2) || exp_refers(e1, e2) || (e1_sub && e2_sub && (exp_match_exp(e1_sub, e2_sub) || exp_refers(e1_sub, e2_sub)))) {
1631 :
1632 : /* use e2 from rel->exps instead of e2 from the rel->r as it can have an alias from the higher rel */
1633 41 : sql_exp *e2_in_exps = exps_uses_nid(rel->exps, e2->alias.label);
1634 41 : assert(e2_in_exps);
1635 :
1636 : /* same as e2 */
1637 41 : sql_exp *e1_in_exps = exps_uses_nid(rel->exps, e1->alias.label);
1638 41 : if (!e1_in_exps)
1639 6 : continue;
1640 :
1641 : /* write e2 as an e1 alias since the expressions are the same */
1642 35 : sql_exp* e2_as_e1_alias = exp_copy(v->sql, e1_in_exps);
1643 : /* NOTE: it is important to get the rname (exp->l) and name (exp->r) from e2 IN the exps
1644 : * (e2_in_exps), and not from e2, since it could carry an alias from the higher rel */
1645 35 : exp_setalias(e2_as_e1_alias, e2_as_e1_alias->alias.label, e2_in_exps->l, e2_in_exps->r);
1646 35 : e2_as_e1_alias->alias.label = e2->alias.label;
1647 :
1648 : /* replace e2 with e2_as_e1_alias in expressions list */
1649 35 : node *e2_exps_node = list_find(rel->exps, e2_in_exps, NULL);
1650 35 : list_append_before(rel->exps, e2_exps_node, e2_as_e1_alias);
1651 35 : list_remove_node(rel->exps, NULL, e2_exps_node);
1652 :
1653 : /* finally remove e2 from the groups' list (->r) since it's redundant */
1654 35 : node *e2_r_node = list_find(rel->r, e2, NULL);
1655 35 : list_remove_node(rel->r, NULL, e2_r_node);
1656 :
1657 35 : v->changes++;
1658 : }
1659 : }
1660 : }
1661 : }
1662 93716 : return rel;
1663 : }
1664 :
1665 : sql_exp *list_exps_uses_exp(list *exps, const char *rname, const char *name);
1666 :
1667 : static sql_exp*
1668 18948 : exp_uses_exp(sql_exp *e, const char *rname, const char *name)
1669 : {
1670 19232 : sql_exp *res = NULL;
1671 :
1672 19232 : switch (e->type) {
1673 : case e_psm:
1674 : break;
1675 1915 : case e_atom: {
1676 1915 : if (e->f)
1677 0 : return list_exps_uses_exp(e->f, rname, name);
1678 : } break;
1679 283 : case e_convert:
1680 283 : return exp_uses_exp(e->l, rname, name);
1681 10512 : case e_column: {
1682 10512 : if (e->l && rname && strcmp(e->l, rname) == 0 &&
1683 2419 : e->r && name && strcmp(e->r, name) == 0)
1684 : return e;
1685 8263 : if (!e->l && !rname &&
1686 244 : e->r && name && strcmp(e->r, name) == 0)
1687 : return e;
1688 : } break;
1689 6121 : case e_func:
1690 : case e_aggr: {
1691 6121 : if (e->l)
1692 4902 : return list_exps_uses_exp(e->l, rname, name);
1693 : } break;
1694 401 : case e_cmp: {
1695 401 : if (e->flag == cmp_in || e->flag == cmp_notin) {
1696 0 : if ((res = exp_uses_exp(e->l, rname, name)))
1697 : return res;
1698 0 : return list_exps_uses_exp(e->r, rname, name);
1699 401 : } else if (e->flag == cmp_or || e->flag == cmp_filter) {
1700 2 : if ((res = list_exps_uses_exp(e->l, rname, name)))
1701 : return res;
1702 2 : return list_exps_uses_exp(e->r, rname, name);
1703 : } else {
1704 399 : if ((res = exp_uses_exp(e->l, rname, name)))
1705 : return res;
1706 397 : if ((res = exp_uses_exp(e->r, rname, name)))
1707 : return res;
1708 391 : if (e->f)
1709 : return exp_uses_exp(e->f, rname, name);
1710 : }
1711 : } break;
1712 : }
1713 : return NULL;
1714 : }
1715 :
1716 : sql_exp *
1717 9880 : list_exps_uses_exp(list *exps, const char *rname, const char *name)
1718 : {
1719 9880 : sql_exp *res = NULL;
1720 :
1721 9880 : if (!exps)
1722 : return NULL;
1723 28032 : for (node *n = exps->h; n && !res; n = n->next) {
1724 18152 : sql_exp *e = n->data;
1725 18152 : res = exp_uses_exp(e, rname, name);
1726 : }
1727 : return res;
1728 : }
1729 :
1730 : /* find in the list of expression an expression which uses e */
1731 : sql_exp *
1732 803 : exps_uses_exp(list *exps, sql_exp *e)
1733 : {
1734 803 : return list_exps_uses_exp(exps, exp_relname(e), exp_name(e));
1735 : }
1736 : /*
1737 : * Rewrite aggregations over munion all.
1738 : * groupby ([ union all (a, b, c) ], [gbe], [ count, sum ] )
1739 : *
1740 : * into
1741 : * groupby ([ union all( groupby( a, [gbe], [ count, sum] ),
1742 : * groupby( b, [gbe], [ count, sum] ),
1743 : * groupby( c, [gbe], [ count, sum] ) ],
1744 : * [gbe], [sum, sum] )
1745 : */
1746 : static inline sql_rel *
1747 1056 : rel_push_aggr_down_n_arry(visitor *v, sql_rel *rel)
1748 : {
1749 1056 : sql_rel *g = rel;
1750 1056 : sql_rel *u = rel->l, *ou = u;
1751 1056 : sql_rel *r = NULL;
1752 1056 : list *rgbe = NULL, *gbe = NULL, *exps = NULL;
1753 1056 : node *n, *m;
1754 :
1755 : // TODO why?
1756 1056 : if (u->op == op_project && !need_distinct(u))
1757 197 : u = u->l;
1758 :
1759 : /* make sure we don't create group by on group by's */
1760 2146 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
1761 1651 : r = n->data;
1762 1651 : if (r->op == op_groupby)
1763 : return rel;
1764 : }
1765 :
1766 : /* distinct should be done over the full result */
1767 1299 : for (n = g->exps->h; n; n = n->next) {
1768 910 : sql_exp *e = n->data;
1769 910 : sql_subfunc *af = e->f;
1770 :
1771 910 : if (e->type == e_atom ||
1772 910 : e->type == e_func ||
1773 456 : (e->type == e_aggr &&
1774 456 : ((strcmp(af->func->base.name, "sum") &&
1775 319 : strcmp(af->func->base.name, "count") &&
1776 171 : strcmp(af->func->base.name, "min") &&
1777 456 : strcmp(af->func->base.name, "max")) ||
1778 : need_distinct(e))))
1779 : return rel;
1780 : }
1781 :
1782 389 : list *nl = sa_list(v->sql->sa);
1783 1213 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
1784 824 : r = rel_dup(n->data);
1785 : //n->data = NULL; /* clean list as we steal the relation r, stealing is needed else (with multiple references) double project cleanup fails */
1786 824 : if (!is_project(r->op))
1787 47 : r = rel_project(v->sql->sa, r,
1788 : rel_projections(v->sql, r, NULL, 1, 1));
1789 824 : rel_rename_exps(v->sql, u->exps, r->exps);
1790 824 : if (u != ou) {
1791 359 : bool isproject = is_project(r->op);
1792 359 : r = rel_project(v->sql->sa, r, NULL);
1793 359 : r->exps = exps_copy(v->sql, ou->exps);
1794 359 : rel_rename_exps(v->sql, ou->exps, r->exps);
1795 359 : set_processed(r);
1796 359 : if (isproject)
1797 359 : r = rel_push_project_down_(v, r); /* cleanup any double projects */
1798 : }
1799 824 : if (g->r && list_length(g->r) > 0) {
1800 574 : list *gbe = g->r;
1801 574 : rgbe = exps_copy(v->sql, gbe);
1802 : }
1803 824 : r = rel_groupby(v->sql, r, NULL);
1804 824 : r->r = rgbe;
1805 824 : r->nrcols = g->nrcols;
1806 824 : r->card = g->card;
1807 824 : r->exps = exps_copy(v->sql, g->exps);
1808 824 : r->nrcols = list_length(r->exps);
1809 824 : set_processed(r);
1810 :
1811 824 : assert(r);
1812 824 : append(nl, r);
1813 : }
1814 :
1815 : /* group by on primary keys which define the partitioning scheme
1816 : * don't need a finalizing group by */
1817 : /* how to check if a partition is based on some primary key ?
1818 : * */
1819 389 : if (!list_empty(rel->r)) {
1820 714 : for (node *n = ((list*)rel->r)->h; n; n = n->next) {
1821 446 : sql_exp *e = n->data;
1822 446 : sql_column *c = NULL;
1823 :
1824 446 : if ((c = exp_is_pkey(rel, e)) && partition_find_part(v->sql->session->tr, c->t, NULL)) {
1825 : /* check if key is partition key */
1826 0 : v->changes++;
1827 0 : return rel_inplace_setop_n_ary(v->sql, rel, nl, op_munion,
1828 : rel_projections(v->sql, rel, NULL, 1, 1));
1829 : }
1830 : }
1831 : }
1832 :
1833 389 : if (!list_empty(rel->r)) {
1834 268 : list *ogbe = rel->r;
1835 :
1836 268 : gbe = new_exp_list(v->sql->sa);
1837 714 : for (n = ogbe->h; n; n = n->next) {
1838 446 : sql_exp *e = n->data, *ne;
1839 :
1840 : /* group by in aggregation list */
1841 446 : ne = exps_uses_exp( rel->exps, e);
1842 446 : if (ne) {
1843 440 : sql_rel *first_munion_rel = nl->h->data;
1844 440 : ne = list_find_exp(first_munion_rel->exps, ne);
1845 : }
1846 440 : if (!ne) {
1847 : /* e only in the u1,u2,...un->r (group by list) */
1848 24 : for (node *n = nl->h; n; n = n->next) {
1849 16 : ne = exp_ref(v->sql, e);
1850 16 : list_append(((sql_rel*)n->data)->exps, ne);
1851 : }
1852 : }
1853 8 : assert(ne);
1854 446 : ne = exp_ref(v->sql, ne);
1855 446 : append(gbe, ne);
1856 : }
1857 : }
1858 :
1859 389 : u = rel_setop_n_ary(v->sql->sa, nl, op_munion);
1860 389 : rel_setop_n_ary_set_exps(v->sql, u,
1861 389 : rel_projections(v->sql, nl->h->data, NULL, 1, 1), false);
1862 389 : set_processed(u);
1863 :
1864 389 : exps = new_exp_list(v->sql->sa);
1865 1190 : for (n = u->exps->h, m = rel->exps->h; n && m; n = n->next, m = m->next) {
1866 801 : sql_exp *ne, *e = n->data, *oa = m->data;
1867 :
1868 801 : if (oa->type == e_aggr) {
1869 350 : sql_subfunc *f = oa->f;
1870 350 : int cnt = exp_aggr_is_count(oa);
1871 350 : sql_subfunc *a = sql_bind_func(v->sql, "sys", (cnt)?"sum":f->func->base.name, exp_subtype(e), NULL, F_AGGR, true, true);
1872 :
1873 350 : assert(a);
1874 : /* munion of aggr result may have nils
1875 : * because sum/count of empty set */
1876 350 : set_has_nil(e);
1877 350 : e = exp_ref(v->sql, e);
1878 350 : ne = exp_aggr1(v->sql->sa, e, a, need_distinct(e), 1, e->card, 1);
1879 : } else {
1880 451 : ne = exp_copy(v->sql, oa);
1881 : }
1882 801 : exp_setalias(ne, oa->alias.label, exp_find_rel_name(oa), exp_name(oa));
1883 801 : append(exps, ne);
1884 : }
1885 389 : v->changes++;
1886 389 : return rel_inplace_groupby(rel, u, gbe, exps);
1887 : }
1888 :
1889 : /*
1890 : * Rewrite aggregations over union all.
1891 : * groupby ([ union all (a, b) ], [gbe], [ count, sum ] )
1892 : *
1893 : * into
1894 : * groupby ( [ union all( groupby( a, [gbe], [ count, sum] ), [ groupby( b, [gbe], [ count, sum] )) , [gbe], [sum, sum] )
1895 : */
1896 : static inline sql_rel *
1897 93716 : rel_push_aggr_down(visitor *v, sql_rel *rel)
1898 : {
1899 93716 : if (rel->op == op_groupby && rel->l) {
1900 93647 : sql_rel *u = rel->l, *ou = u;
1901 93647 : sql_rel *g = rel;
1902 93647 : sql_rel *ul = u->l;
1903 93647 : sql_rel *ur = u->r;
1904 93647 : node *n, *m;
1905 93647 : list *lgbe = NULL, *rgbe = NULL, *gbe = NULL, *exps = NULL;
1906 :
1907 93647 : if (u->op == op_project && !need_distinct(u))
1908 40400 : u = u->l;
1909 :
1910 93647 : if (!u || !(is_union(u->op) || is_munion(u->op)) || need_distinct(u) || is_single(u) || !u->exps || rel_is_ref(u))
1911 : return rel;
1912 :
1913 1056 : if (is_munion(u->op))
1914 1056 : return rel_push_aggr_down_n_arry(v, rel);
1915 :
1916 0 : ul = u->l;
1917 0 : ur = u->r;
1918 :
1919 : /* make sure we don't create group by on group by's */
1920 0 : if (ul->op == op_groupby || ur->op == op_groupby)
1921 : return rel;
1922 :
1923 : /* distinct should be done over the full result */
1924 0 : for (n = g->exps->h; n; n = n->next) {
1925 0 : sql_exp *e = n->data;
1926 0 : sql_subfunc *af = e->f;
1927 :
1928 0 : if (e->type == e_atom ||
1929 0 : e->type == e_func ||
1930 0 : (e->type == e_aggr &&
1931 0 : ((strcmp(af->func->base.name, "sum") &&
1932 0 : strcmp(af->func->base.name, "count") &&
1933 0 : strcmp(af->func->base.name, "min") &&
1934 0 : strcmp(af->func->base.name, "max")) ||
1935 : need_distinct(e))))
1936 : return rel;
1937 : }
1938 :
1939 0 : ul = rel_dup(ul);
1940 0 : ur = rel_dup(ur);
1941 0 : if (!is_project(ul->op))
1942 0 : ul = rel_project(v->sql->sa, ul,
1943 : rel_projections(v->sql, ul, NULL, 1, 1));
1944 0 : if (!is_project(ur->op))
1945 0 : ur = rel_project(v->sql->sa, ur,
1946 : rel_projections(v->sql, ur, NULL, 1, 1));
1947 0 : rel_rename_exps(v->sql, u->exps, ul->exps);
1948 0 : rel_rename_exps(v->sql, u->exps, ur->exps);
1949 0 : if (u != ou) {
1950 0 : ul = rel_project(v->sql->sa, ul, NULL);
1951 0 : ul->exps = exps_copy(v->sql, ou->exps);
1952 0 : rel_rename_exps(v->sql, ou->exps, ul->exps);
1953 0 : set_processed(ul);
1954 0 : ur = rel_project(v->sql->sa, ur, NULL);
1955 0 : ur->exps = exps_copy(v->sql, ou->exps);
1956 0 : rel_rename_exps(v->sql, ou->exps, ur->exps);
1957 0 : set_processed(ur);
1958 : }
1959 :
1960 0 : if (g->r && list_length(g->r) > 0) {
1961 0 : list *gbe = g->r;
1962 :
1963 0 : lgbe = exps_copy(v->sql, gbe);
1964 0 : rgbe = exps_copy(v->sql, gbe);
1965 : }
1966 0 : ul = rel_groupby(v->sql, ul, NULL);
1967 0 : ul->r = lgbe;
1968 0 : ul->nrcols = g->nrcols;
1969 0 : ul->card = g->card;
1970 0 : ul->exps = exps_copy(v->sql, g->exps);
1971 0 : ul->nrcols = list_length(ul->exps);
1972 0 : set_processed(ul);
1973 :
1974 0 : ur = rel_groupby(v->sql, ur, NULL);
1975 0 : ur->r = rgbe;
1976 0 : ur->nrcols = g->nrcols;
1977 0 : ur->card = g->card;
1978 0 : ur->exps = exps_copy(v->sql, g->exps);
1979 0 : ur->nrcols = list_length(ur->exps);
1980 0 : set_processed(ur);
1981 :
1982 : /* group by on primary keys which define the partitioning scheme
1983 : * don't need a finalizing group by */
1984 : /* how to check if a partition is based on some primary key ?
1985 : * */
1986 0 : if (!list_empty(rel->r)) {
1987 0 : for (node *n = ((list*)rel->r)->h; n; n = n->next) {
1988 0 : sql_exp *e = n->data;
1989 0 : sql_column *c = NULL;
1990 :
1991 0 : if ((c = exp_is_pkey(rel, e)) && partition_find_part(v->sql->session->tr, c->t, NULL)) {
1992 : /* check if key is partition key */
1993 0 : v->changes++;
1994 0 : return rel_inplace_setop(v->sql, rel, ul, ur, op_union,
1995 : rel_projections(v->sql, rel, NULL, 1, 1));
1996 : }
1997 : }
1998 : }
1999 :
2000 0 : if (!list_empty(rel->r)) {
2001 0 : list *ogbe = rel->r;
2002 :
2003 0 : gbe = new_exp_list(v->sql->sa);
2004 0 : for (n = ogbe->h; n; n = n->next) {
2005 0 : sql_exp *e = n->data, *ne;
2006 :
2007 : /* group by in aggregation list */
2008 0 : ne = exps_uses_exp( rel->exps, e);
2009 0 : if (ne)
2010 0 : ne = list_find_exp( ul->exps, ne);
2011 0 : if (!ne) {
2012 : /* e only in the ul/ur->r (group by list) */
2013 0 : ne = exp_ref(v->sql, e);
2014 0 : list_append(ul->exps, ne);
2015 0 : ne = exp_ref(v->sql, e);
2016 0 : list_append(ur->exps, ne);
2017 : }
2018 0 : assert(ne);
2019 0 : ne = exp_ref(v->sql, ne);
2020 0 : append(gbe, ne);
2021 : }
2022 : }
2023 :
2024 0 : u = rel_setop(v->sql->sa, ul, ur, op_union);
2025 0 : rel_setop_set_exps(v->sql, u, rel_projections(v->sql, ul, NULL, 1, 1), false);
2026 0 : set_processed(u);
2027 :
2028 0 : exps = new_exp_list(v->sql->sa);
2029 0 : for (n = u->exps->h, m = rel->exps->h; n && m; n = n->next, m = m->next) {
2030 0 : sql_exp *ne, *e = n->data, *oa = m->data;
2031 :
2032 0 : if (oa->type == e_aggr) {
2033 0 : sql_subfunc *f = oa->f;
2034 0 : int cnt = exp_aggr_is_count(oa);
2035 0 : sql_subfunc *a = sql_bind_func(v->sql, "sys", (cnt)?"sum":f->func->base.name, exp_subtype(e), NULL, F_AGGR, true, true);
2036 :
2037 0 : assert(a);
2038 : /* union of aggr result may have nils
2039 : * because sum/count of empty set */
2040 0 : set_has_nil(e);
2041 0 : e = exp_ref(v->sql, e);
2042 0 : ne = exp_aggr1(v->sql->sa, e, a, need_distinct(e), 1, e->card, 1);
2043 : } else {
2044 0 : ne = exp_copy(v->sql, oa);
2045 : }
2046 0 : exp_setalias(ne, oa->alias.label, exp_find_rel_name(oa), exp_name(oa));
2047 0 : append(exps, ne);
2048 : }
2049 0 : v->changes++;
2050 0 : return rel_inplace_groupby( rel, u, gbe, exps);
2051 : }
2052 : return rel;
2053 : }
2054 :
2055 : /*
2056 : * More general
2057 : * groupby(
2058 : * [ outer ] join(
2059 : * project(
2060 : * table(A) [ c1, c2, .. ]
2061 : * ) [ c1, c2, identity(c2) as I, .. ],
2062 : * table(B) [ c1, c2, .. ]
2063 : * ) [ A.c1 = B.c1 ]
2064 : * ) [ I ] [ a1, a2, .. ]
2065 : *
2066 : * ->
2067 : *
2068 : * [ outer ] join(
2069 : * project(
2070 : * table(A) [ c1, c2, .. ]
2071 : * ) [ c1, c2, .. ],
2072 : * groupby (
2073 : * table(B) [ c1, c2, .. ]
2074 : * ) [ B.c1 ] [ a1, a2, .. ]
2075 : * ) [ A.c1 = B.c1 ]
2076 : */
2077 : static sql_rel *
2078 46049 : gen_push_groupby_down(mvc *sql, sql_rel *rel, int *changes)
2079 : {
2080 46049 : sql_rel *j = rel->l;
2081 46049 : list *gbe = rel->r;
2082 :
2083 46049 : if (rel->op == op_groupby && list_length(gbe) == 1 && j->op == op_join){
2084 6442 : sql_rel *jl = j->l, *jr = j->r, *cr, *cl;
2085 6442 : sql_exp *gb = gbe->h->data, *e;
2086 6442 : node *n;
2087 6442 : int left = 1;
2088 6442 : list *aggrs, *aliases, *gbe;
2089 :
2090 6442 : if (!is_identity(gb, jl) && !is_identity(gb, jr))
2091 : return rel;
2092 6 : if (jl->op == op_project &&
2093 4 : (e = list_find_exp( jl->exps, gb)) != NULL &&
2094 2 : find_prop(e->p, PROP_HASHCOL) != NULL) {
2095 : left = 0;
2096 : cr = jr;
2097 : cl = jl;
2098 4 : } else if (jr->op == op_project &&
2099 8 : (e = list_find_exp( jr->exps, gb)) != NULL &&
2100 4 : find_prop(e->p, PROP_HASHCOL) != NULL) {
2101 : left = 1;
2102 : cr = jl;
2103 : cl = jr;
2104 : } else {
2105 0 : return rel;
2106 : }
2107 :
2108 6 : if ((left && is_base(jl->op)) || (!left && is_base(jr->op))||
2109 0 : (left && is_select(jl->op)) || (!left && is_select(jr->op))
2110 0 : || rel_is_join_on_pkey(j, false))
2111 6 : return rel;
2112 :
2113 : /* only add aggr (based on left/right), and repeat the group by column */
2114 0 : aggrs = sa_list(sql->sa);
2115 0 : aliases = sa_list(sql->sa);
2116 0 : if (rel->exps) for (n = rel->exps->h; n; n = n->next) {
2117 0 : sql_exp *ce = n->data;
2118 :
2119 0 : if (exp_is_atom(ce))
2120 0 : list_append(aliases, ce);
2121 0 : else if (ce->type == e_column) {
2122 0 : if (rel_has_exp(cl, ce, false) == 0) /* collect aliases outside groupby */
2123 0 : list_append(aliases, ce);
2124 : else
2125 0 : list_append(aggrs, ce);
2126 0 : } else if (ce->type == e_aggr) {
2127 0 : list *args = ce->l;
2128 :
2129 : /* check args are part of left/right */
2130 0 : if (!list_empty(args) && rel_has_exps(cl, args, false) == 0)
2131 : return rel;
2132 0 : if (rel->op != op_join && exp_aggr_is_count(ce))
2133 0 : ce->p = prop_create(sql->sa, PROP_COUNT, ce->p);
2134 0 : list_append(aggrs, ce);
2135 : }
2136 : }
2137 : /* TODO move any column expressions (aliases) into the project list */
2138 :
2139 : /* find gb in left or right and should be unique */
2140 0 : gbe = sa_list(sql->sa);
2141 : /* push groupby to right, group on join exps */
2142 0 : if (j->exps) for (n = j->exps->h; n; n = n->next) {
2143 0 : sql_exp *ce = n->data, *l = ce->l, *r = ce->r, *e;
2144 :
2145 : /* get left/right hand of e_cmp */
2146 0 : assert(ce->type == e_cmp);
2147 0 : if (ce->flag == cmp_equal && is_alias(l->type) && is_alias(r->type) &&
2148 0 : (((e = rel_find_exp(cr, l)) && rel_find_exp(cl, r)) ||
2149 0 : ((e = rel_find_exp(cr, r)) && rel_find_exp(cl, l)))) {
2150 0 : e = exp_ref(sql, e);
2151 0 : list_append(gbe, e);
2152 : } else {
2153 0 : return rel;
2154 : }
2155 : }
2156 0 : if (!left)
2157 0 : cr = j->r = rel_groupby(sql, cr, gbe);
2158 : else
2159 0 : cr = j->l = rel_groupby(sql, cr, gbe);
2160 0 : cr->exps = list_merge(cr->exps, aggrs, (fdup)NULL);
2161 0 : set_processed(cr);
2162 0 : if (!is_project(cl->op))
2163 0 : cl = rel_project(sql->sa, cl,
2164 : rel_projections(sql, cl, NULL, 1, 1));
2165 0 : cl->exps = list_merge(cl->exps, aliases, (fdup)NULL);
2166 0 : set_processed(cl);
2167 0 : if (!left)
2168 0 : j->l = cl;
2169 : else
2170 0 : j->r = cl;
2171 0 : rel -> l = NULL;
2172 0 : rel_destroy(rel);
2173 :
2174 0 : if (list_empty(cr->exps) && list_empty(j->exps)) { /* remove crossproduct */
2175 0 : sql_rel *r = cl;
2176 0 : if (!left)
2177 0 : j->l = NULL;
2178 : else
2179 0 : j->r = NULL;
2180 0 : rel_destroy(j);
2181 0 : j = r;
2182 : }
2183 0 : (*changes)++;
2184 0 : return j;
2185 : }
2186 : return rel;
2187 : }
2188 :
2189 : /*
2190 : * Rewrite group(project(join(A,Dict)[a.i==dict.i])[...dict.n])[dict.n][ ... dict.n ]
2191 : * into
2192 : * project(join(groupby (A)[a.i],[a.i]), Dict)[a.i==dict.i])[dict.n]
2193 : *
2194 : */
2195 : static inline sql_rel *
2196 93716 : rel_push_groupby_down(visitor *v, sql_rel *rel)
2197 : {
2198 93716 : sql_rel *p = rel->l;
2199 93716 : list *gbe = rel->r;
2200 :
2201 93716 : if (rel->op == op_groupby && gbe && p && is_join(p->op))
2202 10012 : return gen_push_groupby_down(v->sql, rel, &v->changes);
2203 83704 : if (rel->op == op_groupby && gbe && p && p->op == op_project) {
2204 36037 : sql_rel *j = p->l;
2205 36037 : sql_rel *jl, *jr;
2206 36037 : node *n;
2207 :
2208 36037 : if (!j || j->op != op_join || list_length(j->exps) != 1)
2209 33974 : return gen_push_groupby_down(v->sql, rel, &v->changes);
2210 2063 : jl = j->l;
2211 2063 : jr = j->r;
2212 :
2213 : /* check if jr is a dict with index and var still used */
2214 2063 : if (jr->op != op_basetable || jr->l || !jr->r || list_length(jr->exps) != 2)
2215 2063 : return gen_push_groupby_down(v->sql, rel, &v->changes);
2216 :
2217 : /* check if group by is done on dict column */
2218 0 : for(n = gbe->h; n; n = n->next) {
2219 0 : sql_exp *ge = n->data, *pe = NULL, *e = NULL;
2220 :
2221 : /* find group by exp in project, then in dict */
2222 0 : pe = rel_find_exp(p, ge);
2223 0 : if (pe) /* find project exp in right hand of join, ie dict */
2224 0 : e = rel_find_exp(jr, pe);
2225 0 : if (pe && e) { /* Rewrite: join with dict after the group by */
2226 0 : list *pexps = rel_projections(v->sql, rel, NULL, 1, 1), *npexps;
2227 0 : node *m;
2228 0 : sql_exp *ne = j->exps->h->data; /* join exp */
2229 0 : p->l = jl; /* Project now only on the left side of the join */
2230 :
2231 0 : ne = ne->l; /* The left side of the compare is the index of the left */
2232 :
2233 : /* find ge reference in new projection list */
2234 0 : npexps = sa_list(v->sql->sa);
2235 0 : for (m = pexps->h; m; m = m->next) {
2236 0 : sql_exp *a = m->data;
2237 :
2238 0 : if (exp_refers(ge, a)) {
2239 0 : sql_exp *sc = jr->exps->t->data;
2240 0 : sql_exp *e = exp_ref(v->sql, sc);
2241 0 : if (exp_name(a))
2242 0 : exp_prop_alias(v->sql->sa, e, a);
2243 : a = e;
2244 : }
2245 0 : append(npexps, a);
2246 : }
2247 :
2248 : /* find ge in aggr list */
2249 0 : for (m = rel->exps->h; m; m = m->next) {
2250 0 : sql_exp *a = m->data;
2251 :
2252 0 : if (exp_match_exp(a, ge) || exp_refers(ge, a)) {
2253 0 : a = exp_ref(v->sql, ne);
2254 0 : if (exp_name(ne))
2255 0 : exp_prop_alias(v->sql->sa, a, ne);
2256 0 : m->data = a;
2257 : }
2258 : }
2259 :
2260 : /* change alias pe, ie project out the index */
2261 0 : pe->l = (void*)exp_relname(ne);
2262 0 : pe->r = (void*)exp_name(ne);
2263 0 : if (exp_name(ne))
2264 0 : exp_prop_alias(v->sql->sa, pe, ne);
2265 :
2266 : /* change alias ge */
2267 0 : ge->l = (void*)exp_relname(pe);
2268 0 : ge->r = (void*)exp_name(pe);
2269 0 : if (exp_name(pe))
2270 0 : exp_prop_alias(v->sql->sa, ge, pe);
2271 :
2272 : /* zap both project and groupby name hash tables (as we changed names above) */
2273 0 : list_hash_clear(rel->exps);
2274 0 : list_hash_clear((list*)rel->r);
2275 0 : list_hash_clear(p->exps);
2276 :
2277 : /* add join */
2278 0 : j->l = rel;
2279 0 : rel = rel_project(v->sql->sa, j, npexps);
2280 0 : v->changes++;
2281 : }
2282 : }
2283 : }
2284 : return rel;
2285 : }
2286 :
2287 : /* reduce group by expressions based on pkey info
2288 : *
2289 : * The reduced group by and (derived) aggr expressions are restored via
2290 : * extra (new) aggregate columns.
2291 : */
2292 : static inline sql_rel *
2293 93716 : rel_reduce_groupby_exps(visitor *v, sql_rel *rel)
2294 : {
2295 93716 : list *gbe = rel->r;
2296 :
2297 93716 : if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel) && list_length(gbe)) {
2298 45989 : node *n, *m;
2299 45989 : int k, j, i, ngbe = list_length(gbe);
2300 45989 : int8_t *scores = SA_NEW_ARRAY(v->sql->ta, int8_t, ngbe);
2301 45989 : sql_column *c;
2302 45989 : sql_table **tbls = SA_NEW_ARRAY(v->sql->ta, sql_table*, ngbe);
2303 45989 : sql_rel **bts = SA_NEW_ARRAY(v->sql->ta, sql_rel*, ngbe), *bt = NULL;
2304 :
2305 45989 : gbe = rel->r;
2306 120209 : for (k = 0, i = 0, n = gbe->h; n; n = n->next, k++) {
2307 74220 : sql_exp *e = n->data;
2308 :
2309 74220 : c = exp_find_column_(rel, e, -2, &bt);
2310 74220 : if (c) {
2311 83814 : for(j = 0; j < i; j++)
2312 31641 : if (c->t == tbls[j] && bts[j] == bt)
2313 : break;
2314 67651 : tbls[j] = c->t;
2315 67651 : bts[j] = bt;
2316 67651 : i += (j == i);
2317 : }
2318 : }
2319 45989 : if (i) { /* forall tables find pkey and
2320 : remove useless other columns */
2321 : /* TODO also remove group by columns which are related to
2322 : * the other columns using a foreign-key join (n->1), ie 1
2323 : * on the to be removed side.
2324 : */
2325 95345 : for(j = 0; j < i; j++) {
2326 52161 : int l, nr = 0, cnr = 0;
2327 :
2328 52161 : k = list_length(gbe);
2329 52161 : memset(scores, 0, list_length(gbe));
2330 52161 : if (tbls[j]->pkey) {
2331 26298 : for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
2332 20103 : fcmp cmp = (fcmp)&kc_column_cmp;
2333 20103 : sql_exp *e = n->data;
2334 :
2335 20103 : c = exp_find_column_(rel, e, -2, &bt);
2336 28677 : if (c && c->t == tbls[j] && bts[j] == bt &&
2337 8574 : list_find(tbls[j]->pkey->k.columns, c, cmp) != NULL) {
2338 708 : scores[l] = 1;
2339 708 : nr ++;
2340 17924 : } else if (c && c->t == tbls[j] && bts[j] == bt) {
2341 : /* Okay we can cleanup a group by column */
2342 7866 : scores[l] = -1;
2343 7866 : cnr ++;
2344 : }
2345 : }
2346 : }
2347 6195 : if (nr) {
2348 699 : int all = (list_length(tbls[j]->pkey->k.columns) == nr);
2349 699 : sql_kc *kc = tbls[j]->pkey->k.columns->h->data;
2350 :
2351 699 : c = kc->c;
2352 1926 : for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
2353 1227 : sql_exp *e = n->data;
2354 :
2355 : /* pkey based group by */
2356 1376 : if (scores[l] == 1 && ((all ||
2357 : /* first of key */
2358 833 : (c == exp_find_column(rel, e, -2))) && !find_prop(e->p, PROP_HASHCOL)))
2359 13 : e->p = prop_create(v->sql->sa, PROP_HASHCOL, e->p);
2360 : }
2361 2702 : for (m = rel->exps->h; m; m = m->next ){
2362 2003 : sql_exp *e = m->data;
2363 :
2364 7340 : for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
2365 5350 : sql_exp *gb = n->data;
2366 :
2367 : /* pkey based group by */
2368 5350 : if (scores[l] == 1 && exp_match_exp(e,gb) && find_prop(gb->p, PROP_HASHCOL) && !find_prop(e->p, PROP_HASHCOL)) {
2369 13 : e->p = prop_create(v->sql->sa, PROP_HASHCOL, e->p);
2370 13 : break;
2371 : }
2372 :
2373 : }
2374 : }
2375 : }
2376 52161 : if (cnr && nr && list_length(tbls[j]->pkey->k.columns) == nr) {
2377 108 : list *ngbe = new_exp_list(v->sql->sa);
2378 :
2379 392 : for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
2380 284 : sql_exp *e = n->data;
2381 :
2382 : /* keep the group by columns which form a primary key
2383 : * of this table. And those unrelated to this table. */
2384 284 : if (scores[l] != -1)
2385 139 : append(ngbe, e);
2386 : }
2387 108 : rel->r = ngbe;
2388 : /* rewrite gbe and aggr, in the aggr list */
2389 108 : if (0)
2390 : for (m = rel->exps->h; m; m = m->next ){
2391 : sql_exp *e = m->data;
2392 : int fnd = 0;
2393 :
2394 : for (l = 0, n = gbe->h; l < k && n && !fnd; l++, n = n->next) {
2395 : sql_exp *gb = n->data;
2396 :
2397 : if (scores[l] == -1 && exp_refers(gb, e)) {
2398 : /*
2399 : sql_exp *rs = exp_column(v->sql->sa, gb->l?gb->l:exp_relname(gb), gb->r?gb->r:exp_name(gb), exp_subtype(gb), rel->card, has_nil(gb), is_unique(gb), is_intern(gb));
2400 : exp_setalias(rs, e->alias.label, exp_find_rel_name(e), exp_name(e));
2401 : e = rs;
2402 : */
2403 : assert(e->alias.label == e->nid);
2404 : fnd = 1;
2405 : }
2406 : }
2407 : m->data = e;
2408 : }
2409 : /* new reduced aggr expression list */
2410 108 : assert(list_length(rel->exps)>0);
2411 : /* only one reduction at a time */
2412 108 : list_hash_clear(rel->exps);
2413 108 : v->changes++;
2414 108 : return rel;
2415 : }
2416 52053 : gbe = rel->r;
2417 : }
2418 : }
2419 : }
2420 : /* remove constants from group by list */
2421 93608 : if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel)) {
2422 46387 : int i;
2423 46387 : node *n;
2424 :
2425 120323 : for (i = 0, n = gbe->h; n; n = n->next) {
2426 73935 : sql_exp *e = n->data;
2427 :
2428 73935 : if (exp_is_atom(e))
2429 26 : i++;
2430 : }
2431 46388 : if (i) {
2432 26 : list *ngbe = new_exp_list(v->sql->sa);
2433 26 : list *dgbe = new_exp_list(v->sql->sa);
2434 :
2435 53 : for (n = gbe->h; n; n = n->next) {
2436 27 : sql_exp *e = n->data;
2437 :
2438 27 : if (!exp_is_atom(e))
2439 1 : append(ngbe, e);
2440 : /* we need at least one gbe */
2441 26 : else if (!n->next && list_empty(ngbe))
2442 25 : append(ngbe, e);
2443 : else
2444 1 : append(dgbe, e);
2445 : }
2446 26 : rel->r = ngbe;
2447 26 : if (!list_empty(dgbe)) {
2448 : /* use atom's directly in the aggr expr list */
2449 :
2450 3 : for (n = rel->exps->h; n; n = n->next) {
2451 2 : sql_exp *e = n->data, *ne = NULL;
2452 :
2453 2 : if (e->type == e_column) {
2454 2 : if (e->nid)
2455 2 : ne = exps_bind_nid(dgbe, e->nid);
2456 2 : if (ne) {
2457 1 : ne = exp_copy(v->sql, ne);
2458 1 : exp_prop_alias(v->sql->sa, ne, e);
2459 1 : e = ne;
2460 : }
2461 : }
2462 2 : n->data = e;
2463 : }
2464 1 : list_hash_clear(rel->exps);
2465 1 : v->changes++;
2466 : }
2467 : }
2468 : }
2469 : return rel;
2470 : }
2471 :
2472 : /* if all arguments to a distinct aggregate are unique, remove 'distinct' property */
2473 : static inline sql_rel *
2474 93716 : rel_distinct_aggregate_on_unique_values(visitor *v, sql_rel *rel)
2475 : {
2476 93716 : if (is_groupby(rel->op) && !list_empty(rel->exps)) {
2477 290401 : for (node *n = rel->exps->h; n; n = n->next) {
2478 196760 : sql_exp *exp = (sql_exp*) n->data;
2479 :
2480 196760 : if (exp->type == e_aggr && need_distinct(exp)) {
2481 689 : bool all_unique = true;
2482 689 : list *l = exp->l;
2483 :
2484 1378 : for (node *m = l->h; m && all_unique; m = m->next) {
2485 689 : sql_exp *arg = (sql_exp*) m->data;
2486 :
2487 823 : all_unique &= arg->type == e_column && is_unique(arg) && (!is_semantics(exp) || !has_nil(arg));
2488 : }
2489 689 : if (!all_unique && exps_card(l) > CARD_ATOM)
2490 394 : all_unique = exps_unique(v->sql, rel, l) && (!is_semantics(exp) || !have_nil(l));
2491 : if (all_unique) {
2492 134 : set_nodistinct(exp);
2493 134 : v->changes++;
2494 : }
2495 : }
2496 : }
2497 : }
2498 93716 : return rel;
2499 : }
2500 :
2501 : static inline sql_rel *
2502 93716 : rel_remove_const_aggr(visitor *v, sql_rel *rel)
2503 : {
2504 93716 : if (!rel)
2505 : return rel;
2506 93716 : if (rel && is_groupby(rel->op) && list_length(rel->exps) >= 1 && !rel_is_ref(rel)) {
2507 70292 : int needed = 0;
2508 216593 : for (node *n = rel->exps->h; n; n = n->next) {
2509 146301 : sql_exp *exp = (sql_exp*) n->data;
2510 :
2511 146301 : if (exp_is_atom(exp) && exp->type != e_aggr)
2512 55 : needed++;
2513 : }
2514 70292 : if (needed) {
2515 53 : if (!list_empty(rel->r)) {
2516 3 : int atoms = 0;
2517 : /* corner case, all grouping columns are atoms */
2518 6 : for (node *n = ((list*)rel->r)->h; n; n = n->next) {
2519 3 : sql_exp *exp = (sql_exp*) n->data;
2520 :
2521 3 : if (exp_is_atom(exp))
2522 0 : atoms++;
2523 : }
2524 3 : if (atoms == list_length(rel->r)) {
2525 0 : list *nexps = sa_list(v->sql->sa);
2526 0 : for (node *n = rel->exps->h; n; ) {
2527 0 : node *next = n->next;
2528 0 : sql_exp *e = (sql_exp*) n->data;
2529 :
2530 : /* remove references to constant group by columns */
2531 0 : if (e->type == e_column) {
2532 0 : sql_exp *found = NULL;
2533 0 : found = exps_bind_nid(rel->r, e->nid);
2534 0 : if (found) {
2535 0 : list_append(nexps, found);
2536 0 : list_remove_node(rel->exps, NULL, n);
2537 : }
2538 : }
2539 : n = next;
2540 : }
2541 0 : rel->r = NULL; /* transform it into a global aggregate */
2542 0 : rel->exps = list_merge(nexps, rel->exps, (fdup) NULL); /* add grouping columns back as projections */
2543 : /* global aggregates may return 1 row, so filter it based on the count */
2544 0 : sql_subfunc *cf = sql_bind_func(v->sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
2545 0 : sql_exp *count = exp_aggr(v->sql->sa, NULL, cf, 0, 1, CARD_ATOM, 0);
2546 0 : count = rel_groupby_add_aggr(v->sql, rel, count);
2547 0 : sql_exp *cp = exp_compare(v->sql->sa, exp_ref(v->sql, count), exp_atom(v->sql->sa, atom_int(v->sql->sa, exp_subtype(count), 0)), cmp_notequal);
2548 0 : rel = rel_select(v->sql->sa, rel, cp);
2549 0 : set_processed(rel);
2550 0 : return rel;
2551 : }
2552 50 : } else if (list_length(rel->exps) == needed) { /* all are const */
2553 48 : sql_rel *ll = rel->l;
2554 48 : rel->op = op_project;
2555 : /* TODO check if l->l == const, else change that */
2556 48 : if (ll && ll->l) {
2557 44 : rel_destroy(ll);
2558 44 : rel->l = rel_project_exp(v->sql, exp_atom_bool(v->sql->sa, 1));
2559 : }
2560 48 : return rel;
2561 : }
2562 5 : sql_rel *nrel = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
2563 17 : for (node *n = nrel->exps->h; n; n = n->next) {
2564 12 : sql_exp *exp = (sql_exp*) n->data;
2565 12 : if (exp->type == e_column) {
2566 12 : sql_exp *e = rel_find_exp(rel, exp);
2567 :
2568 12 : if (e && exp_is_atom(e) && e->type == e_atom) {
2569 7 : sql_exp *ne = exp_copy(v->sql, e);
2570 7 : assert(ne->alias.label);
2571 7 : exp_setalias(ne, ne->alias.label, exp_find_rel_name(exp), exp_name(exp));
2572 7 : n->data = ne;
2573 7 : v->changes++;
2574 : }
2575 : }
2576 : }
2577 5 : list *nl = sa_list(v->sql->sa);
2578 17 : for (node *n = rel->exps->h; n; n = n->next) {
2579 12 : sql_exp *exp = (sql_exp*) n->data;
2580 :
2581 12 : if (!exp_is_atom(exp) || exp->type != e_atom)
2582 5 : append(nl, exp);
2583 : }
2584 5 : rel->exps = nl;
2585 5 : return nrel;
2586 : }
2587 : }
2588 : return rel;
2589 : }
2590 :
2591 : #if 0
2592 : static sql_rel *
2593 : rel_groupby_distinct2(visitor *v, sql_rel *rel)
2594 : {
2595 : list *ngbes = sa_list(v->sql->sa), *gbes, *naggrs = sa_list(v->sql->sa), *aggrs = sa_list(v->sql->sa);
2596 : sql_rel *l;
2597 : node *n;
2598 :
2599 : gbes = rel->r;
2600 : if (!gbes)
2601 : return rel;
2602 :
2603 : /* check if each aggr is, rewritable (max,min,sum,count)
2604 : * and only has one argument */
2605 : for (n = rel->exps->h; n; n = n->next) {
2606 : sql_exp *e = n->data;
2607 : sql_subfunc *af = e->f;
2608 :
2609 : if (e->type == e_aggr &&
2610 : (strcmp(af->func->base.name, "sum") &&
2611 : strcmp(af->func->base.name, "count") &&
2612 : strcmp(af->func->base.name, "min") &&
2613 : strcmp(af->func->base.name, "max")))
2614 : return rel;
2615 : }
2616 :
2617 : for (n = gbes->h; n; n = n->next) {
2618 : sql_exp *e = n->data;
2619 :
2620 : e = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
2621 : append(ngbes, e);
2622 : }
2623 :
2624 : /* 1 for each aggr(distinct v) add the attribute expression v to gbes and aggrs list
2625 : * 2 for each aggr(z) add aggr_phase2('z') to the naggrs list
2626 : * 3 for each group by col, add also to the naggrs list
2627 : * */
2628 : for (n = rel->exps->h; n; n = n->next) {
2629 : sql_exp *e = n->data;
2630 :
2631 : if (e->type == e_aggr && need_distinct(e)) { /* 1 */
2632 : /* need column expression */
2633 : list *args = e->l;
2634 : sql_exp *v = args->h->data;
2635 : append(gbes, v);
2636 : if (!exp_name(v))
2637 : exp_label(v->sql->sa, v, ++v->sql->label);
2638 : v = exp_column(v->sql->sa, exp_find_rel_name(v), exp_name(v), exp_subtype(v), v->card, has_nil(v), is_unique(v), is_intern(v));
2639 : append(aggrs, v);
2640 : v = exp_aggr1(v->sql->sa, v, e->f, need_distinct(e), 1, e->card, 1);
2641 : exp_setalias(v, e->alias.label, exp_find_rel_name(e), exp_name(e));
2642 : append(naggrs, v);
2643 : } else if (e->type == e_aggr && !need_distinct(e)) {
2644 : sql_exp *v;
2645 : sql_subfunc *f = e->f;
2646 : int cnt = exp_aggr_is_count(e);
2647 : sql_subfunc *a = sql_bind_func(v->sql, "sys", (cnt)?"sum":f->func->base.name, exp_subtype(e), NULL, F_AGGR, true, true);
2648 :
2649 : append(aggrs, e);
2650 : if (!exp_name(e))
2651 : exp_label(v->sql->sa, e, ++v->sql->label);
2652 : set_has_nil(e);
2653 : v = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
2654 : v = exp_aggr1(v->sql->sa, v, a, 0, 1, e->card, 1);
2655 : if (cnt)
2656 : set_zero_if_empty(v);
2657 : exp_setalias(v, e->alias.label, exp_find_rel_name(e), exp_name(e));
2658 : append(naggrs, v);
2659 : } else { /* group by col */
2660 : if (list_find_exp(gbes, e) || !list_find_exp(naggrs, e)) {
2661 : append(aggrs, e);
2662 :
2663 : e = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
2664 : }
2665 : append(naggrs, e);
2666 : }
2667 : }
2668 :
2669 : l = rel->l = rel_groupby(v->sql, rel->l, gbes);
2670 : l->exps = aggrs;
2671 : rel->r = ngbes;
2672 : rel->exps = naggrs;
2673 : v->changes++;
2674 : return rel;
2675 : }
2676 : #endif
2677 :
2678 : /* Rewrite group by expressions with distinct
2679 : *
2680 : * ie select a, count(distinct b) from c where ... groupby a;
2681 : * No other aggregations should be present
2682 : *
2683 : * Rewrite the more general case, good for parallel execution
2684 : *
2685 : * groupby(R) [e,f] [ aggr1 a distinct, aggr2 b distinct, aggr3 c, aggr4 d]
2686 : *
2687 : * into
2688 : *
2689 : * groupby(
2690 : * groupby(R) [e,f,a,b] [ a, b, aggr3 c, aggr4 d]
2691 : * ) [e,f]( aggr1 a distinct, aggr2 b distinct, aggr3_phase2 c, aggr4_phase2 d)
2692 : */
2693 : static inline sql_rel *
2694 93716 : rel_groupby_distinct(visitor *v, sql_rel *rel)
2695 : {
2696 93716 : node *n;
2697 :
2698 93716 : if (is_groupby(rel->op)) {
2699 93647 : sql_rel *l = rel->l;
2700 93647 : if (!l || is_groupby(l->op))
2701 : return rel;
2702 : }
2703 93582 : if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel)) {
2704 46368 : int nr = 0, anr = 0;
2705 46368 : list *gbe, *ngbe, *arg, *exps, *nexps;
2706 46368 : sql_exp *distinct = NULL, *darg, *found;
2707 46368 : sql_rel *l = NULL;
2708 :
2709 167575 : for (n=rel->exps->h; n && nr <= 2; n = n->next) {
2710 121207 : sql_exp *e = n->data;
2711 121207 : if (need_distinct(e)) {
2712 393 : distinct = n->data;
2713 393 : nr++;
2714 : }
2715 121207 : anr += is_aggr(e->type);
2716 : }
2717 46368 : if (nr < 1 || distinct->type != e_aggr)
2718 : return rel;
2719 386 : if (nr > 1 || anr > nr)
2720 : return rel;//rel_groupby_distinct2(v, rel);
2721 117 : arg = distinct->l;
2722 117 : if (list_length(arg) != 1 || list_length(rel->r) + nr != list_length(rel->exps))
2723 2 : return rel;
2724 :
2725 115 : gbe = rel->r;
2726 115 : ngbe = sa_list(v->sql->sa);
2727 115 : exps = sa_list(v->sql->sa);
2728 115 : nexps = sa_list(v->sql->sa);
2729 362 : for (n=rel->exps->h; n; n = n->next) {
2730 247 : sql_exp *e = n->data;
2731 247 : if (e != distinct) {
2732 132 : if (e->type == e_aggr) { /* copy the arguments to the aggregate */
2733 0 : list *args = e->l;
2734 0 : if (args) {
2735 0 : for (node *n = args->h ; n ; n = n->next) {
2736 0 : sql_exp *e = n->data;
2737 0 : list_append(ngbe, exp_copy(v->sql, e));
2738 0 : list_append(exps, exp_copy(v->sql, e));
2739 : }
2740 : }
2741 : } else {
2742 132 : e = exp_ref(v->sql, e);
2743 132 : append(ngbe, e);
2744 132 : append(exps, e);
2745 : }
2746 132 : if (e->type == e_aggr) /* aggregates must be copied */
2747 0 : e = exp_copy(v->sql, e);
2748 : else
2749 132 : e = exp_ref(v->sql, e);
2750 132 : append(nexps, e);
2751 : }
2752 : }
2753 :
2754 115 : darg = arg->h->data;
2755 115 : if ((found = exps_find_exp(exps, darg)) == NULL) { /* not already in the groups projection list */
2756 112 : if ((found = exps_find_exp(gbe, darg))) { /* first find if the aggregate argument already exists in the grouping list */
2757 0 : darg = exp_ref(v->sql, found);
2758 : } else {
2759 112 : list_append(gbe, darg = exp_copy(v->sql, darg));
2760 112 : exp_label(v->sql->sa, darg, ++v->sql->label);
2761 112 : darg = exp_ref(v->sql, darg);
2762 : }
2763 112 : list_append(exps, darg);
2764 112 : darg = exp_ref(v->sql, darg);
2765 : } else {
2766 3 : darg = exp_ref(v->sql, found);
2767 : }
2768 115 : arg->h->data = darg;
2769 115 : l = rel->l = rel_groupby(v->sql, rel->l, gbe);
2770 115 : l->exps = exps;
2771 115 : set_processed(l);
2772 115 : rel->r = ngbe;
2773 115 : rel->exps = nexps;
2774 115 : set_nodistinct(distinct);
2775 115 : append(nexps, distinct);
2776 115 : v->changes++;
2777 : }
2778 : return rel;
2779 : }
2780 :
2781 : /*
2782 : * Push Count inside crossjoin down, and multiply the results
2783 : *
2784 : * project ( project(
2785 : * group by ( crossproduct (
2786 : * crossproduct( project (
2787 : * L, => group by (
2788 : * R L
2789 : * ) [ ] [ count NOT NULL ] ) [ ] [ count NOT NULL ]
2790 : * ) ),
2791 : * ) [ NOT NULL ] project (
2792 : * group by (
2793 : * R
2794 : * ) [ ] [ count NOT NULL ]
2795 : * )
2796 : * ) [ sql_mul(.., .. NOT NULL) ]
2797 : * )
2798 : */
2799 : static inline sql_rel *
2800 93716 : rel_push_count_down(visitor *v, sql_rel *rel)
2801 : {
2802 93716 : sql_rel *r = rel->l;
2803 :
2804 93716 : if (is_groupby(rel->op) && !rel_is_ref(rel) && list_empty(rel->r) &&
2805 24268 : r && !r->exps && r->op == op_join && !(rel_is_ref(r)) &&
2806 : /* currently only single count aggregation is handled, no other projects or aggregation */
2807 50 : list_length(rel->exps) == 1 && exp_aggr_is_count(rel->exps->h->data)) {
2808 12 : sql_exp *nce, *oce, *cnt1 = NULL, *cnt2 = NULL;
2809 12 : sql_rel *gbl = NULL, *gbr = NULL; /* Group By */
2810 12 : sql_rel *cp = NULL; /* Cross Product */
2811 12 : sql_rel *srel;
2812 :
2813 12 : oce = rel->exps->h->data;
2814 12 : if (oce->l) /* we only handle COUNT(*) */
2815 : return rel;
2816 :
2817 10 : srel = r->l;
2818 : {
2819 10 : sql_subfunc *cf = sql_bind_func(v->sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
2820 10 : sql_exp *e = exp_aggr(v->sql->sa, NULL, cf, need_distinct(oce), need_no_nil(oce), oce->card, 0);
2821 :
2822 10 : exp_label(v->sql->sa, e, ++v->sql->label);
2823 10 : cnt1 = exp_ref(v->sql, e);
2824 10 : gbl = rel_groupby(v->sql, rel_dup(srel), NULL);
2825 10 : set_processed(gbl);
2826 10 : rel_groupby_add_aggr(v->sql, gbl, e);
2827 : }
2828 :
2829 10 : srel = r->r;
2830 : {
2831 10 : sql_subfunc *cf = sql_bind_func(v->sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
2832 10 : sql_exp *e = exp_aggr(v->sql->sa, NULL, cf, need_distinct(oce), need_no_nil(oce), oce->card, 0);
2833 :
2834 10 : exp_label(v->sql->sa, e, ++v->sql->label);
2835 10 : cnt2 = exp_ref(v->sql, e);
2836 10 : gbr = rel_groupby(v->sql, rel_dup(srel), NULL);
2837 10 : set_processed(gbr);
2838 10 : rel_groupby_add_aggr(v->sql, gbr, e);
2839 : }
2840 :
2841 10 : cp = rel_crossproduct(v->sql->sa, gbl, gbr, op_join);
2842 10 : set_processed(cp);
2843 :
2844 10 : if (!(nce = rel_binop_(v->sql, NULL, cnt1, cnt2, "sys", "sql_mul", card_value, true))) {
2845 0 : v->sql->session->status = 0;
2846 0 : v->sql->errstr[0] = '\0';
2847 0 : return rel; /* error, fallback to original expression */
2848 : }
2849 : /* because of remote plans, make sure "sql_mul" returns bigint. The cardinality is atomic, so no major performance penalty */
2850 10 : if (subtype_cmp(exp_subtype(oce), exp_subtype(nce)) != 0)
2851 10 : nce = exp_convert(v->sql, nce, exp_subtype(nce), exp_subtype(oce));
2852 10 : if (exp_name(oce))
2853 10 : exp_prop_alias(v->sql->sa, nce, oce);
2854 :
2855 10 : rel_destroy(rel);
2856 10 : rel = rel_project(v->sql->sa, cp, append(new_exp_list(v->sql->sa), nce));
2857 10 : set_processed(rel);
2858 :
2859 10 : v->changes++;
2860 : }
2861 :
2862 : return rel;
2863 : }
2864 :
2865 : static inline sql_rel *
2866 40482 : rel_basecount(visitor *v, sql_rel *rel)
2867 : {
2868 50794 : if (is_groupby(rel->op) && !rel_is_ref(rel) && rel->l && list_empty(rel->r) &&
2869 19589 : list_length(rel->exps) == 1 && exp_aggr_is_count(rel->exps->h->data)) {
2870 4444 : sql_rel *bt = rel->l;
2871 4444 : sql_exp *e = rel->exps->h->data;
2872 4444 : if (is_basetable(bt->op) && list_empty(e->l)) { /* count(*) */
2873 : /* change into select cnt('schema','table') */
2874 668 : sql_table *t = bt->l;
2875 : /* I need to get the declared table's frame number to make this work correctly for those */
2876 668 : if (!isTable(t) || isDeclaredTable(t))
2877 : return rel;
2878 589 : sql_subfunc *cf = sql_bind_func(v->sql, "sys", "cnt", sql_bind_localtype("str"), sql_bind_localtype("str"), F_FUNC, true, true);
2879 589 : list *exps = sa_list(v->sql->sa);
2880 589 : append(exps, exp_atom_str(v->sql->sa, t->s->base.name, sql_bind_localtype("str")));
2881 589 : append(exps, exp_atom_str(v->sql->sa, t->base.name, sql_bind_localtype("str")));
2882 589 : sql_exp *ne = exp_op(v->sql->sa, exps, cf);
2883 :
2884 589 : ne = exp_propagate(v->sql->sa, ne, e);
2885 589 : exp_setalias(ne, e->alias.label, exp_find_rel_name(e), exp_name(e));
2886 589 : rel_destroy(rel);
2887 589 : rel = rel_project(v->sql->sa, NULL, append(sa_list(v->sql->sa), ne));
2888 589 : v->changes++;
2889 : }
2890 : }
2891 : return rel;
2892 : }
2893 :
2894 : static inline sql_rel *
2895 40482 : rel_simplify_count(visitor *v, sql_rel *rel)
2896 : {
2897 40482 : if (is_groupby(rel->op) && !list_empty(rel->exps)) {
2898 40401 : mvc *sql = v->sql;
2899 40401 : int ncountstar = 0;
2900 :
2901 : /* Convert count(no null) into count(*) */
2902 125604 : for (node *n = rel->exps->h; n ; n = n->next) {
2903 85203 : sql_exp *e = n->data;
2904 :
2905 85203 : if (exp_aggr_is_count(e) && !need_distinct(e)) {
2906 13849 : if (list_length(e->l) == 0) {
2907 11089 : ncountstar++;
2908 2760 : } else if (list_length(e->l) == 1 && !has_nil((sql_exp*)((list*)e->l)->h->data)) {
2909 2600 : sql_subfunc *cf = sql_bind_func(sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
2910 2600 : sql_exp *ne = exp_aggr(sql->sa, NULL, cf, 0, 0, e->card, 0);
2911 2600 : if (exp_name(e))
2912 2600 : exp_prop_alias(sql->sa, ne, e);
2913 2600 : n->data = ne;
2914 2600 : ncountstar++;
2915 2600 : v->changes++;
2916 : }
2917 : }
2918 : }
2919 : /* With multiple count(*), use exp_ref to reduce the number of calls to this aggregate */
2920 40401 : if (ncountstar > 1) {
2921 56 : sql_exp *count_star = NULL;
2922 56 : sql_rel *nrel = rel_project(v->sql->sa, rel, NULL);
2923 56 : list *aexps = sa_list(v->sql->sa), *nexps = sa_list(v->sql->sa);
2924 56 : nrel->exps = nexps;
2925 247 : for (node *n = rel->exps->h; n ; n = n->next) {
2926 191 : sql_exp *e = n->data;
2927 :
2928 191 : if (exp_aggr_is_count(e) && !need_distinct(e) && list_length(e->l) == 0) {
2929 126 : if (!count_star) {
2930 56 : count_star = e;
2931 56 : append(aexps, e);
2932 56 : append(nexps, exp_ref(sql, e));
2933 : } else {
2934 70 : sql_exp *ne = exp_ref(sql, count_star);
2935 :
2936 70 : if (exp_name(e))
2937 70 : exp_prop_alias(sql->sa, ne, e);
2938 70 : v->changes++;
2939 70 : append(nexps, ne);
2940 : }
2941 : } else {
2942 65 : append(aexps, e);
2943 65 : append(nexps, exp_ref(sql, e));
2944 : }
2945 : }
2946 56 : rel->exps = aexps;
2947 56 : return nrel;
2948 : }
2949 : }
2950 : return rel;
2951 : }
2952 :
2953 : static sql_rel *
2954 40482 : rel_groupjoin(visitor *v, sql_rel *rel)
2955 : {
2956 40482 : if (!rel || rel_is_ref(rel) || !is_groupby(rel->op) || list_empty(rel->r))
2957 14506 : return rel;
2958 :
2959 25976 : sql_rel *j = rel->l;
2960 25976 : if (!j || rel_is_ref(j) /*|| !is_left(j->op)*/ || j->op != op_join || list_length(rel->exps) > 1 /* only join because left joins aren't optimized jet (TODO), only length 1 as implementation of groupjoins is missing */ || !list_empty(rel->attr))
2961 25255 : return rel;
2962 : /* check group by exps == equi join exps */
2963 721 : list *gbes = rel->r;
2964 721 : if (list_length(gbes) != list_length(j->exps))
2965 : return rel;
2966 620 : int nr = 0;
2967 1237 : for(node *n = gbes->h; n; n = n->next) {
2968 620 : sql_exp *gbe = n->data;
2969 1237 : for(node *m = j->exps->h; m; m = m->next) {
2970 620 : sql_exp *je = m->data;
2971 620 : if (je->type != e_cmp || je->flag != cmp_equal)
2972 : return rel;
2973 : /* check if its a join exp (ie not a selection) */
2974 621 : if (!( (!rel_has_exp(j->l, je->l, false) && !rel_has_exp(j->r, je->r, false)) ||
2975 8 : (!rel_has_exp(j->l, je->r, false) && !rel_has_exp(j->r, je->l, false))))
2976 0 : return rel;
2977 617 : if (exp_match(je->l, gbe)) {
2978 78 : nr++;
2979 539 : } else if (exp_match(je->r, gbe)) {
2980 10 : nr++;
2981 : }
2982 : }
2983 : }
2984 617 : if (nr == list_length(gbes)) {
2985 : // printf("#group by converted\n");
2986 88 : j = rel_dup(j);
2987 88 : j->attr = rel->exps;
2988 88 : v->changes++;
2989 88 : rel_destroy(rel);
2990 88 : return j;
2991 : }
2992 : return rel;
2993 : }
2994 :
2995 : /* select k1 from bla where k1 = const -> select const from bla where k1 = const */
2996 : static sql_rel *
2997 3638714 : rel_project_select_exp(visitor *v, sql_rel *rel)
2998 : {
2999 3638714 : if (is_simple_project(rel->op) && rel->exps && rel->l) {
3000 1179084 : sql_rel *l = rel->l;
3001 1179084 : if (is_select(l->op) && l->exps) {
3002 1197535 : for(node *n = rel->exps->h; n; n = n->next) {
3003 891513 : sql_exp *col = n->data;
3004 891513 : if (col->type == e_column) {
3005 1599537 : for(node *m = l->exps->h; m; m = m->next) {
3006 961443 : sql_exp *cmp = m->data;
3007 961443 : if (cmp->type == e_cmp && cmp->flag == cmp_equal && !is_anti(cmp) && !is_semantics(cmp) && exp_is_atom(cmp->r)) {
3008 608880 : sql_exp *l = cmp->l;
3009 608880 : if(l->type == e_column && col->nid == l->nid) {
3010 : /* replace column with the constant */
3011 12159 : sql_exp *e = n->data = exp_copy(v->sql, cmp->r);
3012 12159 : exp_setalias(e, col->alias.label, exp_relname(col), exp_name(col));
3013 12159 : exp_propagate(v->sql->sa, e, col);
3014 12159 : list_hash_clear(rel->exps);
3015 : }
3016 : }
3017 : }
3018 : }
3019 : }
3020 : }
3021 : }
3022 3638714 : return rel;
3023 : }
3024 :
3025 : static sql_rel *
3026 3638710 : rel_optimize_projections_(visitor *v, sql_rel *rel)
3027 : {
3028 3638710 : rel = rel_project_cse(v, rel);
3029 3638714 : rel = rel_project_select_exp(v, rel);
3030 :
3031 3638717 : if (!rel || !is_groupby(rel->op))
3032 : return rel;
3033 :
3034 93716 : rel = rel_remove_const_aggr(v, rel);
3035 93716 : if (v->value_based_opt) {
3036 40482 : rel = rel_simplify_sum(v, rel);
3037 40482 : rel = rel_simplify_groupby_columns(v, rel);
3038 : }
3039 93716 : rel = rel_groupby_cse(v, rel);
3040 93716 : rel = rel_push_aggr_down(v, rel);
3041 93716 : rel = rel_push_groupby_down(v, rel);
3042 93716 : rel = rel_reduce_groupby_exps(v, rel);
3043 93716 : rel = rel_distinct_aggregate_on_unique_values(v, rel);
3044 93716 : rel = rel_groupby_distinct(v, rel);
3045 93716 : rel = rel_push_count_down(v, rel);
3046 : /* only when value_based_opt is on, ie not for dependency resolution */
3047 93716 : if (v->value_based_opt) {
3048 40482 : rel = rel_simplify_count(v, rel);
3049 40482 : rel = rel_basecount(v, rel);
3050 :
3051 40482 : rel = rel_groupjoin(v, rel);
3052 : }
3053 : return rel;
3054 : }
3055 :
3056 : static sql_rel *
3057 171298 : rel_optimize_projections(visitor *v, global_props *gp, sql_rel *rel)
3058 : {
3059 171298 : (void) gp;
3060 171298 : return rel_visitor_topdown(v, rel, &rel_optimize_projections_);
3061 : }
3062 :
3063 : run_optimizer
3064 611469 : bind_optimize_projections(visitor *v, global_props *gp)
3065 : {
3066 611469 : int flag = v->sql->sql_optimizer;
3067 611249 : return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_project] || gp->cnt[op_union] || gp->cnt[op_munion]
3068 1222718 : || gp->cnt[op_inter] || gp->cnt[op_except]) && (flag & optimize_projections) ? rel_optimize_projections : NULL;
3069 : }
3070 :
3071 :
3072 : static inline sql_rel *
3073 2584258 : rel_push_project_down_union(visitor *v, sql_rel *rel)
3074 : {
3075 : /* first remove distinct if already unique */
3076 2584258 : if (rel->op == op_project && need_distinct(rel) && rel->exps && exps_unique(v->sql, rel, rel->exps) && !have_nil(rel->exps)) {
3077 109 : set_nodistinct(rel);
3078 109 : if (exps_card(rel->exps) <= CARD_ATOM && rel->card > CARD_ATOM) { /* if the projection just contains constants, then no topN is needed */
3079 2 : sql_rel *nl = rel->l = rel_topn(v->sql->sa, rel->l, append(sa_list(v->sql->sa), exp_atom_lng(v->sql->sa, 1)));
3080 2 : set_processed(nl);
3081 : }
3082 109 : v->changes++;
3083 : }
3084 :
3085 2584258 : if (rel->op == op_project && rel->l && rel->exps && list_empty(rel->r)) {
3086 847573 : int need_distinct = need_distinct(rel);
3087 847573 : sql_rel *u = rel->l;
3088 847573 : sql_rel *p = rel;
3089 :
3090 847573 : if (!u || !(is_union(u->op) || is_munion(u->op)) || need_distinct(u) || !u->exps || rel_is_ref(u) || project_unsafe(rel, false))
3091 823072 : return rel;
3092 :
3093 24501 : sql_rel *r;
3094 :
3095 : /* don't push project down union of single values */
3096 73686 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
3097 49207 : r = n->data;
3098 : // TODO: does this check make sense?
3099 49207 : if (is_project(r->op) && !r->l)
3100 : return rel;
3101 : }
3102 :
3103 73664 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
3104 49185 : r = rel_dup(n->data);
3105 :
3106 : /* introduce projection around each operand if needed */
3107 49185 : if (!is_project(r->op))
3108 750 : r = rel_project(v->sql->sa, r,
3109 : rel_projections(v->sql, r, NULL, 1, 1));
3110 : /* check if we need distinct */
3111 98370 : need_distinct &=
3112 49185 : (!exps_unique(v->sql, r, r->exps) || have_nil(r->exps));
3113 49185 : rel_rename_exps(v->sql, u->exps, r->exps);
3114 :
3115 49185 : rel_destroy(n->data);
3116 49185 : n->data = r;
3117 : }
3118 :
3119 : /* once we have checked for need_distinct in every rel we can
3120 : * introduce the projects under the munion which are gonna be
3121 : * copies of the single project above munion */
3122 73664 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
3123 49185 : r = rel_dup(n->data);
3124 :
3125 49185 : r = rel_project(v->sql->sa, r, NULL);
3126 49185 : if (need_distinct)
3127 0 : set_distinct(r);
3128 49185 : r->exps = exps_copy(v->sql, p->exps);
3129 49185 : set_processed(r);
3130 :
3131 49185 : rel_destroy(n->data);
3132 49185 : n->data = r;
3133 : }
3134 :
3135 : /* turn the project-munion on top into munion. incr operand
3136 : * rels count to make sure that they are not deleted by the
3137 : * subsequent rel_inplace_setop_n_ary */
3138 73664 : for (node *n = ((list*)u->l)->h; n; n = n->next)
3139 49185 : rel_dup(n->data);
3140 24479 : rel = rel_inplace_setop_n_ary(v->sql, rel, u->l, op_munion,
3141 : rel_projections(v->sql, rel, NULL, 1, 1));
3142 24479 : if (need_distinct)
3143 0 : set_distinct(rel);
3144 24479 : if (is_single(u))
3145 2 : set_single(rel);
3146 :
3147 24479 : v->changes++;
3148 :
3149 : /* if any operand has two project above then squash them */
3150 73664 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
3151 49185 : r = rel_dup(n->data);
3152 49185 : r = rel_merge_projects_(v, r);
3153 49185 : rel_destroy(n->data);
3154 49185 : n->data = r;
3155 : }
3156 :
3157 : return rel;
3158 : }
3159 : return rel;
3160 : }
3161 :
3162 : static inline sql_rel *
3163 2584258 : rel_merge_unions(visitor *v, sql_rel *rel)
3164 : {
3165 : /* stacked munion flattening e.g.
3166 : * munion( munion(a, b, c), munion(d, e)) => munion(a,b,c,d,e)
3167 : */
3168 2584258 : if (rel && is_munion(rel->op)) {
3169 72816 : list *l = rel->l;
3170 479618 : for(node *n = l->h; n; ) {
3171 333986 : node *next = n->next;
3172 333986 : sql_rel *oc = n->data;
3173 333986 : sql_rel *c = oc;
3174 :
3175 : /* account for any group-bys pushed down between stacked munions */
3176 333986 : if (oc->op == op_groupby)
3177 2357 : c = oc->l;
3178 :
3179 333986 : if (is_munion(c->op)) {
3180 9021 : c = rel_dup(c);
3181 9021 : list_remove_node(l, NULL, n);
3182 9021 : l = list_merge(l, c->l, (fdup)NULL);
3183 9021 : c->l = NULL;
3184 9021 : rel_destroy(oc);
3185 9021 : if (!next)
3186 3 : next = l->h;
3187 9021 : v->changes++;
3188 : }
3189 : n = next;
3190 : }
3191 72816 : rel->l = l;
3192 : }
3193 2584258 : return rel;
3194 : }
3195 :
3196 :
3197 : /*
3198 : * Push (semi)joins down unions, this is basically for merge tables, where
3199 : * we know that the fk-indices are split over two clustered merge tables.
3200 : */
3201 : static inline sql_rel *
3202 2584258 : rel_push_join_down_munion(visitor *v, sql_rel *rel)
3203 : {
3204 2584258 : if ((is_join(rel->op) && !is_outerjoin(rel->op) && !is_single(rel)) || is_semi(rel->op)) {
3205 319590 : sql_rel *l = rel->l, *r = rel->r, *ol = l, *or = r;
3206 319590 : list *exps = rel->exps, *attr = rel->attr;
3207 319590 : sql_exp *je = NULL;
3208 :
3209 : /* we would like to optimize in place reference rels which point
3210 : * to replica tables and let the replica optimizer handle those
3211 : * later. otherwise we miss the push join down optimization due
3212 : * to the rel_is_ref bailout
3213 : */
3214 319590 : if (rel_is_ref(l) && is_basetable(l->op) && l->l && isReplicaTable((sql_table*)l->l)) {
3215 1 : rel->l = rel_copy(v->sql, l, true);
3216 1 : rel_destroy(l);
3217 : }
3218 319590 : if (rel_is_ref(r) && is_basetable(r->op) && r->l && isReplicaTable((sql_table*)r->l)) {
3219 4 : rel->r = rel_copy(v->sql, r, true);
3220 4 : rel_destroy(r);
3221 : }
3222 :
3223 : // TODO: do we need to check if it's l/r are refs?
3224 319590 : if (!l || !r || need_distinct(l) || need_distinct(r) || rel_is_ref(l) || rel_is_ref(r))
3225 : return rel;
3226 271481 : if (l->op == op_project)
3227 22465 : l = l->l;
3228 271481 : if (r->op == op_project)
3229 25317 : r = r->l;
3230 :
3231 : /* both sides only if we have a join index ASSUMING pkey-fkey are aligned */
3232 : // TODO: we could also check if the join cols are (not) unique
3233 271481 : bool aligned_pk_fk = true;
3234 271481 : if (!l || !r || (is_munion(l->op) && is_munion(r->op) &&
3235 134 : !(je = rel_is_join_on_pkey(rel, aligned_pk_fk))))
3236 642 : return rel;
3237 :
3238 : // TODO: why? bailout for union semijoin without pkey joins expressions
3239 270839 : if (is_semi(rel->op) && is_munion(l->op) && !je)
3240 : return rel;
3241 :
3242 : /* if both sides are munions we assume that they will have the same number of children */
3243 270101 : if (is_munion(l->op) && is_munion(r->op) && list_length(l->l) != list_length(r->l))
3244 : return rel;
3245 :
3246 270101 : if (is_munion(l->op) && !need_distinct(l) && !is_single(l) &&
3247 4163 : !is_munion(r->op)){
3248 : /* join(munion(a,b,c), d) -> munion(join(a,d), join(b,d), join(c,d)) */
3249 4163 : list *js = sa_list(v->sql->sa);
3250 12499 : for (node *n = ((list*)l->l)->h; n; n = n->next) {
3251 8336 : sql_rel *pc = rel_dup(n->data);
3252 8336 : if (!is_project(pc->op))
3253 17 : pc = rel_project(v->sql->sa, pc, rel_projections(v->sql, pc, NULL, 1, 1));
3254 8336 : rel_rename_exps(v->sql, l->exps, pc->exps);
3255 8336 : if (l != ol) {
3256 12 : pc = rel_project(v->sql->sa, pc, NULL);
3257 12 : pc->exps = exps_copy(v->sql, ol->exps);
3258 12 : set_processed(pc);
3259 : }
3260 8336 : pc = rel_crossproduct(v->sql->sa, pc, rel_dup(or), rel->op);
3261 8336 : pc->exps = exps_copy(v->sql, exps);
3262 8336 : pc->attr = exps_copy(v->sql, attr);
3263 8336 : set_processed(pc);
3264 8336 : pc = rel_project(v->sql->sa, pc, rel_projections(v->sql, pc, NULL, 1, 1));
3265 8336 : js = append(js, pc);
3266 : }
3267 4163 : v->changes++;
3268 4163 : return rel_inplace_setop_n_ary(v->sql, rel, js, op_munion,
3269 : rel_projections(v->sql, rel, NULL, 1, 1));
3270 265938 : } else if (is_munion(l->op) && !need_distinct(l) && !is_single(l) &&
3271 0 : is_munion(r->op) && !need_distinct(r) && !is_single(r) &&
3272 : je) {
3273 : /* join(munion(a,b,c), munion(d,e,f)) -> munion(join(a,d), join(b,e), join(c,f)) */
3274 0 : list *cps = sa_list(v->sql->sa);
3275 : /* create pairwise joins between left and right parts. assume eq num of parts (see earlier bailout) */
3276 0 : for (node *n = ((list*)l->l)->h, *m=((list*)r->l)->h; n && m; n = n->next, m = m->next) {
3277 : /* left part */
3278 0 : sql_rel *lp = rel_dup(n->data);
3279 0 : if (!is_project(lp->op))
3280 0 : lp = rel_project(v->sql->sa, lp, rel_projections(v->sql, lp, NULL, 1, 1));
3281 0 : rel_rename_exps(v->sql, l->exps, lp->exps);
3282 0 : if (l != ol) {
3283 0 : lp = rel_project(v->sql->sa, lp, NULL);
3284 0 : lp->exps = exps_copy(v->sql, ol->exps);
3285 0 : set_processed(lp);
3286 : }
3287 : /* right part */
3288 0 : sql_rel *rp = rel_dup(m->data);
3289 0 : if (!is_project(rp->op))
3290 0 : rp = rel_project(v->sql->sa, rp, rel_projections(v->sql, rp, NULL, 1, 1));
3291 0 : rel_rename_exps(v->sql, r->exps, rp->exps);
3292 0 : if (r != or) {
3293 0 : rp = rel_project(v->sql->sa, rp, NULL);
3294 0 : rp->exps = exps_copy(v->sql, or->exps);
3295 0 : set_processed(rp);
3296 : }
3297 : /* combine them */
3298 0 : sql_rel *cp = rel_crossproduct(v->sql->sa, lp, rp, rel->op);
3299 0 : cp->exps = exps_copy(v->sql, exps);
3300 0 : cp->attr = exps_copy(v->sql, attr);
3301 0 : set_processed(cp);
3302 0 : cp = rel_project(v->sql->sa, cp, rel_projections(v->sql, cp, NULL, 1, 1));
3303 0 : cps = append(cps, cp);
3304 : }
3305 0 : v->changes++;
3306 0 : return rel_inplace_setop_n_ary(v->sql, rel, cps, op_munion,
3307 : rel_projections(v->sql, rel, NULL, 1, 1));
3308 265938 : } else if (!is_munion(l->op) &&
3309 265930 : is_munion(r->op) && !need_distinct(r) && !is_single(r) &&
3310 2006 : !is_semi(rel->op)) {
3311 : /* join(a, munion(b,c,d)) -> munion(join(a,b), join(a,c), join(a,d)) */
3312 1419 : list *js = sa_list(v->sql->sa);
3313 4260 : for (node *n = ((list*)r->l)->h; n; n = n->next) {
3314 2841 : sql_rel *pc = rel_dup(n->data);
3315 2841 : if (!is_project(pc->op))
3316 21 : pc = rel_project(v->sql->sa, pc, rel_projections(v->sql, pc, NULL, 1, 1));
3317 2841 : rel_rename_exps(v->sql, r->exps, pc->exps);
3318 2841 : if (r != or) {
3319 93 : pc = rel_project(v->sql->sa, pc, NULL);
3320 93 : pc->exps = exps_copy(v->sql, or->exps);
3321 93 : set_processed(pc);
3322 : }
3323 2841 : pc = rel_crossproduct(v->sql->sa, rel_dup(ol), pc, rel->op);
3324 2841 : pc->exps = exps_copy(v->sql, exps);
3325 2841 : pc->attr = exps_copy(v->sql, attr);
3326 2841 : set_processed(pc);
3327 2841 : pc = rel_project(v->sql->sa, pc, rel_projections(v->sql, pc, NULL, 1, 1));
3328 2841 : js = append(js, pc);
3329 : }
3330 1419 : v->changes++;
3331 1419 : return rel_inplace_setop_n_ary(v->sql, rel, js, op_munion,
3332 : rel_projections(v->sql, rel, NULL, 1, 1));
3333 264519 : } else if (!is_munion(l->op) &&
3334 264511 : is_munion(r->op) && !need_distinct(r) && !is_single(r) &&
3335 587 : is_semi(rel->op) && je) {
3336 : /* {semi}join ( A1, munion (B, A2a, C, A2b)) [A1.partkey = A2.partkey] ->
3337 : * {semi}join ( A1, munion (A2a, A2b))
3338 : * (ie some parts of an n-th munion operand)
3339 : *
3340 : * How to detect that a relation isn't matching?
3341 : * partitioning is currently done only on pkey/fkey's
3342 : * ie only matching per part if join is on pkey/fkey (parts)
3343 : * and part numbers should match.
3344 : * */
3345 0 : int lpnr = rel_part_nr(l, je);
3346 0 : if (lpnr < 0)
3347 : return rel;
3348 :
3349 0 : list *ups = sa_list(v->sql->sa);
3350 0 : for (node *n = ((list*)r->l)->h; n; n = n->next) {
3351 0 : if (rel_uses_part_nr(n->data, je, lpnr)) {
3352 0 : sql_rel *pc = rel_dup(n->data);
3353 0 : ups = append(ups, pc);
3354 : }
3355 : }
3356 0 : v->changes++;
3357 0 : return rel_inplace_setop_n_ary(v->sql, r, ups, op_munion,
3358 : rel_projections(v->sql, rel, NULL, 1, 1));
3359 : }
3360 : }
3361 : return rel;
3362 : }
3363 :
3364 : static sql_rel *
3365 2584258 : rel_optimize_unions_topdown_(visitor *v, sql_rel *rel)
3366 : {
3367 2584258 : rel = rel_push_project_down_union(v, rel);
3368 2584258 : rel = rel_merge_unions(v, rel);
3369 2584258 : rel = rel_push_join_down_munion(v, rel);
3370 2584258 : return rel;
3371 : }
3372 :
3373 : static sql_rel *
3374 25763 : rel_optimize_unions_topdown(visitor *v, global_props *gp, sql_rel *rel)
3375 : {
3376 25763 : (void) gp;
3377 25763 : return rel_visitor_topdown(v, rel, &rel_optimize_unions_topdown_);
3378 : }
3379 :
3380 : run_optimizer
3381 611747 : bind_optimize_unions_topdown(visitor *v, global_props *gp)
3382 : {
3383 611747 : (void) v;
3384 611747 : return gp->opt_level == 1 && gp->cnt[op_munion] ? rel_optimize_unions_topdown : NULL;
3385 : }
3386 :
3387 :
3388 : static sql_column *
3389 0 : is_fk_column_of_pk(mvc *sql, sql_rel *rel, sql_column *pkc, sql_exp *e) /* test if e is a foreign key column for the pk on pkc */
3390 : {
3391 0 : sql_trans *tr = sql->session->tr;
3392 0 : sql_column *c = exp_find_column(rel, e, -2);
3393 :
3394 0 : if (c) {
3395 0 : sql_table *t = c->t;
3396 :
3397 0 : for (node *n = ol_first_node(t->idxs); n; n = n->next) {
3398 0 : sql_idx *li = n->data;
3399 :
3400 0 : if (li->type == join_idx) {
3401 0 : for (node *m = li->columns->h ; m ; m = m->next) {
3402 0 : sql_kc *fkc = m->data;
3403 :
3404 0 : if (strcmp(fkc->c->base.name, c->base.name) == 0) { /* same fkey column */
3405 0 : sql_key *fkey = (sql_key*)os_find_id(tr->cat->objects, tr, ((sql_fkey*)li->key)->rkey);
3406 :
3407 0 : if (strcmp(fkey->t->base.name, pkc->t->base.name) == 0) { /* to same pk table */
3408 0 : for (node *o = fkey->columns->h ; o ; o = n->next) {
3409 0 : sql_kc *kc = m->data;
3410 :
3411 0 : if (strcmp(kc->c->base.name, pkc->base.name) == 0) /* to same pk table column */
3412 : return c;
3413 : }
3414 : }
3415 : }
3416 : }
3417 : }
3418 : }
3419 : }
3420 : return NULL;
3421 : }
3422 :
3423 : static bool
3424 0 : has_no_selectivity(mvc *sql, sql_rel *rel)
3425 : {
3426 0 : if (!rel)
3427 : return true;
3428 :
3429 0 : switch(rel->op){
3430 : case op_basetable:
3431 : case op_truncate:
3432 : case op_table:
3433 : return true;
3434 0 : case op_topn:
3435 : case op_sample:
3436 : case op_project:
3437 : case op_groupby:
3438 0 : return has_no_selectivity(sql, rel->l);
3439 : case op_ddl:
3440 : case op_insert:
3441 : case op_update:
3442 : case op_delete:
3443 : case op_merge:
3444 : case op_join:
3445 : case op_left:
3446 : case op_right:
3447 : case op_full:
3448 : case op_semi:
3449 : case op_anti:
3450 : case op_union:
3451 : case op_inter:
3452 : case op_except:
3453 : case op_munion:
3454 : case op_select:
3455 : return false;
3456 : }
3457 : return true;
3458 : }
3459 :
3460 : static sql_rel *
3461 701508 : rel_distinct_project2groupby_(visitor *v, sql_rel *rel)
3462 : {
3463 701508 : sql_rel *l = rel->l;
3464 :
3465 : /* rewrite distinct project (table) [ constant ] -> project [ constant ] */
3466 709228 : if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
3467 7720 : exps_card(rel->exps) <= CARD_ATOM) {
3468 471 : set_nodistinct(rel);
3469 471 : if (rel->card > CARD_ATOM) { /* if the projection just contains constants, then no topN is needed */
3470 193 : sql_rel *nl = rel->l = rel_topn(v->sql->sa, rel->l, append(sa_list(v->sql->sa), exp_atom_lng(v->sql->sa, 1)));
3471 193 : set_processed(nl);
3472 : }
3473 471 : v->changes++;
3474 : }
3475 :
3476 : /* rewrite distinct project [ pk ] ( select ( table ) [ e op val ])
3477 : * into project [ pk ] ( select/semijoin ( table ) */
3478 701508 : if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
3479 7461 : (l->op == op_select || l->op == op_semi) && exps_unique(v->sql, rel, rel->exps) &&
3480 212 : (!have_semantics(l->exps) || !have_nil(rel->exps))) {
3481 212 : set_nodistinct(rel);
3482 212 : v->changes++;
3483 : }
3484 :
3485 : /* rewrite distinct project ( join(p,f) [ p.pk = f.fk ] ) [ p.pk ]
3486 : * into project( (semi)join(p,f) [ p.pk = f.fk ] ) [ p.pk ] */
3487 701508 : if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
3488 7037 : l && (is_select(l->op) || l->op == op_join) && rel_is_join_on_pkey(l, true) /* [ pk == fk ] */) {
3489 0 : sql_exp *found = NULL, *pk = NULL, *fk = NULL;
3490 0 : bool all_exps_atoms = true;
3491 0 : sql_column *pkc = NULL;
3492 :
3493 0 : for (node *m = l->exps->h ; m ; m = m->next) { /* find a primary key join */
3494 0 : sql_exp *je = (sql_exp *) m->data;
3495 0 : sql_exp *le = je->l, *re = je->r;
3496 :
3497 0 : if (!find_prop(je->p, PROP_JOINIDX)) /* must be a pk-fk join expression */
3498 0 : continue;
3499 :
3500 0 : if ((pkc = exp_is_pkey(l, le))) { /* le is the primary key */
3501 0 : all_exps_atoms = true;
3502 :
3503 0 : for (node *n = rel->exps->h; n && all_exps_atoms; n = n->next) {
3504 0 : sql_exp *e = (sql_exp *) n->data;
3505 :
3506 0 : if (exp_match(e, le) || exp_refers(e, le))
3507 : found = e;
3508 0 : else if (e->card > CARD_ATOM)
3509 0 : all_exps_atoms = false;
3510 : }
3511 : pk = le;
3512 : fk = re;
3513 : }
3514 0 : if (!found && (pkc = exp_is_pkey(l, re))) { /* re is the primary key */
3515 0 : all_exps_atoms = true;
3516 :
3517 0 : for (node *n = rel->exps->h; n && all_exps_atoms; n = n->next) {
3518 0 : sql_exp *e = (sql_exp *) n->data;
3519 :
3520 0 : if (exp_match(e, re) || exp_refers(e, re))
3521 : found = e;
3522 0 : else if (e->card > CARD_ATOM)
3523 0 : all_exps_atoms = false;
3524 : }
3525 : pk = re;
3526 : fk = le;
3527 : }
3528 : }
3529 :
3530 0 : if (all_exps_atoms && found) { /* rel must have the same primary key on the projection list */
3531 : /* if the foreign key has no selectivity, the join can be removed */
3532 0 : if (!(rel_is_ref(l)) && ((rel_find_exp(l->l, fk) && is_fk_column_of_pk(v->sql, l->l, pkc, fk) && has_no_selectivity(v->sql, l->l)) ||
3533 0 : (l->r && rel_find_exp(l->r, fk) && is_fk_column_of_pk(v->sql, l->r, pkc, fk) && has_no_selectivity(v->sql, l->r)))) {
3534 0 : sql_rel *side = (rel_find_exp(l->l, pk) != NULL)?l->l:l->r;
3535 :
3536 0 : rel->l = rel_dup(side);
3537 0 : rel_destroy(l);
3538 0 : v->changes++;
3539 0 : set_nodistinct(rel);
3540 0 : return rel;
3541 : }
3542 : /* if the join has no multiple references it can be re-written into a semijoin */
3543 0 : if (l->op == op_join && !(rel_is_ref(l)) && list_length(rel->exps) == 1) { /* other expressions may come from the other side */
3544 0 : if (l->r && rel_find_exp(l->r, pk)) {
3545 0 : sql_rel *temp = l->l;
3546 0 : l->l = l->r;
3547 0 : l->r = temp;
3548 :
3549 0 : l->op = op_semi;
3550 0 : } else if (rel_find_exp(l->l, pk)) {
3551 0 : l->op = op_semi;
3552 : }
3553 : }
3554 0 : v->changes++;
3555 0 : set_nodistinct(rel);
3556 0 : return rel;
3557 : }
3558 : }
3559 : /* rewrite distinct project [ gbe ] ( select ( groupby [ gbe ] [ gbe, e ] )[ e op val ])
3560 : * into project [ gbe ] ( select ( group etc ) */
3561 701508 : if (rel->op == op_project && rel->l && !rel->r /* no order by */ &&
3562 7037 : need_distinct(rel) && l->op == op_select){
3563 2573 : sql_rel *g = l->l;
3564 2573 : if (is_groupby(g->op)) {
3565 4 : list *used = sa_list(v->sql->sa);
3566 4 : list *gbe = g->r;
3567 4 : node *n;
3568 4 : int fnd = 1;
3569 :
3570 8 : for (n = rel->exps->h; n && fnd; n = n->next) {
3571 4 : sql_exp *e = n->data;
3572 :
3573 4 : if (e->card > CARD_ATOM) {
3574 : /* find e in gbe */
3575 4 : sql_exp *ne = list_find_exp(g->exps, e);
3576 :
3577 4 : if (ne)
3578 3 : ne = list_find_exp( gbe, ne);
3579 3 : if (ne && !list_find_exp(used, ne)) {
3580 2 : fnd++;
3581 2 : list_append(used, ne);
3582 : }
3583 3 : if (!ne)
3584 : fnd = 0;
3585 : }
3586 : }
3587 4 : if (fnd == (list_length(gbe)+1)) {
3588 0 : v->changes++;
3589 0 : set_nodistinct(rel);
3590 : }
3591 : }
3592 : }
3593 701508 : if (rel->op == op_project && rel->l &&
3594 7037 : need_distinct(rel) && exps_card(rel->exps) > CARD_ATOM) {
3595 7037 : node *n;
3596 7037 : list *exps = new_exp_list(v->sql->sa), *gbe = new_exp_list(v->sql->sa);
3597 7037 : list *obe = rel->r; /* we need to read the ordering later */
3598 :
3599 7037 : if (obe) {
3600 0 : int fnd = 0;
3601 :
3602 0 : for(n = obe->h; n && !fnd; n = n->next) {
3603 0 : sql_exp *e = n->data;
3604 :
3605 0 : if (e->type != e_column)
3606 : fnd = 1;
3607 0 : else if (exps_bind_nid(rel->exps, e->nid) == NULL)
3608 0 : fnd = 1;
3609 : }
3610 0 : if (fnd)
3611 : return rel;
3612 : }
3613 7037 : rel->l = rel_project(v->sql->sa, rel->l, rel->exps);
3614 :
3615 21496 : for (n = rel->exps->h; n; n = n->next) {
3616 14459 : sql_exp *e = n->data, *ne;
3617 :
3618 14459 : set_nodistinct(e);
3619 14459 : ne = exp_ref(v->sql, e);
3620 14459 : if (e->card > CARD_ATOM && !list_find_exp(gbe, ne)) { /* no need to group by on constants, or the same column multiple times */
3621 14088 : append(gbe, ne);
3622 14088 : ne = exp_ref(v->sql, ne);
3623 : }
3624 14459 : append(exps, ne);
3625 : }
3626 7037 : rel->op = op_groupby;
3627 7037 : rel->exps = exps;
3628 7037 : rel->r = gbe;
3629 7037 : set_nodistinct(rel);
3630 7037 : if (obe) {
3631 : /* add order again */
3632 0 : rel = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
3633 0 : rel->r = obe;
3634 : }
3635 7037 : v->changes++;
3636 : }
3637 : return rel;
3638 : }
3639 :
3640 : static sql_rel *
3641 5577 : rel_distinct_project2groupby(visitor *v, global_props *gp, sql_rel *rel)
3642 : {
3643 5577 : (void) gp;
3644 5577 : return rel_visitor_bottomup(v, rel, &rel_distinct_project2groupby_);
3645 : }
3646 :
3647 : run_optimizer
3648 611616 : bind_distinct_project2groupby(visitor *v, global_props *gp)
3649 : {
3650 611616 : int flag = v->sql->sql_optimizer;
3651 541364 : return gp->opt_cycle == 0 && gp->opt_level == 1 && gp->needs_distinct && gp->cnt[op_project] &&
3652 617193 : (flag & distinct_project2groupby)? rel_distinct_project2groupby : NULL;
3653 : }
|