Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer_private.h"
15 : #include "rel_basetable.h"
16 : #include "rel_exp.h"
17 : #include "rel_select.h"
18 : #include "rel_rewriter.h"
19 :
20 : static int
21 3714755 : exp_is_rename(sql_exp *e)
22 : {
23 3714755 : return (e->type == e_column);
24 : }
25 :
26 : static int
27 2067240 : exp_is_useless_rename(sql_exp *e)
28 : {
29 2067240 : return (e->type == e_column && e->alias.label == e->nid);
30 : }
31 :
32 : static list *
33 20965 : rel_used_projections(mvc *sql, list *exps, list *users)
34 : {
35 20965 : list *nexps = sa_list(sql->sa);
36 20965 : bool *used = SA_ZNEW_ARRAY(sql->ta, bool, list_length(exps));
37 20966 : int i = 0;
38 :
39 136559 : for(node *n = users->h; n; n = n->next) {
40 115594 : sql_exp *e = n->data, *ne = NULL;
41 115594 : assert(e->nid && exps_bind_nid(exps, e->nid));
42 115593 : if (e->nid && (ne = exps_bind_nid(exps, e->nid))) {
43 115594 : used[list_position(exps, ne)] = 1;
44 : }
45 : }
46 147252 : for(node *n = exps->h; n; n = n->next, i++) {
47 126286 : sql_exp *e = n->data;
48 126286 : if (is_intern(e) || used[i])
49 126149 : append(nexps, e);
50 : }
51 20966 : return nexps;
52 : }
53 :
54 : /* move projects down with the goal op removing them completely (ie push renames/reduced lists into basetable)
55 : * for some cases we can directly remove iff renames rename into same alias
56 : * */
57 : static sql_rel *
58 4011714 : rel_push_project_down_(visitor *v, sql_rel *rel)
59 : {
60 : /* for now only push down renames */
61 4011714 : if (v->depth > 1 && is_simple_project(rel->op) && !need_distinct(rel) && !rel_is_ref(rel) && rel->l && !rel->r &&
62 1168828 : v->parent &&
63 1168820 : !is_modify(v->parent->op) && !is_topn(v->parent->op) && !is_sample(v->parent->op) &&
64 1851845 : !is_ddl(v->parent->op) && !is_set(v->parent->op) && !is_munion(v->parent->op) &&
65 719531 : list_check_prop_all(rel->exps, (prop_check_func)&exp_is_rename)) {
66 418091 : sql_rel *l = rel->l;
67 :
68 418091 : if (rel_is_ref(l))
69 : return rel;
70 387152 : if (is_basetable(l->op)) {
71 40802 : if (list_check_prop_all(rel->exps, (prop_check_func)&exp_is_useless_rename)) {
72 : /* TODO reduce list (those in the project + internal) */
73 20966 : rel->l = NULL;
74 20966 : l->exps = rel_used_projections(v->sql, l->exps, rel->exps);
75 20966 : rel_destroy(rel);
76 20966 : v->changes++;
77 20966 : return l;
78 : }
79 : return rel;
80 346350 : } else if (list_check_prop_all(rel->exps, (prop_check_func)&exp_is_useless_rename)) {
81 139207 : if ((is_project(l->op) && list_length(l->exps) == list_length(rel->exps)) ||
82 122318 : ((v->parent && is_project(v->parent->op)) &&
83 42407 : (is_mset(l->op) || is_set(l->op) || is_select(l->op) || is_join(l->op) || is_semi(l->op) || is_topn(l->op) || is_sample(l->op)))) {
84 58291 : rel->l = NULL;
85 58291 : rel_destroy(rel);
86 58291 : v->changes++;
87 58291 : return l;
88 : }
89 : }
90 : }
91 : /* ToDo handle useful renames, ie new relation name and unique set of attribute names (could reduce set of * attributes) */
92 : /* handle both useless and useful with project [ group by ] */
93 : return rel;
94 : }
95 :
96 : static sql_rel *
97 185705 : rel_push_project_down(visitor *v, global_props *gp, sql_rel *rel)
98 : {
99 185705 : (void) gp;
100 185705 : return rel_visitor_bottomup(v, rel, &rel_push_project_down_);
101 : }
102 :
103 : run_optimizer
104 629577 : bind_push_project_down(visitor *v, global_props *gp)
105 : {
106 629577 : int flag = v->sql->sql_optimizer;
107 629381 : return gp->opt_level == 1 && (flag & push_project_down) &&
108 1258958 : (gp->cnt[op_project] || gp->cnt[op_groupby]) ? rel_push_project_down : NULL;
109 : }
110 :
111 :
112 : static bool exp_shares_exps(sql_exp *e, list *shared, uint64_t *uses);
113 :
114 : static bool
115 113475 : exps_shares_exps(list *exps, list *shared, uint64_t *uses)
116 : {
117 113475 : if (!exps || !shared)
118 : return false;
119 393555 : for (node *n = exps->h; n; n = n->next) {
120 280494 : sql_exp *e = n->data;
121 :
122 280494 : if (exp_shares_exps(e, shared, uses))
123 : return true;
124 : }
125 : return false;
126 : }
127 :
128 : static bool
129 3208279 : exp_shares_exps(sql_exp *e, list *shared, uint64_t *uses)
130 : {
131 5881070 : switch(e->type) {
132 81 : case e_cmp:
133 81 : if (e->flag == cmp_or || e->flag == cmp_filter)
134 58 : return exps_shares_exps(e->l, shared, uses) || exps_shares_exps(e->r, shared, uses);
135 23 : else if (e->flag == cmp_in || e->flag == cmp_notin)
136 0 : return exp_shares_exps(e->l, shared, uses) || exps_shares_exps(e->r, shared, uses);
137 : else
138 46 : return exp_shares_exps(e->l, shared, uses) || exp_shares_exps(e->r, shared, uses) || (e->f && exp_shares_exps(e->f, shared, uses));
139 114083 : case e_atom:
140 114083 : if (e->f)
141 0 : return exps_shares_exps(e->f, shared, uses);
142 : return false;
143 5366982 : case e_column:
144 : {
145 5366982 : sql_exp *ne = NULL;
146 5366982 : assert(e->nid);
147 5366982 : if (e->nid)
148 5366982 : ne = exps_bind_nid(shared, e->nid);
149 5366982 : if (!ne)
150 : return false;
151 4101578 : if (ne->type != e_column) {
152 587187 : int i = list_position(shared, ne);
153 587187 : if (i < 0)
154 : return false;
155 587187 : uint64_t used = (uint64_t) 1 << i;
156 587187 : if (used & *uses)
157 : return true;
158 586777 : *uses |= used;
159 586777 : return false;
160 : }
161 3514391 : if (ne != e && (list_position(shared, e) < 0 || list_position(shared, e) > list_position(shared, ne)))
162 : /* maybe ne refers to a local complex exp */
163 2386226 : return exp_shares_exps(ne, shared, uses);
164 : return false;
165 : }
166 286565 : case e_convert:
167 286565 : return exp_shares_exps(e->l, shared, uses);
168 113359 : case e_aggr:
169 : case e_func:
170 113359 : return exps_shares_exps(e->l, shared, uses);
171 : case e_psm:
172 0 : assert(0); /* not in projection list */
173 : }
174 : return false;
175 : }
176 :
177 : static bool
178 387635 : exps_share_expensive_exp(list *exps, list *shared )
179 : {
180 387635 : uint64_t uses = 0;
181 :
182 387635 : if (list_empty(exps) || list_empty(shared))
183 0 : return false;
184 387635 : if (list_length(shared) > 64)
185 : return true;
186 3314201 : for (node *n = exps->h; n; n = n->next) {
187 2927723 : sql_exp *e = n->data;
188 :
189 2927723 : if (exp_shares_exps(e, shared, &uses))
190 : return true;
191 : }
192 : return false;
193 : }
194 :
195 : static bool ambigious_ref( list *exps, sql_exp *e);
196 : static bool
197 87304 : ambigious_refs( list *exps, list *refs)
198 : {
199 87304 : if (list_empty(refs))
200 : return false;
201 264779 : for(node *n=refs->h; n; n = n->next) {
202 198604 : if (ambigious_ref(exps, n->data))
203 : return true;
204 : }
205 : return false;
206 : }
207 :
208 : static bool
209 4385117 : ambigious_ref( list *exps, sql_exp *e)
210 : {
211 4385117 : sql_exp *ne = NULL;
212 :
213 4385117 : if (e->type == e_column) {
214 3791225 : assert(e->nid);
215 3791225 : if (e->nid)
216 3791225 : ne = exps_bind_nid(exps, e->nid);
217 3791225 : if (ne && e != ne)
218 : return true;
219 : }
220 4376136 : if (e->type == e_func)
221 87304 : return ambigious_refs(exps, e->l);
222 : return false;
223 : }
224 :
225 : /* merge 2 projects into the lower one */
226 : static sql_rel *
227 4298034 : rel_merge_projects_(visitor *v, sql_rel *rel)
228 : {
229 4365973 : list *exps = rel->exps;
230 4365973 : sql_rel *prj = rel->l;
231 4365973 : node *n;
232 :
233 4365973 : if (rel->op == op_project &&
234 1756979 : prj && prj->op == op_project && !(rel_is_ref(prj)) && list_empty(prj->r)) {
235 671055 : int all = 1;
236 :
237 671055 : if (project_unsafe(rel, false) || project_unsafe(prj, false) || exps_share_expensive_exp(rel->exps, prj->exps))
238 284577 : return rel;
239 :
240 : /* here we try to fix aliases */
241 386478 : list *nexps = NULL;
242 : /* for each exp check if we can rename it */
243 2630023 : for (n = exps->h; n && all; n = n->next) {
244 2252526 : sql_exp *e = n->data, *ne = NULL;
245 :
246 : /* We do not handle expressions pointing back in the list */
247 2252526 : if (ambigious_ref(exps, e)) {
248 : all = 0;
249 : break;
250 : }
251 2243545 : ne = exp_push_down_prj(v->sql, e, prj, prj->l);
252 : /* check if the referred alias name isn't used twice */
253 2243545 : if (ne && ambigious_ref(nexps, ne)) {
254 : all = 0;
255 : break;
256 : }
257 1933987 : if (ne) {
258 1933987 : if (exp_name(e))
259 1927589 : exp_prop_alias(v->sql->sa, ne, e);
260 1933987 : if (!nexps)
261 377487 : nexps = new_exp_list(v->sql->sa);
262 1933987 : list_append(nexps, ne);
263 : } else {
264 : all = 0;
265 : }
266 : }
267 386478 : if (all) {
268 67939 : rel->exps = nexps;
269 : /* we can now remove the intermediate project */
270 : /* push order by expressions */
271 67939 : if (!list_empty(rel->r)) {
272 0 : list *nr = new_exp_list(v->sql->sa), *res = rel->r;
273 0 : for (n = res->h; n; n = n->next) {
274 0 : sql_exp *e = n->data, *ne = NULL;
275 :
276 0 : ne = exp_push_down_prj(v->sql, e, prj, prj->l);
277 0 : if (ne) {
278 0 : if (exp_name(e))
279 0 : exp_prop_alias(v->sql->sa, ne, e);
280 0 : list_append(nr, ne);
281 : } else {
282 : all = 0;
283 : }
284 : }
285 0 : if (all) {
286 0 : rel->r = nr;
287 : } else {
288 : /* leave as is */
289 0 : rel->exps = exps;
290 0 : return rel;
291 : }
292 : }
293 67939 : rel->l = prj->l;
294 67939 : prj->l = NULL;
295 67939 : rel_destroy(prj);
296 67939 : v->changes++;
297 67939 : return rel_merge_projects_(v, rel);
298 : } else {
299 : /* leave as is */
300 318539 : rel->exps = exps;
301 : }
302 318539 : return rel;
303 : }
304 : return rel;
305 : }
306 :
307 : static sql_rel *
308 185701 : rel_merge_projects(visitor *v, global_props *gp, sql_rel *rel)
309 : {
310 185701 : (void) gp;
311 185701 : return rel_visitor_bottomup(v, rel, &rel_merge_projects_);
312 : }
313 :
314 : run_optimizer
315 629618 : bind_merge_projects(visitor *v, global_props *gp)
316 : {
317 629618 : int flag = v->sql->sql_optimizer;
318 629422 : return gp->opt_level == 1 && (flag & merge_projects) &&
319 1259032 : (gp->cnt[op_project] || gp->cnt[op_groupby]) ? rel_merge_projects : NULL;
320 : }
321 :
322 :
323 : static sql_exp *split_aggr_and_project(mvc *sql, list *aexps, sql_exp *e);
324 :
325 : static void
326 0 : list_split_aggr_and_project(mvc *sql, list *aexps, list *exps)
327 : {
328 0 : if (list_empty(exps))
329 : return ;
330 0 : for(node *n = exps->h; n; n = n->next)
331 0 : n->data = split_aggr_and_project(sql, aexps, n->data);
332 : }
333 :
334 : static sql_exp *
335 0 : split_aggr_and_project(mvc *sql, list *aexps, sql_exp *e)
336 : {
337 0 : switch(e->type) {
338 0 : case e_aggr:
339 : /* add to the aggrs */
340 0 : if (!exp_name(e))
341 0 : exp_label(sql->sa, e, ++sql->label);
342 0 : list_append(aexps, e);
343 0 : return exp_ref(sql, e);
344 : case e_cmp:
345 : /* e_cmp's shouldn't exist in an aggr expression list */
346 0 : assert(0);
347 : case e_convert:
348 0 : e->l = split_aggr_and_project(sql, aexps, e->l);
349 0 : return e;
350 0 : case e_func:
351 0 : list_split_aggr_and_project(sql, aexps, e->l);
352 0 : return e;
353 : case e_atom:
354 : case e_column: /* constants and columns shouldn't be rewritten */
355 : case e_psm:
356 : return e;
357 : }
358 : return NULL;
359 : }
360 :
361 : /* exp_rename */
362 : static sql_exp * exp_rename(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t);
363 :
364 : static list *
365 52943 : exps_rename(mvc *sql, list *l, sql_rel *f, sql_rel *t)
366 : {
367 52943 : if (list_empty(l))
368 : return l;
369 46861 : for (node *n=l->h; n; n=n->next)
370 30097 : n->data = exp_rename(sql, n->data, f, t);
371 : return l;
372 : }
373 :
374 : /* exp_rename */
375 : static sql_exp *
376 180551 : exp_rename(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t)
377 : {
378 180551 : sql_exp *ne = NULL;
379 :
380 180551 : assert(is_project(f->op));
381 :
382 180551 : switch(e->type) {
383 80875 : case e_column:
384 80875 : assert(e->nid);
385 80875 : ne = exps_bind_nid(f->exps, e->nid);
386 80875 : if (!ne)
387 : return e;
388 53114 : sql_exp *oe = e;
389 53114 : e = NULL;
390 53114 : if (ne && ne->nid)
391 52965 : e = rel_find_exp(t, ne);
392 52965 : if (!e) {
393 149 : sql->session->status = 0;
394 149 : sql->errstr[0] = 0;
395 149 : if (exp_is_atom(ne))
396 : return ne;
397 : return oe;
398 : }
399 52965 : ne = exp_ref(sql, e);
400 52965 : if (oe)
401 52965 : exp_propagate(sql->sa, ne, oe);
402 52965 : return ne;
403 55693 : case e_cmp:
404 55693 : if (e->flag == cmp_or || e->flag == cmp_filter) {
405 3768 : e->l = exps_rename(sql, e->l, f, t);
406 3768 : e->r = exps_rename(sql, e->r, f, t);
407 51925 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
408 6410 : e->l = exp_rename(sql, e->l, f, t);
409 6410 : e->r = exps_rename(sql, e->r, f, t);
410 : } else {
411 45515 : e->l = exp_rename(sql, e->l, f, t);
412 45515 : e->r = exp_rename(sql, e->r, f, t);
413 45515 : if (e->f)
414 232 : e->f = exp_rename(sql, e->f, f, t);
415 : }
416 : break;
417 4986 : case e_convert:
418 4986 : e->l = exp_rename(sql, e->l, f, t);
419 4986 : break;
420 2833 : case e_aggr:
421 : case e_func:
422 2833 : e->l = exps_rename(sql, e->l, f, t);
423 2833 : break;
424 36164 : case e_atom:
425 36164 : e->f = exps_rename(sql, e->f, f, t);
426 36164 : break;
427 : case e_psm:
428 : break;
429 : }
430 : return e;
431 : }
432 :
433 : static int
434 5874111 : exp_match_exp_cmp( sql_exp *e1, sql_exp *e2)
435 : {
436 5874111 : if (exp_match_exp(e1,e2))
437 60944 : return 0;
438 : return -1;
439 : }
440 :
441 : /* Pushing projects up the tree. Done very early in the optimizer.
442 : * Makes later steps easier.
443 : */
444 : extern void _rel_print(mvc *sql, sql_rel *rel);
445 : static sql_rel *
446 3898771 : rel_push_project_up_(visitor *v, sql_rel *rel)
447 : {
448 3898771 : if (is_simple_project(rel->op) && rel->l && !rel_is_ref(rel)) {
449 1256572 : sql_rel *l = rel->l;
450 1256572 : if (is_simple_project(l->op))
451 323734 : return rel_merge_projects_(v, rel);
452 : /* find equal column references, later cases are rewritten into references back to the first
453 : * project () [ i.i L1, i.i L2 ] -> project() [ i.i L1, L1 L2 ] */
454 932838 : if (list_length(rel->exps) > 1) {
455 786872 : list *exps = rel->exps;
456 786872 : node *n = exps->h;
457 786872 : bool needed = false;
458 4048492 : for(n = n->next; n && !needed; n = n->next) {
459 3261616 : sql_exp *e = n->data;
460 3261616 : if (e->type == e_column && !is_selfref(e)) {
461 21322883 : for(node *m = exps->h; m && m != n && !needed; m = m->next) {
462 18548069 : sql_exp *h = m->data;
463 18548069 : if (exp_match_exp(h,e))
464 9745 : needed = true;
465 : }
466 : }
467 : }
468 786876 : if (needed) {
469 9745 : rel->exps = sa_list(v->sql->sa);
470 9745 : node *n = exps->h;
471 9745 : list_append(rel->exps, n->data);
472 120561 : for(n = n->next; n; n = n->next) {
473 110816 : sql_exp *e = n->data;
474 110816 : if (e->type == e_column && !is_selfref(e)) {
475 69793 : node *m = list_find(rel->exps, e, (fcmp)&exp_match_exp_cmp);
476 69793 : if (m) {
477 12174 : sql_exp *me = m->data;
478 12174 : if (me->alias.label != me->nid) {
479 793 : sql_exp *ne = exp_ref(v->sql, m->data);
480 793 : exp_setalias(ne, e->alias.label, exp_relname(e), exp_name(e));
481 793 : exp_propagate(v->sql->sa, ne, e);
482 793 : set_selfref(ne);
483 793 : e = ne;
484 : }
485 : }
486 : }
487 110816 : list_append(rel->exps, e);
488 : }
489 : }
490 : }
491 : }
492 :
493 : /* project/project cleanup is done later */
494 3575042 : if (is_join(rel->op) || is_select(rel->op)) {
495 1150557 : node *n;
496 1150557 : list *exps = NULL, *l_exps, *r_exps;
497 1150557 : sql_rel *l = rel->l;
498 1150557 : sql_rel *r = rel->r;
499 1150557 : sql_rel *t;
500 1150557 : int nlexps = 0, i = 0;
501 :
502 : /* Don't rewrite refs, non projections or constant or
503 : order by projections */
504 1150557 : if (!l || rel_is_ref(l) || is_topn(l->op) || is_sample(l->op) ||
505 1063051 : (is_join(rel->op) && !list_empty(rel->attr)) ||
506 1012487 : (is_join(rel->op) && (!r || rel_is_ref(r))) ||
507 987059 : (is_left(rel->op) && (rel->flag&MERGE_LEFT) /* can't push projections above merge statements left joins */) ||
508 987057 : (is_select(rel->op) && l->op != op_project) ||
509 484130 : (is_join(rel->op) && ((l->op != op_project && r->op != op_project) || is_topn(r->op) || is_sample(r->op))) ||
510 167946 : ((l->op == op_project && (!l->l || l->r || project_unsafe(l, is_select(rel->op)))) ||
511 98416 : (is_join(rel->op) && (r->op == op_project && (!r->l || r->r || project_unsafe(r, false))))))
512 1066174 : return rel;
513 :
514 84383 : if (l->op == op_project && l->l) {
515 1019423 : for (n = l->exps->h; n; n = n->next) {
516 978726 : sql_exp *e = n->data;
517 978726 : if (!(is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) &&
518 977481 : !(e->type == e_column && !has_label(e)))
519 : return rel;
520 : }
521 : }
522 62235 : if (is_join(rel->op) && r->op == op_project && r->l) {
523 65342 : for (n = r->exps->h; n; n = n->next) {
524 60122 : sql_exp *e = n->data;
525 60122 : if (!(is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) &&
526 59710 : !(e->type == e_column && !has_label(e)))
527 : return rel;
528 : }
529 : }
530 :
531 43690 : if (l->op == op_project && l->l) {
532 : /* Go through the list of project expressions.
533 : Check if they can be pushed up, ie are they not
534 : changing or introducing any columns used
535 : by the upper operator. */
536 :
537 39835 : exps = new_exp_list(v->sql->sa);
538 904108 : for (n = l->exps->h; n; n = n->next) {
539 864273 : sql_exp *e = n->data;
540 :
541 : /* we cannot rewrite projection with atomic values from outer joins */
542 864273 : if (is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) {
543 663 : list_append(exps, e);
544 863610 : } else if (e->type == e_column) {
545 863610 : if (has_label(e))
546 : return rel;
547 863610 : list_append(exps, e);
548 : } else {
549 : return rel;
550 : }
551 : }
552 : } else {
553 3855 : exps = rel_projections(v->sql, l, NULL, 1, 1);
554 : }
555 43690 : nlexps = list_length(exps);
556 : /* also handle right hand of join */
557 43690 : if (is_join(rel->op) && r->op == op_project && r->l && list_empty(rel->attr)) {
558 : /* Here we also check all expressions of r like above
559 : but also we need to check for ambiguous names. */
560 :
561 43638 : for (n = r->exps->h; n; n = n->next) {
562 38669 : sql_exp *e = n->data;
563 :
564 : /* we cannot rewrite projection with atomic values from outer joins */
565 38669 : if (is_column(e->type) && exp_is_atom(e) && !(is_left(rel->op) || is_full(rel->op))) {
566 150 : list_append(exps, e);
567 38519 : } else if (e->type == e_column) {
568 38268 : if (has_label(e))
569 : return rel;
570 38268 : list_append(exps, e);
571 : } else {
572 : return rel;
573 : }
574 : }
575 38470 : } else if (is_join(rel->op) && list_empty(rel->attr)) {
576 21098 : list *r_exps = rel_projections(v->sql, r, NULL, 1, 1);
577 21098 : list_merge(exps, r_exps, (fdup)NULL);
578 : }
579 43439 : if (!list_empty(rel->attr))
580 0 : append(exps, exp_ref(v->sql, rel->attr->h->data));
581 : /* Here we should check for ambiguous names ? */
582 43439 : if (is_join(rel->op) && r && list_empty(rel->attr)) {
583 26067 : t = (l->op == op_project && l->l)?l->l:l;
584 26067 : l_exps = rel_projections(v->sql, t, NULL, 1, 1);
585 : /* conflict with old right expressions */
586 26067 : r_exps = rel_projections(v->sql, r, NULL, 1, 1);
587 26067 : if (rel->attr)
588 0 : append(r_exps, exp_ref(v->sql, rel->attr->h->data));
589 26067 : t = (r->op == op_project && r->l)?r->l:r;
590 26067 : r_exps = rel_projections(v->sql, t, NULL, 1, 1);
591 : /* conflict with new right expressions */
592 691764 : for(n = l_exps->h; n; n = n->next) {
593 666068 : sql_exp *e = n->data;
594 :
595 666068 : if (exp_is_atom(e))
596 0 : continue;
597 666068 : if (e->nid && exps_bind_nid(r_exps, e->nid))
598 : return rel;
599 665697 : if (e->alias.label && exps_bind_nid(r_exps, e->alias.label))
600 : return rel;
601 : }
602 : /* conflict with new left expressions */
603 175825 : for(n = r_exps->h; n; n = n->next) {
604 150129 : sql_exp *e = n->data;
605 :
606 150129 : if (exp_is_atom(e))
607 0 : continue;
608 150129 : if (e->nid && exps_bind_nid(l_exps, e->nid))
609 : return rel;
610 150129 : if (e->alias.label && exps_bind_nid(l_exps, e->alias.label))
611 : return rel;
612 : }
613 : }
614 :
615 : /* rename operator expressions */
616 43068 : if (l->op == op_project) {
617 : /* rewrite rel from rel->l into rel->l->l */
618 39435 : if (rel->exps) {
619 81308 : for (n = rel->exps->h; n; n = n->next) {
620 43260 : sql_exp *e = n->data, *ne;
621 :
622 43260 : ne = exp_rename(v->sql, e, l, l->l);
623 43260 : assert(ne);
624 43260 : if (ne != e && exp_name(e))
625 0 : exp_propagate(v->sql->sa, ne, e);
626 43260 : n->data = ne;
627 : }
628 : }
629 39435 : rel->l = l->l;
630 39435 : l->l = NULL;
631 39435 : rel_destroy(l);
632 : }
633 43068 : if (is_join(rel->op) && r->op == op_project && list_empty(rel->attr)) {
634 : /* rewrite rel from rel->r into rel->r->l */
635 4608 : if (rel->exps) {
636 8488 : for (n = rel->exps->h; n; n = n->next) {
637 4536 : sql_exp *e = n->data, *ne;
638 :
639 4536 : ne = exp_rename(v->sql, e, r, r->l);
640 4536 : assert(ne);
641 4536 : if (ne != e && exp_name(e))
642 0 : exp_propagate(v->sql->sa, ne, e);
643 4536 : n->data = ne;
644 : }
645 : }
646 4608 : rel->r = r->l;
647 4608 : r->l = NULL;
648 4608 : rel_destroy(r);
649 : }
650 : /* Done, ie introduce new project */
651 43068 : exps_fix_card(exps, rel->card);
652 : /* Fix nil flag */
653 43068 : if (!list_empty(exps)) {
654 926548 : for (n = exps->h ; n && i < nlexps ; n = n->next, i++) {
655 883480 : sql_exp *e = n->data;
656 :
657 883480 : if (is_right(rel->op) || is_full(rel->op))
658 449 : set_has_nil(e);
659 883480 : set_not_unique(e);
660 : }
661 186560 : for (; n ; n = n->next) {
662 143492 : sql_exp *e = n->data;
663 :
664 143492 : if (is_left(rel->op) || is_full(rel->op))
665 44540 : set_has_nil(e);
666 143492 : set_not_unique(e);
667 : }
668 : }
669 43068 : v->changes++;
670 43068 : return rel_inplace_project(v->sql->sa, rel, NULL, exps);
671 : }
672 2424485 : if (is_groupby(rel->op) && !rel_is_ref(rel) && rel->exps && list_length(rel->exps) > 1) {
673 41602 : node *n;
674 41602 : int fnd = 0;
675 41602 : list *aexps, *pexps;
676 :
677 : /* check if some are expressions aren't e_aggr */
678 163331 : for (n = rel->exps->h; n && !fnd; n = n->next) {
679 121729 : sql_exp *e = n->data;
680 :
681 121729 : if (e->type != e_aggr && e->type != e_column && e->type != e_atom && e->card > CARD_ATOM) {
682 121729 : fnd = 1;
683 : }
684 : }
685 : /* only aggr, no rewrite needed */
686 41602 : if (!fnd)
687 : return rel;
688 :
689 0 : aexps = sa_list(v->sql->sa);
690 0 : pexps = sa_list(v->sql->sa);
691 0 : for (n = rel->exps->h; n; n = n->next) {
692 0 : sql_exp *e = n->data, *ne = NULL;
693 :
694 0 : switch (e->type) {
695 0 : case e_atom: /* move over to the projection */
696 0 : list_append(pexps, e);
697 0 : break;
698 0 : case e_func:
699 0 : list_append(pexps, e);
700 0 : list_split_aggr_and_project(v->sql, aexps, e->l);
701 0 : break;
702 0 : case e_convert:
703 0 : list_append(pexps, e);
704 0 : e->l = split_aggr_and_project(v->sql, aexps, e->l);
705 0 : break;
706 0 : default: /* simple alias */
707 0 : list_append(aexps, e);
708 0 : ne = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
709 0 : assert(0);
710 : list_append(pexps, ne);
711 : break;
712 : }
713 : }
714 0 : v->changes++;
715 0 : rel->exps = aexps;
716 0 : return rel_inplace_project( v->sql->sa, rel, NULL, pexps);
717 : }
718 : return rel;
719 : }
720 :
721 : static sql_rel *
722 184980 : rel_push_project_up(visitor *v, global_props *gp, sql_rel *rel)
723 : {
724 184980 : (void) gp;
725 184980 : return rel_visitor_bottomup(v, rel, &rel_push_project_up_);
726 : }
727 :
728 : run_optimizer
729 629584 : bind_push_project_up(visitor *v, global_props *gp)
730 : {
731 629584 : int flag = v->sql->sql_optimizer;
732 629388 : return gp->opt_level == 1 && (flag & push_project_up) &&
733 1258972 : gp->cnt[op_project] ? rel_push_project_up : NULL;
734 : }
735 :
736 :
737 : static void split_exps(mvc *sql, list *exps, sql_rel *rel);
738 :
739 : static int
740 3154616 : exp_refers_cmp( sql_exp *e1, sql_exp *e2)
741 : {
742 3154616 : if (exp_refers(e1,e2))
743 0 : return 0;
744 : return -1;
745 : }
746 :
747 : sql_exp *
748 133364 : add_exp_too_project(mvc *sql, sql_exp *e, sql_rel *rel)
749 : {
750 133364 : node *n = list_find(rel->exps, e, (fcmp)&exp_match_exp_cmp);
751 :
752 : /* if not matching we may refer to an older expression */
753 133364 : if (!n)
754 84594 : n = list_find(rel->exps, e, (fcmp)&exp_refers_cmp);
755 84594 : if (!n) {
756 84594 : exp_label(sql->sa, e, ++sql->label);
757 84594 : append(rel->exps, e);
758 : } else {
759 48770 : sql_exp *ne = n->data;
760 :
761 48770 : if (rel && rel->l) {
762 48770 : if (ne && ne->alias.label && rel_find_nid(rel->l, ne->alias.label)) {
763 0 : exp_label(sql->sa, e, ++sql->label);
764 0 : append(rel->exps, e);
765 0 : ne = e;
766 : }
767 : }
768 : e = ne;
769 : }
770 133364 : e = exp_ref(sql, e);
771 133364 : return e;
772 : }
773 :
774 : static void
775 154582 : add_exps_too_project(mvc *sql, list *exps, sql_rel *rel)
776 : {
777 154582 : node *n;
778 :
779 154582 : if (!exps)
780 : return;
781 478687 : for(n=exps->h; n; n = n->next) {
782 324105 : sql_exp *e = n->data;
783 :
784 324105 : if (e->type != e_column && !exp_is_atom(e))
785 133081 : n->data = add_exp_too_project(sql, e, rel);
786 : }
787 : }
788 :
789 : static sql_exp *
790 509789 : split_exp(mvc *sql, sql_exp *e, sql_rel *rel)
791 : {
792 509789 : if (exp_is_atom(e))
793 : return e;
794 378895 : switch(e->type) {
795 : case e_column:
796 : return e;
797 16920 : case e_convert:
798 16920 : e->l = split_exp(sql, e->l, rel);
799 16920 : return e;
800 164398 : case e_aggr:
801 : case e_func:
802 164398 : if (!is_analytic(e) && !exp_has_sideeffect(e)) {
803 162458 : sql_subfunc *f = e->f;
804 162458 : if (e->type == e_func && !f->func->s && is_caselike_func(f) /*is_ifthenelse_func(f)*/) {
805 : return e;
806 : } else {
807 154582 : split_exps(sql, e->l, rel);
808 154582 : add_exps_too_project(sql, e->l, rel);
809 : }
810 : }
811 : return e;
812 486 : case e_cmp:
813 486 : if (e->flag == cmp_or || e->flag == cmp_filter) {
814 463 : split_exps(sql, e->l, rel);
815 463 : split_exps(sql, e->r, rel);
816 23 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
817 0 : e->l = split_exp(sql, e->l, rel);
818 0 : split_exps(sql, e->r, rel);
819 : } else {
820 23 : e->l = split_exp(sql, e->l, rel);
821 23 : e->r = split_exp(sql, e->r, rel);
822 23 : if (e->f)
823 21 : e->f = split_exp(sql, e->f, rel);
824 : }
825 : return e;
826 : case e_atom:
827 : case e_psm:
828 : return e;
829 : }
830 : return e;
831 : }
832 :
833 : static void
834 155515 : split_exps(mvc *sql, list *exps, sql_rel *rel)
835 : {
836 155515 : if (list_empty(exps))
837 : return;
838 480567 : for(node *n=exps->h; n; n = n->next){
839 325052 : sql_exp *e = n->data;
840 :
841 325052 : e = split_exp(sql, e, rel);
842 325052 : n->data = e;
843 : }
844 : }
845 :
846 : sql_rel *
847 638742 : rel_split_project_(visitor *v, sql_rel *rel, int top)
848 : {
849 1277506 : if (mvc_highwater(v->sql))
850 : return rel;
851 :
852 638749 : if (!rel)
853 : return NULL;
854 638749 : if (is_project(rel->op) && list_length(rel->exps) && (is_groupby(rel->op) || rel->l) && !need_distinct(rel) && !is_single(rel)) {
855 178917 : list *exps = rel->exps;
856 178917 : node *n;
857 178917 : int funcs = 0;
858 178917 : sql_rel *nrel;
859 :
860 : /* are there functions */
861 1395926 : for (n=exps->h; n && !funcs; n = n->next) {
862 1217015 : sql_exp *e = n->data;
863 :
864 1217015 : funcs = exp_has_func(e);
865 : }
866 : /* introduce extra project */
867 178911 : if (funcs && rel->op != op_project) {
868 7 : nrel = rel_project(v->sql->sa, rel->l,
869 7 : rel_projections(v->sql, rel->l, NULL, 1, 1));
870 7 : rel->l = nrel;
871 : /* recursively split all functions and add those to the projection list */
872 7 : split_exps(v->sql, rel->exps, nrel);
873 14 : if (nrel->l && !(nrel->l = rel_split_project_(v, nrel->l, (is_topn(rel->op)||is_sample(rel->op))?top:0)))
874 : return NULL;
875 7 : return rel;
876 178904 : } else if (funcs && !top && list_empty(rel->r)) {
877 : /* projects can have columns point back into the expression list, ie
878 : * create a new list including the split expressions */
879 17203 : node *n;
880 17203 : list *exps = rel->exps;
881 :
882 17203 : rel->exps = sa_list(v->sql->sa);
883 184953 : for (n=exps->h; n; n = n->next)
884 167750 : append(rel->exps, split_exp(v->sql, n->data, rel));
885 161701 : } else if (funcs && top && rel_is_ref(rel) && list_empty(rel->r)) {
886 : /* inplace */
887 9 : list *exps = rel_projections(v->sql, rel, NULL, 1, 1);
888 9 : sql_rel *l = rel_project(v->sql->sa, rel->l, NULL);
889 9 : rel->l = l;
890 9 : l->exps = rel->exps;
891 9 : set_processed(l);
892 9 : rel->exps = exps;
893 : }
894 : }
895 638746 : if (is_mset(rel->op) || is_set(rel->op) || is_basetable(rel->op))
896 : return rel;
897 418353 : if (rel->l && (rel->op != op_table || rel->flag != TRIGGER_WRAPPER)) {
898 438415 : rel->l = rel_split_project_(v, rel->l, (is_topn(rel->op)||is_sample(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0);
899 407981 : if (!rel->l)
900 : return NULL;
901 : }
902 418363 : if ((is_join(rel->op) || is_semi(rel->op)) && rel->r) {
903 121430 : rel->r = rel_split_project_(v, rel->r, (is_topn(rel->op)||is_sample(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0);
904 121430 : if (!rel->r)
905 : return NULL;
906 : }
907 : return rel;
908 : }
909 :
910 : static sql_rel *
911 109335 : rel_split_project(visitor *v, global_props *gp, sql_rel *rel)
912 : {
913 109335 : (void) gp;
914 109335 : return rel_split_project_(v, rel, 1);
915 : }
916 :
917 : run_optimizer
918 629541 : bind_split_project(visitor *v, global_props *gp)
919 : {
920 629541 : int flag = v->sql->sql_optimizer;
921 552966 : return gp->opt_cycle == 0 && gp->opt_level == 1 && (flag & split_project) &&
922 1182499 : (gp->cnt[op_project] || gp->cnt[op_groupby]) ? rel_split_project : NULL;
923 : }
924 :
925 :
926 : static sql_rel *
927 185172 : rel_project_reduce_casts(visitor *v, global_props *gp, sql_rel *rel)
928 : {
929 185172 : if (!rel)
930 : return NULL;
931 185172 : (void) gp;
932 185172 : if (gp->opt_level == 1 && v->value_based_opt && is_simple_project(rel->op) && list_length(rel->exps)) {
933 98619 : list *exps = rel->exps;
934 98619 : node *n;
935 :
936 512787 : for (n=exps->h; n; n = n->next) {
937 414168 : sql_exp *e = n->data;
938 :
939 414168 : if (e && e->type == e_func) {
940 30664 : sql_subfunc *f = e->f;
941 30664 : sql_subtype *res = f->res->h->data;
942 :
943 30664 : if (!f->func->s && !strcmp(f->func->base.name, "sql_mul") && res->scale > 0) {
944 6 : list *args = e->l;
945 6 : sql_exp *h = args->h->data;
946 6 : sql_exp *t = args->t->data;
947 6 : atom *ha = exp_value(v->sql, h), *ta = exp_value(v->sql, t);
948 :
949 6 : if (ha || ta) {
950 5 : atom *a = ha ? ha : ta;
951 5 : atom *na = reduce_scale(v->sql, a);
952 :
953 5 : if (na && na != a) {
954 2 : int rs = a->tpe.scale - na->tpe.scale;
955 2 : res->scale -= rs;
956 2 : if (ha) {
957 0 : h->r = NULL;
958 0 : h->l = na;
959 : } else {
960 2 : t->r = NULL;
961 2 : t->l = na;
962 : }
963 2 : v->changes++;
964 : }
965 : }
966 : }
967 : }
968 : }
969 : }
970 : return rel;
971 : }
972 :
973 : run_optimizer
974 629751 : bind_project_reduce_casts(visitor *v, global_props *gp)
975 : {
976 629751 : int flag = v->sql->sql_optimizer;
977 629751 : return gp->cnt[op_project] && (flag & project_reduce_casts) ? rel_project_reduce_casts : NULL;
978 : }
979 :
980 : static sql_column *
981 99377 : exp_find_column_( sql_rel *rel, sql_exp *exp, int pnr, sql_rel **bt )
982 : {
983 99377 : if (exp->type == e_column)
984 99351 : return name_find_column(rel, exp->l, exp->r, pnr, bt);
985 : return NULL;
986 : }
987 :
988 : static int
989 0 : rel_part_nr( sql_rel *rel, sql_exp *e )
990 : {
991 0 : sql_column *c = NULL;
992 0 : sql_rel *bt = NULL;
993 0 : assert(e->type == e_cmp);
994 :
995 0 : c = exp_find_column_(rel, e->l, -1, &bt);
996 0 : if (!c)
997 0 : c = exp_find_column_(rel, e->r, -1, &bt);
998 0 : if (!c && e->f)
999 0 : c = exp_find_column_(rel, e->f, -1, &bt);
1000 0 : if (!c || !bt || !rel_base_get_mergetable(bt))
1001 0 : return -1;
1002 0 : sql_table *pp = c->t;
1003 0 : sql_table *mt = rel_base_get_mergetable(bt);
1004 0 : return find_member_pos(mt->members, pp);
1005 : }
1006 :
1007 : static int
1008 0 : rel_uses_part_nr( sql_rel *rel, sql_exp *e, int pnr )
1009 : {
1010 0 : sql_column *c = NULL;
1011 0 : sql_rel *bt = NULL;
1012 0 : assert(e->type == e_cmp);
1013 :
1014 : /*
1015 : * following case fails.
1016 : *
1017 : * semijoin( A1, union [A1, A2] )
1018 : * The union will never return proper column (from A2).
1019 : * ie need different solution (probably pass pnr).
1020 : */
1021 0 : c = exp_find_column_(rel, e->l, pnr, &bt);
1022 0 : if (!c)
1023 0 : c = exp_find_column_(rel, e->r, pnr, &bt);
1024 0 : if (c && bt && rel_base_get_mergetable(bt)) {
1025 0 : sql_table *pp = c->t;
1026 0 : sql_table *mt = rel_base_get_mergetable(bt);
1027 0 : if (find_member_pos(mt->members, pp) == pnr)
1028 : return 1;
1029 : }
1030 : /* for projects we may need to do a rename! */
1031 0 : if (is_project(rel->op) || is_topn(rel->op) || is_sample(rel->op))
1032 0 : return rel_uses_part_nr( rel->l, e, pnr);
1033 :
1034 0 : if (is_munion(rel->op)) {
1035 : list *l = rel->l;
1036 : if (rel_uses_part_nr( l->h->data, e, pnr))
1037 : return 1;
1038 0 : } else if (is_union(rel->op) || is_join(rel->op) || is_semi(rel->op)) {
1039 0 : if (rel_uses_part_nr( rel->l, e, pnr))
1040 : return 1;
1041 0 : if (!is_semi(rel->op) && rel_uses_part_nr( rel->r, e, pnr))
1042 : return 1;
1043 : }
1044 : return 0;
1045 : }
1046 :
1047 : static sql_column *
1048 3030 : exp_is_pkey(sql_rel *rel, sql_exp *e)
1049 : {
1050 3030 : if (find_prop(e->p, PROP_HASHCOL)) { /* aligned PKEY JOIN */
1051 247 : fcmp cmp = (fcmp)&kc_column_cmp;
1052 247 : sql_column *c = exp_find_column(rel, e, -2);
1053 :
1054 247 : if (c && c->t->pkey && list_find(c->t->pkey->k.columns, c, cmp) != NULL)
1055 : return c;
1056 : }
1057 : return NULL;
1058 : }
1059 :
1060 : static sql_exp *
1061 3531 : rel_is_join_on_pkey(sql_rel *rel, bool pk_fk) /* pk_fk is used to verify is a join on pk-fk */
1062 : {
1063 3531 : if (!rel || !rel->exps)
1064 : return NULL;
1065 4856 : for (node *n = rel->exps->h; n; n = n->next) {
1066 2576 : sql_exp *je = n->data;
1067 :
1068 3892 : if (je->type == e_cmp && je->flag == cmp_equal &&
1069 2632 : (exp_is_pkey(rel, je->l) || exp_is_pkey(rel, je->r)) &&
1070 246 : (!pk_fk || find_prop(je->p, PROP_JOINIDX)))
1071 0 : return je;
1072 : }
1073 : return NULL;
1074 : }
1075 :
1076 : static int
1077 0 : exps_has_predicate( list *l )
1078 : {
1079 0 : node *n;
1080 :
1081 0 : for( n = l->h; n; n = n->next){
1082 0 : sql_exp *e = n->data;
1083 :
1084 0 : if (e->card <= CARD_ATOM)
1085 : return 1;
1086 : }
1087 : return 0;
1088 : }
1089 :
1090 : static sql_rel *
1091 735 : rel_find_select( sql_rel *r)
1092 : {
1093 2416 : while (!is_select(r->op) && r->l && is_project(r->op))
1094 : r = r->l;
1095 735 : if (is_select(r->op))
1096 15 : return r;
1097 : return NULL;
1098 : }
1099 :
1100 : static bool
1101 144657 : rels_share_rel(list *l)
1102 : {
1103 144657 : sql_rel *ref = NULL;
1104 227311 : for (node *n = l->h; n; n = n->next) {
1105 226576 : sql_rel *r = n->data;
1106 226576 : if(!ref) {
1107 144657 : if ((ref = rel_find_ref(r)) == NULL)
1108 : return false;
1109 81919 : } else if (ref != rel_find_ref(r)) {
1110 : return false;
1111 : }
1112 : }
1113 : return true;
1114 : }
1115 :
1116 : static inline sql_rel *
1117 2802257 : rel_merge_munion(visitor *v, sql_rel *rel)
1118 : {
1119 2802257 : if (is_munion(rel->op) && rels_share_rel(rel->l)) {
1120 735 : list *rels = rel->l, *nrels = NULL;
1121 735 : sql_rel *cur = NULL, *curs = NULL;
1122 :
1123 : /* Find selects and try to merge */
1124 735 : for(node *n = rels->h; n; n = n->next) {
1125 735 : sql_rel *r = n->data;
1126 735 : sql_rel *s = rel_find_select(r);
1127 :
1128 735 : if (!s)
1129 : return rel;
1130 15 : if (cur) {
1131 0 : if (s != r->l || curs->l != s->l || !rel_is_ref(s->l) ||
1132 : /* for now only union(project*(select(R),project*(select(R))) */
1133 0 : !s->exps || !curs->exps || exps_has_predicate(s->exps) || exps_has_predicate(curs->exps)) {
1134 0 : if (nrels)
1135 0 : append(nrels, r);
1136 : else
1137 : return rel;
1138 : }
1139 :
1140 : /* merge, ie. add 'or exp' */
1141 0 : curs->exps = append(new_exp_list(v->sql->sa), exp_or(v->sql->sa, cur->exps, s->exps, 0));
1142 0 : if (!nrels) {
1143 0 : nrels = sa_list(v->sql->sa);
1144 0 : append(nrels, cur);
1145 : }
1146 : } else {
1147 15 : if (s != r->l || !rel_is_ref(s->l)) {
1148 15 : if (nrels) {
1149 0 : append(nrels, r);
1150 : } else {
1151 : return rel;
1152 : }
1153 : }
1154 : cur = r;
1155 : curs = s;
1156 : }
1157 : }
1158 0 : if (nrels) {
1159 0 : v->changes++;
1160 0 : rel->l = nrels;
1161 : }
1162 0 : return rel;
1163 : }
1164 : return rel;
1165 : }
1166 :
1167 : static sql_rel *
1168 2802257 : rel_optimize_munions_bottomup_(visitor *v, sql_rel *rel)
1169 : {
1170 2802257 : rel = rel_merge_munion(v, rel);
1171 2802257 : return rel;
1172 : }
1173 :
1174 : static sql_rel *
1175 28976 : rel_optimize_munions_bottomup(visitor *v, global_props *gp, sql_rel *rel)
1176 : {
1177 28976 : (void) gp;
1178 28976 : return rel_visitor_bottomup(v, rel, &rel_optimize_munions_bottomup_);
1179 : }
1180 :
1181 : run_optimizer
1182 629598 : bind_optimize_unions_bottomup(visitor *v, global_props *gp)
1183 : {
1184 629598 : int flag = v->sql->sql_optimizer;
1185 629402 : return gp->opt_level == 1 && gp->cnt[op_munion] && (flag & optimize_unions_bottomup)
1186 629598 : ? rel_optimize_munions_bottomup : NULL;
1187 : }
1188 :
1189 :
1190 : static inline sql_rel *
1191 3874521 : rel_project_cse(visitor *v, sql_rel *rel)
1192 : {
1193 3874521 : if (is_project(rel->op) && rel->exps) {
1194 1572976 : node *n, *m;
1195 1572976 : list *nexps;
1196 1572976 : int needed = 0;
1197 :
1198 9385584 : for (n=rel->exps->h; n && !needed; n = n->next) {
1199 7812606 : sql_exp *e1 = n->data;
1200 :
1201 7812606 : if (e1->type != e_column && !exp_is_atom(e1) && exp_name(e1)) {
1202 16542001 : for (m=n->next; m && !needed; m = m->next){
1203 15760109 : sql_exp *e2 = m->data;
1204 :
1205 15760109 : if (exp_name(e2) && exp_match_exp(e1, e2))
1206 15760109 : needed = 1;
1207 : }
1208 : }
1209 : }
1210 :
1211 1572978 : if (!needed)
1212 : return rel;
1213 :
1214 739 : nexps = new_exp_list(v->sql->sa);
1215 8621 : for (n=rel->exps->h; n; n = n->next) {
1216 7882 : sql_exp *e1 = n->data;
1217 :
1218 7882 : if (e1->type != e_column && !exp_is_atom(e1) && exp_name(e1)) {
1219 33802 : for (m=nexps->h; m; m = m->next){
1220 29202 : sql_exp *e2 = m->data;
1221 :
1222 29202 : if (exp_name(e2) && exp_match_exp(e1, e2)) {
1223 779 : assert(e2->alias.label);
1224 779 : sql_exp *ne = exp_ref(v->sql, e2);
1225 :
1226 779 : ne = exp_propagate(v->sql->sa, ne, e1);
1227 779 : assert(!e1->ref);
1228 779 : exp_setalias(ne, e1->alias.label, exp_relname(e1), exp_name(e1));
1229 779 : set_selfref(ne);
1230 779 : assert(ne->nid);
1231 : e1 = ne;
1232 : break;
1233 : }
1234 : }
1235 : }
1236 7882 : append(nexps, e1);
1237 : }
1238 739 : rel->exps = nexps;
1239 : }
1240 : return rel;
1241 : }
1242 :
1243 : static int exp_is_const_op(sql_exp *exp, sql_exp *tope, sql_rel *expr);
1244 :
1245 : static int
1246 76 : exps_are_const_op(list *exps, sql_exp *tope, sql_rel *expr)
1247 : {
1248 76 : int ok = 1;
1249 :
1250 76 : if (list_empty(exps))
1251 : return 1;
1252 152 : for (node *n = exps->h; n && ok; n = n->next)
1253 76 : ok &= exp_is_const_op(n->data, tope, expr);
1254 : return ok;
1255 : }
1256 :
1257 : static int
1258 526 : exp_is_const_op(sql_exp *exp, sql_exp *tope, sql_rel *expr)
1259 : {
1260 666 : switch (exp->type) {
1261 30 : case e_atom:
1262 30 : return exp->f ? 0 : 1;
1263 2 : case e_convert:
1264 2 : return exp_is_const_op(exp->l, tope, expr);
1265 76 : case e_func:
1266 : case e_aggr: {
1267 76 : sql_subfunc *f = exp->f;
1268 76 : if (f->func->side_effect || IS_ANALYTIC(f->func))
1269 : return 0;
1270 76 : return exps_are_const_op(exp->l, tope, expr);
1271 : }
1272 0 : case e_cmp:
1273 0 : if (exp->flag == cmp_or || exp->flag == cmp_filter)
1274 0 : return exps_are_const_op(exp->l, tope, expr) && exps_are_const_op(exp->r, tope, expr);
1275 0 : if (exp->flag == cmp_in || exp->flag == cmp_notin)
1276 0 : return exp_is_const_op(exp->l, tope, expr) && exps_are_const_op(exp->r, tope, expr);
1277 0 : return exp_is_const_op(exp->l, tope, expr) && exp_is_const_op(exp->r, tope, expr) && (!exp->f || exp_is_const_op(exp->f, tope, expr));
1278 558 : case e_column: {
1279 558 : if (is_simple_project(expr->op) || is_groupby(expr->op)) {
1280 : /* in a simple projection, self-references may occur */
1281 558 : sql_exp *nexp = exps_bind_nid(expr->exps, exp->nid);
1282 558 : if (nexp && list_position(expr->exps, nexp) < list_position(expr->exps, tope))
1283 : return exp_is_const_op(nexp, exp, expr);
1284 : }
1285 : return 0;
1286 : }
1287 : default:
1288 : return 0;
1289 : }
1290 : }
1291 :
1292 : static sql_exp *
1293 25 : rel_groupby_add_count_star(mvc *sql, sql_rel *rel, sql_exp *count_star_exp, bool *count_added)
1294 : {
1295 25 : if (count_star_exp)
1296 : return count_star_exp;
1297 18 : if (!list_empty(rel->exps)) {
1298 49 : for (node *n=rel->exps->h; n ; n = n->next) {
1299 35 : sql_exp *e = n->data;
1300 :
1301 35 : if (exp_aggr_is_count(e) && !need_distinct(e) && list_length(e->l) == 0)
1302 4 : return e;
1303 : }
1304 : }
1305 14 : sql_subfunc *cf = sql_bind_func(sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
1306 14 : *count_added = true;
1307 14 : return rel_groupby_add_aggr(sql, rel, exp_aggr(sql->sa, NULL, cf, 0, 0, rel->card, 0));
1308 : }
1309 :
1310 : /* optimize sum(x + 12) into sum(x) + 12*count(*) */
1311 : static inline sql_rel *
1312 42783 : rel_simplify_sum(visitor *v, sql_rel *rel)
1313 : {
1314 42783 : if (is_groupby(rel->op) && !list_empty(rel->exps)) {
1315 42728 : sql_rel *upper = NULL, *groupby = rel, *l = groupby->l;
1316 42728 : sql_exp *count_star_exp = NULL;
1317 42728 : bool count_added = false;
1318 :
1319 134561 : for (node *n=groupby->exps->h; n ; n = n->next) {
1320 91833 : sql_exp *e = n->data;
1321 91833 : list *el = e->l;
1322 91833 : sql_subfunc *sf = e->f;
1323 :
1324 91833 : if (e->type == e_aggr && !need_distinct(e) && sf->func->type == F_AGGR && !sf->func->s && !strcmp(sf->func->base.name, "sum")) {
1325 8964 : sql_rel *expr = groupby;
1326 8964 : sql_exp *exp = (sql_exp*) el->h->data, *oexp = exp;
1327 :
1328 8964 : while (is_numeric_upcast(exp))
1329 0 : exp = exp->l;
1330 : /* we want to find a +/- call, so expect them to appear only on simple projections */
1331 19445 : while (exp && exp->type == e_column && (is_simple_project(expr->op) || is_groupby(expr->op)) && expr->l) {
1332 12145 : sql_rel *nexpr = NULL;
1333 12145 : sql_exp *nexp = rel_find_exp_and_corresponding_rel(l, exp, false, &nexpr, NULL);
1334 :
1335 : /* break when it loops on the same relation */
1336 12145 : if (nexpr == expr && list_position(expr->exps, nexp) >= list_position(expr->exps, exp))
1337 : break;
1338 10481 : expr = nexpr;
1339 10481 : exp = oexp = nexp;
1340 10483 : while (exp && is_numeric_upcast(exp))
1341 2 : exp = exp->l;
1342 : }
1343 :
1344 8964 : list *expl = exp ? exp->l : NULL;
1345 91833 : sql_subfunc *expf = exp ? exp->f : NULL;
1346 : /* found a candidate function */
1347 7370 : if (exp && exp->type == e_func && expf->func->type == F_FUNC && !expf->func->s &&
1348 1255 : (!strcmp(expf->func->base.name, "sql_sub") || !strcmp(expf->func->base.name, "sql_add"))) {
1349 225 : sql_exp *e1 = (sql_exp*) expl->h->data, *e2 = (sql_exp*) expl->h->next->data;
1350 225 : int e1ok = exp_is_const_op(e1, oexp, expr), e2ok = exp_is_const_op(e2, oexp, expr);
1351 :
1352 225 : if ((!e1ok && e2ok) || (e1ok && !e2ok)) {
1353 26 : sql_exp *ocol = e1ok ? e2 : e1, *constant = e1ok ? e1 : e2, *mul, *colref, *naggr, *newop, *col = ocol, *match;
1354 26 : bool add_col = true, prepend = false;
1355 :
1356 : /* if 'col' is a projection from the under relation, then use it */
1357 26 : while (is_numeric_upcast(col))
1358 0 : col = col->l;
1359 26 : if (col->type == e_column) {
1360 24 : sql_exp *colf = exps_find_exp(l->exps, col);
1361 :
1362 : /* col is already found in the inner relation. Also look for a new reference for col, eg sql_add(col, 1), 1 as col */
1363 24 : if (colf && list_position(l->exps, colf) < list_position(l->exps, oexp)) {
1364 : add_col = false;
1365 11 : } else if (!colf && is_simple_project(l->op) && list_empty(l->r) && !rel_is_ref(l) && !need_distinct(l)) {
1366 : prepend = true;
1367 : add_col = false;
1368 1 : } else if (!colf && (is_simple_project(l->op) || is_groupby(l->op))) {
1369 : /* on these scenarios the new column expression will be ordered/(grouped for distinct) or create potential ambiguity (multiple ref), so skip */
1370 1 : continue;
1371 : }
1372 2 : } else if ((is_simple_project(l->op) && (!list_empty(l->r) || rel_is_ref(l) || need_distinct(l))) || is_groupby(l->op)) {
1373 0 : continue;
1374 : }
1375 :
1376 : /* add count star */
1377 25 : count_star_exp = rel_groupby_add_count_star(v->sql, groupby, count_star_exp, &count_added);
1378 : /* multiply constant by count star */
1379 25 : if (!(mul = rel_binop_(v->sql, NULL, constant, exp_ref(v->sql, count_star_exp), "sys", "sql_mul", card_value, true))) {
1380 0 : v->sql->session->status = 0;
1381 0 : v->sql->errstr[0] = '\0';
1382 0 : continue;
1383 : }
1384 25 : if (!has_label(mul))
1385 25 : exp_label(v->sql->sa, mul, ++v->sql->label);
1386 :
1387 25 : colref = exp_ref(v->sql, ocol);
1388 25 : if (add_col) /* if 'col' will be added, then make sure it has an unique label */
1389 3 : exp_label(v->sql->sa, colref, ++v->sql->label);
1390 :
1391 : /* 'oexp' contains the type for the input for the 'sum' aggregate */
1392 25 : if (!(colref = exp_check_type(v->sql, exp_subtype(oexp), groupby, colref, type_equal))) {
1393 0 : v->sql->session->status = 0;
1394 0 : v->sql->errstr[0] = '\0';
1395 0 : continue;
1396 : }
1397 : /* update sum to use the column side only */
1398 25 : sql_subfunc *a = sql_bind_func(v->sql, "sys", "sum", exp_subtype(colref), NULL, F_AGGR, true, true);
1399 25 : if (!a)
1400 0 : continue;
1401 25 : naggr = exp_aggr(v->sql->sa, list_append(sa_list(v->sql->sa), colref), a, need_distinct(e), need_no_nil(e), groupby->card, has_nil(e));
1402 25 : if ((match = exps_any_match(groupby->exps, naggr)) && list_position(groupby->exps, match) < list_position(groupby->exps, e)) { /* found a matching aggregate, use it */
1403 9 : naggr = exp_ref(v->sql, match);
1404 9 : exp_label(v->sql->sa, naggr, ++v->sql->label);
1405 16 : } else if (!has_label(naggr)) { /* otherwise use the new one */
1406 16 : exp_label(v->sql->sa, naggr, ++v->sql->label);
1407 : }
1408 :
1409 : /* generate addition/subtraction. subtraction is not commutative, so keep original order! */
1410 25 : if (!(newop = rel_binop_(v->sql, NULL, e1 == constant ? mul : exp_ref(v->sql, naggr), e1 == constant ? exp_ref(v->sql, naggr) : mul, "sys", expf->func->base.name, card_value, true))) {
1411 0 : v->sql->session->status = 0;
1412 0 : v->sql->errstr[0] = '\0';
1413 0 : continue;
1414 : }
1415 25 : if (!(newop = exp_check_type(v->sql, exp_subtype(e), groupby, newop, type_equal))) {
1416 0 : v->sql->session->status = 0;
1417 0 : v->sql->errstr[0] = '\0';
1418 0 : continue;
1419 : }
1420 :
1421 : /* a column reference can be prepended to the inner relation, add it after all the check type calls succeed */
1422 25 : if (prepend)
1423 10 : list_prepend(l->exps, col);
1424 :
1425 : /* the new generate function calls are valid, update relations */
1426 : /* we need a new relation for the multiplication and addition/subtraction */
1427 25 : if (!upper) {
1428 : /* be careful with relations with more than 1 reference, so do in-place replacement */
1429 18 : list *projs = rel_projections(v->sql, rel, NULL, 1, 1);
1430 18 : sql_rel *nrel = rel_groupby(v->sql, rel->l, NULL);
1431 18 : nrel->exps = rel->exps;
1432 18 : nrel->r = rel->r;
1433 18 : nrel->card = rel->card;
1434 18 : nrel->nrcols = list_length(rel->exps);
1435 18 : set_processed(nrel);
1436 18 : rel_dup(rel->l);
1437 18 : upper = rel = rel_inplace_project(v->sql->sa, rel, nrel, projs);
1438 18 : rel->card = exps_card(projs);
1439 18 : groupby = nrel; /* update pointers :) */
1440 18 : l = groupby->l;
1441 : }
1442 98 : for (node *n = upper->exps->h ; n ; ) {
1443 73 : node *next = n->next;
1444 73 : sql_exp *re = n->data;
1445 :
1446 : /* remove the old reference to the aggregate because we will use the addition/subtraction instead,
1447 : as well as the count star if count_added */
1448 73 : if (exp_refers(e, re) || (count_added && exp_refers(count_star_exp, re)))
1449 39 : list_remove_node(upper->exps, NULL, n);
1450 : n = next;
1451 : }
1452 :
1453 : /* update sum aggregate with new aggregate or reference to an existing one */
1454 25 : n->data = naggr;
1455 25 : list_hash_clear(groupby->exps);
1456 :
1457 : /* add column reference with new label, if 'col' was not found */
1458 25 : if (add_col) {
1459 3 : if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l))
1460 0 : groupby->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
1461 3 : list_append(l->exps, ocol);
1462 : }
1463 :
1464 : /* propagate alias and add new expression */
1465 25 : exp_prop_alias(v->sql->sa, newop, e);
1466 25 : list_append(upper->exps, newop);
1467 25 : v->changes++;
1468 : }
1469 : }
1470 : }
1471 : }
1472 : }
1473 42783 : return rel;
1474 : }
1475 :
1476 : /* optimize group by x+1,(y-2)*3,2-z into group by x,y,z */
1477 : static inline sql_rel *
1478 42783 : rel_simplify_groupby_columns(visitor *v, sql_rel *rel)
1479 : {
1480 42783 : if (is_groupby(rel->op) && !list_empty(rel->r)) {
1481 32146 : sql_rel *l = rel->l;
1482 :
1483 83300 : for (node *n=((list*)rel->r)->h; n ; n = n->next) {
1484 51154 : sql_exp *e = n->data;
1485 51154 : e->used = 0; /* we need to use this flag, clean it first */
1486 : }
1487 83300 : for (node *n=((list*)rel->r)->h; n ; n = n->next) {
1488 51154 : sql_exp *e = n->data;
1489 :
1490 51154 : if (e->type == e_column) {
1491 51117 : bool searching = true;
1492 51117 : sql_rel *efrel = NULL;
1493 51117 : sql_exp *exp = rel_find_exp_and_corresponding_rel(l, e, false, &efrel, NULL), *col = NULL, *tope = exp;
1494 :
1495 102253 : while (searching && !col) {
1496 51136 : sql_exp *exp_col = exp;
1497 :
1498 51136 : if (exp && is_numeric_upcast(exp))
1499 54 : exp = exp->l;
1500 51136 : if (exp && exp->type == e_func) {
1501 666 : list *el = exp->l;
1502 666 : sql_subfunc *sf = exp->f;
1503 : /* At the moment look only at injective math functions */
1504 666 : if (sf->func->type == F_FUNC && !sf->func->s &&
1505 653 : (!strcmp(sf->func->base.name, "sql_sub") || !strcmp(sf->func->base.name, "sql_add") || !strcmp(sf->func->base.name, "sql_mul"))) {
1506 113 : sql_exp *e1 = (sql_exp*) el->h->data, *e2 = (sql_exp*) el->h->next->data;
1507 : /* the optimization cannot be done if side-effect calls (e.g. rand()) are present */
1508 219 : int e1ok = exp_is_atom(e1) && !exp_unsafe(e1, true, false) && !exp_has_sideeffect(e1), e2ok = exp_is_atom(e2) && !exp_unsafe(e2, true, false) && !exp_has_sideeffect(e2);
1509 :
1510 113 : if ((!e1ok && e2ok) || (e1ok && !e2ok)) {
1511 59 : sql_exp *c = e1ok ? e2 : e1;
1512 59 : bool done = false;
1513 59 : exp_col = exps_find_exp(efrel->exps, c);
1514 59 : if (exp_col)
1515 28 : c = exp_col;
1516 :
1517 120 : while (!done) {
1518 61 : if (is_numeric_upcast(c))
1519 0 : c = c->l;
1520 61 : if (c->type == e_column) {
1521 42 : if (is_simple_project(efrel->op) || is_groupby(efrel->op)) {
1522 : /* in a simple projection, self-references may occur */
1523 42 : sql_exp *nc = exps_find_exp(efrel->exps, c);
1524 42 : if (nc && list_position(efrel->exps, nc) < list_position(efrel->exps, exp_col)) {
1525 2 : exp_col = c;
1526 2 : c = nc;
1527 2 : continue;
1528 : }
1529 : }
1530 : col = c; /* 'c' is a column reference from the left relation */
1531 : done = true;
1532 : } else {
1533 : exp = c; /* maybe a nested function call, let's continue searching */
1534 : done = true;
1535 : }
1536 : }
1537 : } else {
1538 : searching = false;
1539 : }
1540 : } else {
1541 : searching = false;
1542 : }
1543 : } else {
1544 : searching = false;
1545 : }
1546 : }
1547 51117 : if (col) { /* a column reference was found */
1548 40 : const char *rname = exp_relname(e), *name = exp_name(e);
1549 :
1550 : /* the grouping column has an alias, we have to keep it */
1551 40 : if ((rname && name && (strcmp(rname, e->l) != 0 || strcmp(name, e->r) != 0)) || (!rname && name && strcmp(name, e->r) != 0)) {
1552 2 : if (!has_label(e)) /* dangerous to merge, skip it */
1553 0 : continue;
1554 2 : if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l))
1555 0 : rel->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
1556 2 : list_append(l->exps, e);
1557 2 : n->data = e = exp_ref(v->sql, e);
1558 2 : list_hash_clear(rel->r);
1559 : }
1560 :
1561 40 : sql_exp *f = exps_find_exp(rel->r, col);
1562 :
1563 40 : if (f && list_position(rel->r, f) < list_position(rel->r, e)) { /* if already present, remove it */
1564 10 : e->used = 1;
1565 : } else {
1566 : /* Use an unique reference to the column found. If there's another grouping column label pointing into it,
1567 : rel_groupby_cse will hopefully remove it */
1568 30 : sql_exp *colf = exps_find_exp(l->exps, col);
1569 :
1570 : /* col is already found in the inner relation. Also look for a new reference for col, eg sql_add(col, 1), 1 as col */
1571 30 : if (colf && list_position(l->exps, colf) < list_position(l->exps, tope)) {
1572 2 : n->data = exp_ref(v->sql, col);
1573 25 : } else if (!colf && is_simple_project(l->op) && list_empty(l->r) && !rel_is_ref(l) && !need_distinct(l)) { /* trivial case, it can be added */
1574 25 : sql_exp *ne = col;
1575 25 : list_prepend(l->exps, ne);
1576 25 : n->data = exp_ref(v->sql, ne);
1577 0 : } else if (!colf && (is_simple_project(l->op) || is_groupby(l->op))) {
1578 : /* on these scenarios the new column expression will be ordered/(grouped for distinct) or create potential ambiguity (multiple ref), so skip */
1579 0 : continue;
1580 : } else {
1581 3 : sql_exp *ne = exp_ref(v->sql, col);
1582 :
1583 3 : if (colf) /* a col reference is already there, add a new label */
1584 3 : exp_label(v->sql->sa, ne, ++v->sql->label);
1585 3 : if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l))
1586 0 : rel->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
1587 3 : list_append(l->exps, ne);
1588 3 : n->data = exp_ref(v->sql, ne);
1589 : }
1590 30 : list_hash_clear(rel->r);
1591 : }
1592 40 : v->changes++;
1593 : }
1594 : }
1595 : }
1596 83300 : for (node *n=((list*)rel->r)->h; n ; ) {
1597 51154 : node *next = n->next;
1598 51154 : sql_exp *e = n->data;
1599 :
1600 51154 : if (e->used) /* remove unnecessary grouping columns */
1601 10 : list_remove_node(rel->r, NULL, n);
1602 : n = next;
1603 : }
1604 : }
1605 42783 : return rel;
1606 : }
1607 :
1608 : /* remove identical grouping columns */
1609 : static inline sql_rel *
1610 97531 : rel_groupby_cse(visitor *v, sql_rel *rel)
1611 : {
1612 97531 : if (is_groupby(rel->op) && !list_empty(rel->r)) {
1613 71986 : sql_rel *l = rel->l;
1614 :
1615 : /* for every group expression e1 */
1616 200377 : for (node *n=((list*)rel->r)->h; n ; n = n->next) {
1617 128391 : sql_exp *e1 = n->data;
1618 : /* it's good to examine the same expression in the subrelation e.g. in case it's an alias */
1619 : /* TODO maybe cover more cases? Here I only look at the left relation */
1620 256782 : sql_exp *e1_sub = e1->type == e_column ?
1621 128391 : e1->nid ? exps_bind_nid(l->exps, e1->nid) : exps_find_exp(l->exps, e1) : NULL;
1622 :
1623 : /* for every other group expression */
1624 220976 : for (node *m=n->next; m; m = m->next) {
1625 92585 : sql_exp *e2 = m->data;
1626 185170 : sql_exp *e2_sub = e2->type == e_column ?
1627 92585 : e2->nid ? exps_bind_nid(l->exps, e2->nid) : exps_find_exp(l->exps, e2) : NULL;
1628 :
1629 : /* check if the expression are the same */
1630 92585 : if (exp_match_exp(e1, e2) || exp_refers(e1, e2) || (e1_sub && e2_sub && (exp_match_exp(e1_sub, e2_sub) || exp_refers(e1_sub, e2_sub)))) {
1631 :
1632 : /* use e2 from rel->exps instead of e2 from the rel->r as it can have an alias from the higher rel */
1633 41 : sql_exp *e2_in_exps = exps_uses_nid(rel->exps, e2->alias.label);
1634 41 : assert(e2_in_exps);
1635 :
1636 : /* same as e2 */
1637 41 : sql_exp *e1_in_exps = exps_uses_nid(rel->exps, e1->alias.label);
1638 41 : if (!e1_in_exps)
1639 6 : continue;
1640 :
1641 : /* write e2 as an e1 alias since the expressions are the same */
1642 35 : sql_exp* e2_as_e1_alias = exp_copy(v->sql, e1_in_exps);
1643 : /* NOTE: it is important to get the rname (exp->l) and name (exp->r) from e2 IN the exps
1644 : * (e2_in_exps), and not from e2, since it could carry an alias from the higher rel */
1645 35 : exp_setalias(e2_as_e1_alias, e2_as_e1_alias->alias.label, e2_in_exps->l, e2_in_exps->r);
1646 35 : e2_as_e1_alias->alias.label = e2->alias.label;
1647 :
1648 : /* replace e2 with e2_as_e1_alias in expressions list */
1649 35 : node *e2_exps_node = list_find(rel->exps, e2_in_exps, NULL);
1650 35 : list_append_before(rel->exps, e2_exps_node, e2_as_e1_alias);
1651 35 : list_remove_node(rel->exps, NULL, e2_exps_node);
1652 :
1653 : /* finally remove e2 from the groups' list (->r) since it's redundant */
1654 35 : node *e2_r_node = list_find(rel->r, e2, NULL);
1655 35 : list_remove_node(rel->r, NULL, e2_r_node);
1656 :
1657 35 : v->changes++;
1658 : }
1659 : }
1660 : }
1661 : }
1662 97531 : return rel;
1663 : }
1664 :
1665 : sql_exp *list_exps_uses_exp(list *exps, const char *rname, const char *name);
1666 :
1667 : static sql_exp*
1668 19477 : exp_uses_exp(sql_exp *e, const char *rname, const char *name)
1669 : {
1670 19774 : sql_exp *res = NULL;
1671 :
1672 19774 : switch (e->type) {
1673 : case e_psm:
1674 : break;
1675 1923 : case e_atom: {
1676 1923 : if (e->f)
1677 0 : return list_exps_uses_exp(e->f, rname, name);
1678 : } break;
1679 296 : case e_convert:
1680 296 : return exp_uses_exp(e->l, rname, name);
1681 10952 : case e_column: {
1682 10952 : if (e->l && rname && strcmp(e->l, rname) == 0 &&
1683 2687 : e->r && name && strcmp(e->r, name) == 0)
1684 : return e;
1685 8460 : if (!e->l && !rname &&
1686 244 : e->r && name && strcmp(e->r, name) == 0)
1687 : return e;
1688 : } break;
1689 6189 : case e_func:
1690 : case e_aggr: {
1691 6189 : if (e->l)
1692 4970 : return list_exps_uses_exp(e->l, rname, name);
1693 : } break;
1694 414 : case e_cmp: {
1695 414 : if (e->flag == cmp_in || e->flag == cmp_notin) {
1696 0 : if ((res = exp_uses_exp(e->l, rname, name)))
1697 : return res;
1698 0 : return list_exps_uses_exp(e->r, rname, name);
1699 414 : } else if (e->flag == cmp_or || e->flag == cmp_filter) {
1700 2 : if ((res = list_exps_uses_exp(e->l, rname, name)))
1701 : return res;
1702 2 : return list_exps_uses_exp(e->r, rname, name);
1703 : } else {
1704 412 : if ((res = exp_uses_exp(e->l, rname, name)))
1705 : return res;
1706 410 : if ((res = exp_uses_exp(e->r, rname, name)))
1707 : return res;
1708 404 : if (e->f)
1709 : return exp_uses_exp(e->f, rname, name);
1710 : }
1711 : } break;
1712 : }
1713 : return NULL;
1714 : }
1715 :
1716 : sql_exp *
1717 10240 : list_exps_uses_exp(list *exps, const char *rname, const char *name)
1718 : {
1719 10240 : sql_exp *res = NULL;
1720 :
1721 10240 : if (!exps)
1722 : return NULL;
1723 28895 : for (node *n = exps->h; n && !res; n = n->next) {
1724 18655 : sql_exp *e = n->data;
1725 18655 : res = exp_uses_exp(e, rname, name);
1726 : }
1727 : return res;
1728 : }
1729 :
1730 : /* find in the list of expression an expression which uses e */
1731 : sql_exp *
1732 995 : exps_uses_exp(list *exps, sql_exp *e)
1733 : {
1734 995 : return list_exps_uses_exp(exps, exp_relname(e), exp_name(e));
1735 : }
1736 : /*
1737 : * Rewrite aggregations over munion all.
1738 : * groupby ([ union all (a, b, c) ], [gbe], [ count, sum ] )
1739 : *
1740 : * into
1741 : * groupby ([ union all( groupby( a, [gbe], [ count, sum] ),
1742 : * groupby( b, [gbe], [ count, sum] ),
1743 : * groupby( c, [gbe], [ count, sum] ) ],
1744 : * [gbe], [sum, sum] )
1745 : */
1746 : static inline sql_rel *
1747 1388 : rel_push_aggr_down_n_arry(visitor *v, sql_rel *rel)
1748 : {
1749 1388 : sql_rel *g = rel;
1750 1388 : sql_rel *u = rel->l, *ou = u;
1751 1388 : sql_rel *r = NULL;
1752 1388 : list *rgbe = NULL, *gbe = NULL, *exps = NULL;
1753 1388 : node *n, *m;
1754 :
1755 : // TODO why?
1756 1388 : if (u->op == op_project && !need_distinct(u))
1757 353 : u = u->l;
1758 :
1759 1388 : if (is_recursive(u))
1760 : return rel;
1761 :
1762 : /* make sure we don't create group by on group by's */
1763 2810 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
1764 2147 : r = n->data;
1765 2147 : if (r->op == op_groupby)
1766 : return rel;
1767 : }
1768 :
1769 : /* distinct should be done over the full result */
1770 1827 : for (n = g->exps->h; n; n = n->next) {
1771 1270 : sql_exp *e = n->data;
1772 1270 : sql_subfunc *af = e->f;
1773 :
1774 1270 : if (e->type == e_atom ||
1775 1270 : e->type == e_func ||
1776 624 : (e->type == e_aggr &&
1777 624 : ((strcmp(af->func->base.name, "sum") &&
1778 487 : strcmp(af->func->base.name, "count") &&
1779 171 : strcmp(af->func->base.name, "min") &&
1780 624 : strcmp(af->func->base.name, "max")) ||
1781 : need_distinct(e))))
1782 : return rel;
1783 : }
1784 :
1785 557 : list *nl = sa_list(v->sql->sa);
1786 1717 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
1787 1160 : r = rel_dup(n->data);
1788 : //n->data = NULL; /* clean list as we steal the relation r, stealing is needed else (with multiple references) double project cleanup fails */
1789 1160 : if (!is_project(r->op))
1790 47 : r = rel_project(v->sql->sa, r,
1791 : rel_projections(v->sql, r, NULL, 1, 1));
1792 1160 : rel_rename_exps(v->sql, u->exps, r->exps);
1793 1160 : if (u != ou) {
1794 663 : bool isproject = is_project(r->op);
1795 663 : r = rel_project(v->sql->sa, r, NULL);
1796 663 : r->exps = exps_copy(v->sql, ou->exps);
1797 663 : rel_rename_exps(v->sql, ou->exps, r->exps);
1798 663 : set_processed(r);
1799 663 : if (isproject)
1800 663 : r = rel_push_project_down_(v, r); /* cleanup any double projects */
1801 : }
1802 1160 : if (g->r && list_length(g->r) > 0) {
1803 910 : list *gbe = g->r;
1804 910 : rgbe = exps_copy(v->sql, gbe);
1805 : }
1806 1160 : r = rel_groupby(v->sql, r, NULL);
1807 1160 : r->r = rgbe;
1808 1160 : r->nrcols = g->nrcols;
1809 1160 : r->card = g->card;
1810 1160 : r->exps = exps_copy(v->sql, g->exps);
1811 1160 : r->nrcols = list_length(r->exps);
1812 1160 : set_processed(r);
1813 :
1814 1160 : assert(r);
1815 1160 : append(nl, r);
1816 : }
1817 :
1818 : /* group by on primary keys which define the partitioning scheme
1819 : * don't need a finalizing group by */
1820 : /* how to check if a partition is based on some primary key ?
1821 : * */
1822 557 : if (!list_empty(rel->r)) {
1823 1074 : for (node *n = ((list*)rel->r)->h; n; n = n->next) {
1824 638 : sql_exp *e = n->data;
1825 638 : sql_column *c = NULL;
1826 :
1827 638 : if ((c = exp_is_pkey(rel, e)) && partition_find_part(v->sql->session->tr, c->t, NULL)) {
1828 : /* check if key is partition key */
1829 0 : v->changes++;
1830 0 : return rel_inplace_setop_n_ary(v->sql, rel, nl, op_munion,
1831 : rel_projections(v->sql, rel, NULL, 1, 1));
1832 : }
1833 : }
1834 : }
1835 :
1836 557 : if (!list_empty(rel->r)) {
1837 436 : list *ogbe = rel->r;
1838 :
1839 436 : gbe = new_exp_list(v->sql->sa);
1840 1074 : for (n = ogbe->h; n; n = n->next) {
1841 638 : sql_exp *e = n->data, *ne;
1842 :
1843 : /* group by in aggregation list */
1844 638 : ne = exps_uses_exp( rel->exps, e);
1845 638 : if (ne) {
1846 632 : sql_rel *first_munion_rel = nl->h->data;
1847 632 : ne = list_find_exp(first_munion_rel->exps, ne);
1848 : }
1849 632 : if (!ne) {
1850 : /* e only in the u1,u2,...un->r (group by list) */
1851 24 : for (node *n = nl->h; n; n = n->next) {
1852 16 : ne = exp_ref(v->sql, e);
1853 16 : list_append(((sql_rel*)n->data)->exps, ne);
1854 : }
1855 : }
1856 8 : assert(ne);
1857 638 : ne = exp_ref(v->sql, ne);
1858 638 : append(gbe, ne);
1859 : }
1860 : }
1861 :
1862 557 : u = rel_setop_n_ary(v->sql->sa, nl, op_munion);
1863 557 : rel_setop_n_ary_set_exps(v->sql, u,
1864 557 : rel_projections(v->sql, nl->h->data, NULL, 1, 1), false);
1865 557 : set_processed(u);
1866 :
1867 557 : exps = new_exp_list(v->sql->sa);
1868 1718 : for (n = u->exps->h, m = rel->exps->h; n && m; n = n->next, m = m->next) {
1869 1161 : sql_exp *ne, *e = n->data, *oa = m->data;
1870 :
1871 1161 : if (oa->type == e_aggr) {
1872 518 : sql_subfunc *f = oa->f;
1873 518 : int cnt = exp_aggr_is_count(oa);
1874 518 : sql_subfunc *a = sql_bind_func(v->sql, "sys", (cnt)?"sum":f->func->base.name, exp_subtype(e), NULL, F_AGGR, true, true);
1875 :
1876 518 : assert(a);
1877 : /* munion of aggr result may have nils
1878 : * because sum/count of empty set */
1879 518 : set_has_nil(e);
1880 518 : e = exp_ref(v->sql, e);
1881 518 : ne = exp_aggr1(v->sql->sa, e, a, need_distinct(e), 1, e->card, 1);
1882 : } else {
1883 643 : ne = exp_copy(v->sql, oa);
1884 : }
1885 1161 : exp_setalias(ne, oa->alias.label, exp_find_rel_name(oa), exp_name(oa));
1886 1161 : append(exps, ne);
1887 : }
1888 557 : v->changes++;
1889 557 : return rel_inplace_groupby(rel, u, gbe, exps);
1890 : }
1891 :
1892 : /*
1893 : * Rewrite aggregations over union all.
1894 : * groupby ([ union all (a, b) ], [gbe], [ count, sum ] )
1895 : *
1896 : * into
1897 : * groupby ( [ union all( groupby( a, [gbe], [ count, sum] ), [ groupby( b, [gbe], [ count, sum] )) , [gbe], [sum, sum] )
1898 : */
1899 : static inline sql_rel *
1900 97531 : rel_push_aggr_down(visitor *v, sql_rel *rel)
1901 : {
1902 97531 : if (rel->op == op_groupby && rel->l) {
1903 97460 : sql_rel *u = rel->l, *ou = u;
1904 97460 : sql_rel *g = rel;
1905 97460 : sql_rel *ul = u->l;
1906 97460 : sql_rel *ur = u->r;
1907 97460 : node *n, *m;
1908 97460 : list *lgbe = NULL, *rgbe = NULL, *gbe = NULL, *exps = NULL;
1909 :
1910 97460 : if (u->op == op_project && !need_distinct(u))
1911 42257 : u = u->l;
1912 :
1913 97460 : if (!u || !(is_union(u->op) || is_munion(u->op)) || need_distinct(u) || is_single(u) || !u->exps || rel_is_ref(u))
1914 : return rel;
1915 :
1916 1388 : if (is_munion(u->op))
1917 1388 : return rel_push_aggr_down_n_arry(v, rel);
1918 :
1919 0 : ul = u->l;
1920 0 : ur = u->r;
1921 :
1922 : /* make sure we don't create group by on group by's */
1923 0 : if (ul->op == op_groupby || ur->op == op_groupby)
1924 : return rel;
1925 :
1926 : /* distinct should be done over the full result */
1927 0 : for (n = g->exps->h; n; n = n->next) {
1928 0 : sql_exp *e = n->data;
1929 0 : sql_subfunc *af = e->f;
1930 :
1931 0 : if (e->type == e_atom ||
1932 0 : e->type == e_func ||
1933 0 : (e->type == e_aggr &&
1934 0 : ((strcmp(af->func->base.name, "sum") &&
1935 0 : strcmp(af->func->base.name, "count") &&
1936 0 : strcmp(af->func->base.name, "min") &&
1937 0 : strcmp(af->func->base.name, "max")) ||
1938 : need_distinct(e))))
1939 : return rel;
1940 : }
1941 :
1942 0 : ul = rel_dup(ul);
1943 0 : ur = rel_dup(ur);
1944 0 : if (!is_project(ul->op))
1945 0 : ul = rel_project(v->sql->sa, ul,
1946 : rel_projections(v->sql, ul, NULL, 1, 1));
1947 0 : if (!is_project(ur->op))
1948 0 : ur = rel_project(v->sql->sa, ur,
1949 : rel_projections(v->sql, ur, NULL, 1, 1));
1950 0 : rel_rename_exps(v->sql, u->exps, ul->exps);
1951 0 : rel_rename_exps(v->sql, u->exps, ur->exps);
1952 0 : if (u != ou) {
1953 0 : ul = rel_project(v->sql->sa, ul, NULL);
1954 0 : ul->exps = exps_copy(v->sql, ou->exps);
1955 0 : rel_rename_exps(v->sql, ou->exps, ul->exps);
1956 0 : set_processed(ul);
1957 0 : ur = rel_project(v->sql->sa, ur, NULL);
1958 0 : ur->exps = exps_copy(v->sql, ou->exps);
1959 0 : rel_rename_exps(v->sql, ou->exps, ur->exps);
1960 0 : set_processed(ur);
1961 : }
1962 :
1963 0 : if (g->r && list_length(g->r) > 0) {
1964 0 : list *gbe = g->r;
1965 :
1966 0 : lgbe = exps_copy(v->sql, gbe);
1967 0 : rgbe = exps_copy(v->sql, gbe);
1968 : }
1969 0 : ul = rel_groupby(v->sql, ul, NULL);
1970 0 : ul->r = lgbe;
1971 0 : ul->nrcols = g->nrcols;
1972 0 : ul->card = g->card;
1973 0 : ul->exps = exps_copy(v->sql, g->exps);
1974 0 : ul->nrcols = list_length(ul->exps);
1975 0 : set_processed(ul);
1976 :
1977 0 : ur = rel_groupby(v->sql, ur, NULL);
1978 0 : ur->r = rgbe;
1979 0 : ur->nrcols = g->nrcols;
1980 0 : ur->card = g->card;
1981 0 : ur->exps = exps_copy(v->sql, g->exps);
1982 0 : ur->nrcols = list_length(ur->exps);
1983 0 : set_processed(ur);
1984 :
1985 : /* group by on primary keys which define the partitioning scheme
1986 : * don't need a finalizing group by */
1987 : /* how to check if a partition is based on some primary key ?
1988 : * */
1989 0 : if (!list_empty(rel->r)) {
1990 0 : for (node *n = ((list*)rel->r)->h; n; n = n->next) {
1991 0 : sql_exp *e = n->data;
1992 0 : sql_column *c = NULL;
1993 :
1994 0 : if ((c = exp_is_pkey(rel, e)) && partition_find_part(v->sql->session->tr, c->t, NULL)) {
1995 : /* check if key is partition key */
1996 0 : v->changes++;
1997 0 : return rel_inplace_setop(v->sql, rel, ul, ur, op_union,
1998 : rel_projections(v->sql, rel, NULL, 1, 1));
1999 : }
2000 : }
2001 : }
2002 :
2003 0 : if (!list_empty(rel->r)) {
2004 0 : list *ogbe = rel->r;
2005 :
2006 0 : gbe = new_exp_list(v->sql->sa);
2007 0 : for (n = ogbe->h; n; n = n->next) {
2008 0 : sql_exp *e = n->data, *ne;
2009 :
2010 : /* group by in aggregation list */
2011 0 : ne = exps_uses_exp( rel->exps, e);
2012 0 : if (ne)
2013 0 : ne = list_find_exp( ul->exps, ne);
2014 0 : if (!ne) {
2015 : /* e only in the ul/ur->r (group by list) */
2016 0 : ne = exp_ref(v->sql, e);
2017 0 : list_append(ul->exps, ne);
2018 0 : ne = exp_ref(v->sql, e);
2019 0 : list_append(ur->exps, ne);
2020 : }
2021 0 : assert(ne);
2022 0 : ne = exp_ref(v->sql, ne);
2023 0 : append(gbe, ne);
2024 : }
2025 : }
2026 :
2027 0 : u = rel_setop(v->sql->sa, ul, ur, op_union);
2028 0 : rel_setop_set_exps(v->sql, u, rel_projections(v->sql, ul, NULL, 1, 1), false);
2029 0 : set_processed(u);
2030 :
2031 0 : exps = new_exp_list(v->sql->sa);
2032 0 : for (n = u->exps->h, m = rel->exps->h; n && m; n = n->next, m = m->next) {
2033 0 : sql_exp *ne, *e = n->data, *oa = m->data;
2034 :
2035 0 : if (oa->type == e_aggr) {
2036 0 : sql_subfunc *f = oa->f;
2037 0 : int cnt = exp_aggr_is_count(oa);
2038 0 : sql_subfunc *a = sql_bind_func(v->sql, "sys", (cnt)?"sum":f->func->base.name, exp_subtype(e), NULL, F_AGGR, true, true);
2039 :
2040 0 : assert(a);
2041 : /* union of aggr result may have nils
2042 : * because sum/count of empty set */
2043 0 : set_has_nil(e);
2044 0 : e = exp_ref(v->sql, e);
2045 0 : ne = exp_aggr1(v->sql->sa, e, a, need_distinct(e), 1, e->card, 1);
2046 : } else {
2047 0 : ne = exp_copy(v->sql, oa);
2048 : }
2049 0 : exp_setalias(ne, oa->alias.label, exp_find_rel_name(oa), exp_name(oa));
2050 0 : append(exps, ne);
2051 : }
2052 0 : v->changes++;
2053 0 : return rel_inplace_groupby( rel, u, gbe, exps);
2054 : }
2055 : return rel;
2056 : }
2057 :
2058 : /*
2059 : * More general
2060 : * groupby(
2061 : * [ outer ] join(
2062 : * project(
2063 : * table(A) [ c1, c2, .. ]
2064 : * ) [ c1, c2, identity(c2) as I, .. ],
2065 : * table(B) [ c1, c2, .. ]
2066 : * ) [ A.c1 = B.c1 ]
2067 : * ) [ I ] [ a1, a2, .. ]
2068 : *
2069 : * ->
2070 : *
2071 : * [ outer ] join(
2072 : * project(
2073 : * table(A) [ c1, c2, .. ]
2074 : * ) [ c1, c2, .. ],
2075 : * groupby (
2076 : * table(B) [ c1, c2, .. ]
2077 : * ) [ B.c1 ] [ a1, a2, .. ]
2078 : * ) [ A.c1 = B.c1 ]
2079 : */
2080 : static sql_rel *
2081 47864 : gen_push_groupby_down(mvc *sql, sql_rel *rel, int *changes)
2082 : {
2083 47864 : sql_rel *j = rel->l;
2084 47864 : list *gbe = rel->r;
2085 :
2086 47864 : if (rel->op == op_groupby && list_length(gbe) == 1 && j->op == op_join){
2087 6492 : sql_rel *jl = j->l, *jr = j->r, *cr, *cl;
2088 6492 : sql_exp *gb = gbe->h->data, *e;
2089 6492 : node *n;
2090 6492 : int left = 1;
2091 6492 : list *aggrs, *aliases, *gbe;
2092 :
2093 6492 : if (!is_identity(gb, jl) && !is_identity(gb, jr))
2094 : return rel;
2095 6 : if (jl->op == op_project &&
2096 4 : (e = list_find_exp( jl->exps, gb)) != NULL &&
2097 2 : find_prop(e->p, PROP_HASHCOL) != NULL) {
2098 : left = 0;
2099 : cr = jr;
2100 : cl = jl;
2101 4 : } else if (jr->op == op_project &&
2102 8 : (e = list_find_exp( jr->exps, gb)) != NULL &&
2103 4 : find_prop(e->p, PROP_HASHCOL) != NULL) {
2104 : left = 1;
2105 : cr = jl;
2106 : cl = jr;
2107 : } else {
2108 0 : return rel;
2109 : }
2110 :
2111 6 : if ((left && is_base(jl->op)) || (!left && is_base(jr->op))||
2112 0 : (left && is_select(jl->op)) || (!left && is_select(jr->op))
2113 0 : || rel_is_join_on_pkey(j, false))
2114 6 : return rel;
2115 :
2116 : /* only add aggr (based on left/right), and repeat the group by column */
2117 0 : aggrs = sa_list(sql->sa);
2118 0 : aliases = sa_list(sql->sa);
2119 0 : if (rel->exps) for (n = rel->exps->h; n; n = n->next) {
2120 0 : sql_exp *ce = n->data;
2121 :
2122 0 : if (exp_is_atom(ce))
2123 0 : list_append(aliases, ce);
2124 0 : else if (ce->type == e_column) {
2125 0 : if (rel_has_exp(cl, ce, false) == 0) /* collect aliases outside groupby */
2126 0 : list_append(aliases, ce);
2127 : else
2128 0 : list_append(aggrs, ce);
2129 0 : } else if (ce->type == e_aggr) {
2130 0 : list *args = ce->l;
2131 :
2132 : /* check args are part of left/right */
2133 0 : if (!list_empty(args) && rel_has_exps(cl, args, false) == 0)
2134 : return rel;
2135 0 : if (rel->op != op_join && exp_aggr_is_count(ce))
2136 0 : ce->p = prop_create(sql->sa, PROP_COUNT, ce->p);
2137 0 : list_append(aggrs, ce);
2138 : }
2139 : }
2140 : /* TODO move any column expressions (aliases) into the project list */
2141 :
2142 : /* find gb in left or right and should be unique */
2143 0 : gbe = sa_list(sql->sa);
2144 : /* push groupby to right, group on join exps */
2145 0 : if (j->exps) for (n = j->exps->h; n; n = n->next) {
2146 0 : sql_exp *ce = n->data, *l = ce->l, *r = ce->r, *e;
2147 :
2148 : /* get left/right hand of e_cmp */
2149 0 : assert(ce->type == e_cmp);
2150 0 : if (ce->flag == cmp_equal && is_alias(l->type) && is_alias(r->type) &&
2151 0 : (((e = rel_find_exp(cr, l)) && rel_find_exp(cl, r)) ||
2152 0 : ((e = rel_find_exp(cr, r)) && rel_find_exp(cl, l)))) {
2153 0 : e = exp_ref(sql, e);
2154 0 : list_append(gbe, e);
2155 : } else {
2156 0 : return rel;
2157 : }
2158 : }
2159 0 : if (!left)
2160 0 : cr = j->r = rel_groupby(sql, cr, gbe);
2161 : else
2162 0 : cr = j->l = rel_groupby(sql, cr, gbe);
2163 0 : cr->exps = list_merge(cr->exps, aggrs, (fdup)NULL);
2164 0 : set_processed(cr);
2165 0 : if (!is_project(cl->op))
2166 0 : cl = rel_project(sql->sa, cl,
2167 : rel_projections(sql, cl, NULL, 1, 1));
2168 0 : cl->exps = list_merge(cl->exps, aliases, (fdup)NULL);
2169 0 : set_processed(cl);
2170 0 : if (!left)
2171 0 : j->l = cl;
2172 : else
2173 0 : j->r = cl;
2174 0 : rel -> l = NULL;
2175 0 : rel_destroy(rel);
2176 :
2177 0 : if (list_empty(cr->exps) && list_empty(j->exps)) { /* remove crossproduct */
2178 0 : sql_rel *r = cl;
2179 0 : if (!left)
2180 0 : j->l = NULL;
2181 : else
2182 0 : j->r = NULL;
2183 0 : rel_destroy(j);
2184 0 : j = r;
2185 : }
2186 0 : (*changes)++;
2187 0 : return j;
2188 : }
2189 : return rel;
2190 : }
2191 :
2192 : /*
2193 : * Rewrite group(project(join(A,Dict)[a.i==dict.i])[...dict.n])[dict.n][ ... dict.n ]
2194 : * into
2195 : * project(join(groupby (A)[a.i],[a.i]), Dict)[a.i==dict.i])[dict.n]
2196 : *
2197 : */
2198 : static inline sql_rel *
2199 97531 : rel_push_groupby_down(visitor *v, sql_rel *rel)
2200 : {
2201 97531 : sql_rel *p = rel->l;
2202 97531 : list *gbe = rel->r;
2203 :
2204 97531 : if (rel->op == op_groupby && gbe && p && is_join(p->op))
2205 10180 : return gen_push_groupby_down(v->sql, rel, &v->changes);
2206 87351 : if (rel->op == op_groupby && gbe && p && p->op == op_project) {
2207 37684 : sql_rel *j = p->l;
2208 37684 : sql_rel *jl, *jr;
2209 37684 : node *n;
2210 :
2211 37684 : if (!j || j->op != op_join || list_length(j->exps) != 1)
2212 35535 : return gen_push_groupby_down(v->sql, rel, &v->changes);
2213 2149 : jl = j->l;
2214 2149 : jr = j->r;
2215 :
2216 : /* check if jr is a dict with index and var still used */
2217 2149 : if (jr->op != op_basetable || jr->l || !jr->r || list_length(jr->exps) != 2)
2218 2149 : return gen_push_groupby_down(v->sql, rel, &v->changes);
2219 :
2220 : /* check if group by is done on dict column */
2221 0 : for(n = gbe->h; n; n = n->next) {
2222 0 : sql_exp *ge = n->data, *pe = NULL, *e = NULL;
2223 :
2224 : /* find group by exp in project, then in dict */
2225 0 : pe = rel_find_exp(p, ge);
2226 0 : if (pe) /* find project exp in right hand of join, ie dict */
2227 0 : e = rel_find_exp(jr, pe);
2228 0 : if (pe && e) { /* Rewrite: join with dict after the group by */
2229 0 : list *pexps = rel_projections(v->sql, rel, NULL, 1, 1), *npexps;
2230 0 : node *m;
2231 0 : sql_exp *ne = j->exps->h->data; /* join exp */
2232 0 : p->l = jl; /* Project now only on the left side of the join */
2233 :
2234 0 : ne = ne->l; /* The left side of the compare is the index of the left */
2235 :
2236 : /* find ge reference in new projection list */
2237 0 : npexps = sa_list(v->sql->sa);
2238 0 : for (m = pexps->h; m; m = m->next) {
2239 0 : sql_exp *a = m->data;
2240 :
2241 0 : if (exp_refers(ge, a)) {
2242 0 : sql_exp *sc = jr->exps->t->data;
2243 0 : sql_exp *e = exp_ref(v->sql, sc);
2244 0 : if (exp_name(a))
2245 0 : exp_prop_alias(v->sql->sa, e, a);
2246 : a = e;
2247 : }
2248 0 : append(npexps, a);
2249 : }
2250 :
2251 : /* find ge in aggr list */
2252 0 : for (m = rel->exps->h; m; m = m->next) {
2253 0 : sql_exp *a = m->data;
2254 :
2255 0 : if (exp_match_exp(a, ge) || exp_refers(ge, a)) {
2256 0 : a = exp_ref(v->sql, ne);
2257 0 : if (exp_name(ne))
2258 0 : exp_prop_alias(v->sql->sa, a, ne);
2259 0 : m->data = a;
2260 : }
2261 : }
2262 :
2263 : /* change alias pe, ie project out the index */
2264 0 : pe->l = (void*)exp_relname(ne);
2265 0 : pe->r = (void*)exp_name(ne);
2266 0 : if (exp_name(ne))
2267 0 : exp_prop_alias(v->sql->sa, pe, ne);
2268 :
2269 : /* change alias ge */
2270 0 : ge->l = (void*)exp_relname(pe);
2271 0 : ge->r = (void*)exp_name(pe);
2272 0 : if (exp_name(pe))
2273 0 : exp_prop_alias(v->sql->sa, ge, pe);
2274 :
2275 : /* zap both project and groupby name hash tables (as we changed names above) */
2276 0 : list_hash_clear(rel->exps);
2277 0 : list_hash_clear((list*)rel->r);
2278 0 : list_hash_clear(p->exps);
2279 :
2280 : /* add join */
2281 0 : j->l = rel;
2282 0 : rel = rel_project(v->sql->sa, j, npexps);
2283 0 : v->changes++;
2284 : }
2285 : }
2286 : }
2287 : return rel;
2288 : }
2289 :
2290 : /* reduce group by expressions based on pkey info
2291 : *
2292 : * The reduced group by and (derived) aggr expressions are restored via
2293 : * extra (new) aggregate columns.
2294 : */
2295 : static inline sql_rel *
2296 97531 : rel_reduce_groupby_exps(visitor *v, sql_rel *rel)
2297 : {
2298 97531 : list *gbe = rel->r;
2299 :
2300 97531 : if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel) && list_length(gbe)) {
2301 48583 : node *n, *m;
2302 48583 : int k, j, i, ngbe = list_length(gbe);
2303 48583 : int8_t *scores = SA_NEW_ARRAY(v->sql->ta, int8_t, ngbe);
2304 48583 : sql_column *c;
2305 48583 : sql_table **tbls = SA_NEW_ARRAY(v->sql->ta, sql_table*, ngbe);
2306 48583 : sql_rel **bts = SA_NEW_ARRAY(v->sql->ta, sql_rel*, ngbe), *bt = NULL;
2307 :
2308 48583 : gbe = rel->r;
2309 127065 : for (k = 0, i = 0, n = gbe->h; n; n = n->next, k++) {
2310 78482 : sql_exp *e = n->data;
2311 :
2312 78482 : c = exp_find_column_(rel, e, -2, &bt);
2313 78482 : if (c) {
2314 88083 : for(j = 0; j < i; j++)
2315 33423 : if (c->t == tbls[j] && bts[j] == bt)
2316 : break;
2317 70912 : tbls[j] = c->t;
2318 70912 : bts[j] = bt;
2319 70912 : i += (j == i);
2320 : }
2321 : }
2322 48583 : if (i) { /* forall tables find pkey and
2323 : remove useless other columns */
2324 : /* TODO also remove group by columns which are related to
2325 : * the other columns using a foreign-key join (n->1), ie 1
2326 : * on the to be removed side.
2327 : */
2328 99720 : for(j = 0; j < i; j++) {
2329 54648 : int l, nr = 0, cnr = 0;
2330 :
2331 54648 : k = list_length(gbe);
2332 54648 : memset(scores, 0, list_length(gbe));
2333 54648 : if (tbls[j]->pkey) {
2334 27420 : for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
2335 20895 : fcmp cmp = (fcmp)&kc_column_cmp;
2336 20895 : sql_exp *e = n->data;
2337 :
2338 20895 : c = exp_find_column_(rel, e, -2, &bt);
2339 29799 : if (c && c->t == tbls[j] && bts[j] == bt &&
2340 8904 : list_find(tbls[j]->pkey->k.columns, c, cmp) != NULL) {
2341 796 : scores[l] = 1;
2342 796 : nr ++;
2343 18556 : } else if (c && c->t == tbls[j] && bts[j] == bt) {
2344 : /* Okay we can cleanup a group by column */
2345 8108 : scores[l] = -1;
2346 8108 : cnr ++;
2347 : }
2348 : }
2349 : }
2350 6525 : if (nr) {
2351 787 : int all = (list_length(tbls[j]->pkey->k.columns) == nr);
2352 787 : sql_kc *kc = tbls[j]->pkey->k.columns->h->data;
2353 :
2354 787 : c = kc->c;
2355 2102 : for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
2356 1315 : sql_exp *e = n->data;
2357 :
2358 : /* pkey based group by */
2359 1464 : if (scores[l] == 1 && ((all ||
2360 : /* first of key */
2361 921 : (c == exp_find_column(rel, e, -2))) && !find_prop(e->p, PROP_HASHCOL)))
2362 13 : e->p = prop_create(v->sql->sa, PROP_HASHCOL, e->p);
2363 : }
2364 2966 : for (m = rel->exps->h; m; m = m->next ){
2365 2179 : sql_exp *e = m->data;
2366 :
2367 7692 : for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
2368 5526 : sql_exp *gb = n->data;
2369 :
2370 : /* pkey based group by */
2371 5526 : if (scores[l] == 1 && exp_match_exp(e,gb) && find_prop(gb->p, PROP_HASHCOL) && !find_prop(e->p, PROP_HASHCOL)) {
2372 13 : e->p = prop_create(v->sql->sa, PROP_HASHCOL, e->p);
2373 13 : break;
2374 : }
2375 :
2376 : }
2377 : }
2378 : }
2379 54648 : if (cnr && nr && list_length(tbls[j]->pkey->k.columns) == nr) {
2380 108 : list *ngbe = new_exp_list(v->sql->sa);
2381 :
2382 392 : for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
2383 284 : sql_exp *e = n->data;
2384 :
2385 : /* keep the group by columns which form a primary key
2386 : * of this table. And those unrelated to this table. */
2387 284 : if (scores[l] != -1)
2388 139 : append(ngbe, e);
2389 : }
2390 108 : rel->r = ngbe;
2391 : /* rewrite gbe and aggr, in the aggr list */
2392 108 : if (0)
2393 : for (m = rel->exps->h; m; m = m->next ){
2394 : sql_exp *e = m->data;
2395 : int fnd = 0;
2396 :
2397 : for (l = 0, n = gbe->h; l < k && n && !fnd; l++, n = n->next) {
2398 : sql_exp *gb = n->data;
2399 :
2400 : if (scores[l] == -1 && exp_refers(gb, e)) {
2401 : /*
2402 : sql_exp *rs = exp_column(v->sql->sa, gb->l?gb->l:exp_relname(gb), gb->r?gb->r:exp_name(gb), exp_subtype(gb), rel->card, has_nil(gb), is_unique(gb), is_intern(gb));
2403 : exp_setalias(rs, e->alias.label, exp_find_rel_name(e), exp_name(e));
2404 : e = rs;
2405 : */
2406 : assert(e->alias.label == e->nid);
2407 : fnd = 1;
2408 : }
2409 : }
2410 : m->data = e;
2411 : }
2412 : /* new reduced aggr expression list */
2413 108 : assert(list_length(rel->exps)>0);
2414 : /* only one reduction at a time */
2415 108 : list_hash_clear(rel->exps);
2416 108 : v->changes++;
2417 108 : return rel;
2418 : }
2419 54540 : gbe = rel->r;
2420 : }
2421 : }
2422 : }
2423 : /* remove constants from group by list */
2424 97423 : if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel)) {
2425 48982 : int i;
2426 48982 : node *n;
2427 :
2428 127180 : for (i = 0, n = gbe->h; n; n = n->next) {
2429 78198 : sql_exp *e = n->data;
2430 :
2431 78198 : if (exp_is_atom(e))
2432 26 : i++;
2433 : }
2434 48982 : if (i) {
2435 26 : list *ngbe = new_exp_list(v->sql->sa);
2436 26 : list *dgbe = new_exp_list(v->sql->sa);
2437 :
2438 53 : for (n = gbe->h; n; n = n->next) {
2439 27 : sql_exp *e = n->data;
2440 :
2441 27 : if (!exp_is_atom(e))
2442 1 : append(ngbe, e);
2443 : /* we need at least one gbe */
2444 26 : else if (!n->next && list_empty(ngbe))
2445 25 : append(ngbe, e);
2446 : else
2447 1 : append(dgbe, e);
2448 : }
2449 26 : rel->r = ngbe;
2450 26 : if (!list_empty(dgbe)) {
2451 : /* use atom's directly in the aggr expr list */
2452 :
2453 3 : for (n = rel->exps->h; n; n = n->next) {
2454 2 : sql_exp *e = n->data, *ne = NULL;
2455 :
2456 2 : if (e->type == e_column) {
2457 2 : if (e->nid)
2458 2 : ne = exps_bind_nid(dgbe, e->nid);
2459 2 : if (ne) {
2460 1 : ne = exp_copy(v->sql, ne);
2461 1 : exp_prop_alias(v->sql->sa, ne, e);
2462 1 : e = ne;
2463 : }
2464 : }
2465 2 : n->data = e;
2466 : }
2467 1 : list_hash_clear(rel->exps);
2468 1 : v->changes++;
2469 : }
2470 : }
2471 : }
2472 : return rel;
2473 : }
2474 :
2475 : /* if all arguments to a distinct aggregate are unique, remove 'distinct' property */
2476 : static inline sql_rel *
2477 97531 : rel_distinct_aggregate_on_unique_values(visitor *v, sql_rel *rel)
2478 : {
2479 97531 : if (is_groupby(rel->op) && !list_empty(rel->exps)) {
2480 303188 : for (node *n = rel->exps->h; n; n = n->next) {
2481 205734 : sql_exp *exp = (sql_exp*) n->data;
2482 :
2483 205734 : if (exp->type == e_aggr && need_distinct(exp)) {
2484 689 : bool all_unique = true;
2485 689 : list *l = exp->l;
2486 :
2487 1378 : for (node *m = l->h; m && all_unique; m = m->next) {
2488 689 : sql_exp *arg = (sql_exp*) m->data;
2489 :
2490 823 : all_unique &= arg->type == e_column && is_unique(arg) && (!is_semantics(exp) || !has_nil(arg));
2491 : }
2492 689 : if (!all_unique && exps_card(l) > CARD_ATOM)
2493 394 : all_unique = exps_unique(v->sql, rel, l) && (!is_semantics(exp) || !have_nil(l));
2494 : if (all_unique) {
2495 134 : set_nodistinct(exp);
2496 134 : v->changes++;
2497 : }
2498 : }
2499 : }
2500 : }
2501 97531 : return rel;
2502 : }
2503 :
2504 : static inline sql_rel *
2505 97531 : rel_remove_const_aggr(visitor *v, sql_rel *rel)
2506 : {
2507 97531 : if (!rel)
2508 : return rel;
2509 97531 : if (rel && is_groupby(rel->op) && list_length(rel->exps) >= 1 && !rel_is_ref(rel)) {
2510 73991 : int needed = 0;
2511 229041 : for (node *n = rel->exps->h; n; n = n->next) {
2512 155050 : sql_exp *exp = (sql_exp*) n->data;
2513 :
2514 155050 : if (exp_is_atom(exp) && exp->type != e_aggr)
2515 55 : needed++;
2516 : }
2517 73991 : if (needed) {
2518 53 : if (!list_empty(rel->r)) {
2519 3 : int atoms = 0;
2520 : /* corner case, all grouping columns are atoms */
2521 6 : for (node *n = ((list*)rel->r)->h; n; n = n->next) {
2522 3 : sql_exp *exp = (sql_exp*) n->data;
2523 :
2524 3 : if (exp_is_atom(exp))
2525 0 : atoms++;
2526 : }
2527 3 : if (atoms == list_length(rel->r)) {
2528 0 : list *nexps = sa_list(v->sql->sa);
2529 0 : for (node *n = rel->exps->h; n; ) {
2530 0 : node *next = n->next;
2531 0 : sql_exp *e = (sql_exp*) n->data;
2532 :
2533 : /* remove references to constant group by columns */
2534 0 : if (e->type == e_column) {
2535 0 : sql_exp *found = NULL;
2536 0 : found = exps_bind_nid(rel->r, e->nid);
2537 0 : if (found) {
2538 0 : list_append(nexps, found);
2539 0 : list_remove_node(rel->exps, NULL, n);
2540 : }
2541 : }
2542 : n = next;
2543 : }
2544 0 : rel->r = NULL; /* transform it into a global aggregate */
2545 0 : rel->exps = list_merge(nexps, rel->exps, (fdup) NULL); /* add grouping columns back as projections */
2546 : /* global aggregates may return 1 row, so filter it based on the count */
2547 0 : sql_subfunc *cf = sql_bind_func(v->sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
2548 0 : sql_exp *count = exp_aggr(v->sql->sa, NULL, cf, 0, 1, CARD_ATOM, 0);
2549 0 : count = rel_groupby_add_aggr(v->sql, rel, count);
2550 0 : sql_exp *cp = exp_compare(v->sql->sa, exp_ref(v->sql, count), exp_atom(v->sql->sa, atom_int(v->sql->sa, exp_subtype(count), 0)), cmp_notequal);
2551 0 : rel = rel_select(v->sql->sa, rel, cp);
2552 0 : set_processed(rel);
2553 0 : return rel;
2554 : }
2555 50 : } else if (list_length(rel->exps) == needed) { /* all are const */
2556 48 : sql_rel *ll = rel->l;
2557 48 : rel->op = op_project;
2558 : /* TODO check if l->l == const, else change that */
2559 48 : if (ll && ll->l) {
2560 44 : rel_destroy(ll);
2561 44 : rel->l = rel_project_exp(v->sql, exp_atom_bool(v->sql->sa, 1));
2562 : }
2563 48 : return rel;
2564 : }
2565 5 : sql_rel *nrel = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
2566 17 : for (node *n = nrel->exps->h; n; n = n->next) {
2567 12 : sql_exp *exp = (sql_exp*) n->data;
2568 12 : if (exp->type == e_column) {
2569 12 : sql_exp *e = rel_find_exp(rel, exp);
2570 :
2571 12 : if (e && exp_is_atom(e) && e->type == e_atom) {
2572 7 : sql_exp *ne = exp_copy(v->sql, e);
2573 7 : assert(ne->alias.label);
2574 7 : exp_setalias(ne, ne->alias.label, exp_find_rel_name(exp), exp_name(exp));
2575 7 : n->data = ne;
2576 7 : v->changes++;
2577 : }
2578 : }
2579 : }
2580 5 : list *nl = sa_list(v->sql->sa);
2581 17 : for (node *n = rel->exps->h; n; n = n->next) {
2582 12 : sql_exp *exp = (sql_exp*) n->data;
2583 :
2584 12 : if (!exp_is_atom(exp) || exp->type != e_atom)
2585 5 : append(nl, exp);
2586 : }
2587 5 : rel->exps = nl;
2588 5 : return nrel;
2589 : }
2590 : }
2591 : return rel;
2592 : }
2593 :
2594 : #if 0
2595 : static sql_rel *
2596 : rel_groupby_distinct2(visitor *v, sql_rel *rel)
2597 : {
2598 : list *ngbes = sa_list(v->sql->sa), *gbes, *naggrs = sa_list(v->sql->sa), *aggrs = sa_list(v->sql->sa);
2599 : sql_rel *l;
2600 : node *n;
2601 :
2602 : gbes = rel->r;
2603 : if (!gbes)
2604 : return rel;
2605 :
2606 : /* check if each aggr is, rewritable (max,min,sum,count)
2607 : * and only has one argument */
2608 : for (n = rel->exps->h; n; n = n->next) {
2609 : sql_exp *e = n->data;
2610 : sql_subfunc *af = e->f;
2611 :
2612 : if (e->type == e_aggr &&
2613 : (strcmp(af->func->base.name, "sum") &&
2614 : strcmp(af->func->base.name, "count") &&
2615 : strcmp(af->func->base.name, "min") &&
2616 : strcmp(af->func->base.name, "max")))
2617 : return rel;
2618 : }
2619 :
2620 : for (n = gbes->h; n; n = n->next) {
2621 : sql_exp *e = n->data;
2622 :
2623 : e = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
2624 : append(ngbes, e);
2625 : }
2626 :
2627 : /* 1 for each aggr(distinct v) add the attribute expression v to gbes and aggrs list
2628 : * 2 for each aggr(z) add aggr_phase2('z') to the naggrs list
2629 : * 3 for each group by col, add also to the naggrs list
2630 : * */
2631 : for (n = rel->exps->h; n; n = n->next) {
2632 : sql_exp *e = n->data;
2633 :
2634 : if (e->type == e_aggr && need_distinct(e)) { /* 1 */
2635 : /* need column expression */
2636 : list *args = e->l;
2637 : sql_exp *v = args->h->data;
2638 : append(gbes, v);
2639 : if (!exp_name(v))
2640 : exp_label(v->sql->sa, v, ++v->sql->label);
2641 : v = exp_column(v->sql->sa, exp_find_rel_name(v), exp_name(v), exp_subtype(v), v->card, has_nil(v), is_unique(v), is_intern(v));
2642 : append(aggrs, v);
2643 : v = exp_aggr1(v->sql->sa, v, e->f, need_distinct(e), 1, e->card, 1);
2644 : exp_setalias(v, e->alias.label, exp_find_rel_name(e), exp_name(e));
2645 : append(naggrs, v);
2646 : } else if (e->type == e_aggr && !need_distinct(e)) {
2647 : sql_exp *v;
2648 : sql_subfunc *f = e->f;
2649 : int cnt = exp_aggr_is_count(e);
2650 : sql_subfunc *a = sql_bind_func(v->sql, "sys", (cnt)?"sum":f->func->base.name, exp_subtype(e), NULL, F_AGGR, true, true);
2651 :
2652 : append(aggrs, e);
2653 : if (!exp_name(e))
2654 : exp_label(v->sql->sa, e, ++v->sql->label);
2655 : set_has_nil(e);
2656 : v = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
2657 : v = exp_aggr1(v->sql->sa, v, a, 0, 1, e->card, 1);
2658 : if (cnt)
2659 : set_zero_if_empty(v);
2660 : exp_setalias(v, e->alias.label, exp_find_rel_name(e), exp_name(e));
2661 : append(naggrs, v);
2662 : } else { /* group by col */
2663 : if (list_find_exp(gbes, e) || !list_find_exp(naggrs, e)) {
2664 : append(aggrs, e);
2665 :
2666 : e = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
2667 : }
2668 : append(naggrs, e);
2669 : }
2670 : }
2671 :
2672 : l = rel->l = rel_groupby(v->sql, rel->l, gbes);
2673 : l->exps = aggrs;
2674 : rel->r = ngbes;
2675 : rel->exps = naggrs;
2676 : v->changes++;
2677 : return rel;
2678 : }
2679 : #endif
2680 :
2681 : /* Rewrite group by expressions with distinct
2682 : *
2683 : * ie select a, count(distinct b) from c where ... groupby a;
2684 : * No other aggregations should be present
2685 : *
2686 : * Rewrite the more general case, good for parallel execution
2687 : *
2688 : * groupby(R) [e,f] [ aggr1 a distinct, aggr2 b distinct, aggr3 c, aggr4 d]
2689 : *
2690 : * into
2691 : *
2692 : * groupby(
2693 : * groupby(R) [e,f,a,b] [ a, b, aggr3 c, aggr4 d]
2694 : * ) [e,f]( aggr1 a distinct, aggr2 b distinct, aggr3_phase2 c, aggr4_phase2 d)
2695 : */
2696 : static inline sql_rel *
2697 97531 : rel_groupby_distinct(visitor *v, sql_rel *rel)
2698 : {
2699 97531 : node *n;
2700 :
2701 97531 : if (is_groupby(rel->op)) {
2702 97460 : sql_rel *l = rel->l;
2703 97460 : if (!l || is_groupby(l->op))
2704 : return rel;
2705 : }
2706 97397 : if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel)) {
2707 48963 : int nr = 0, anr = 0;
2708 48963 : list *gbe, *ngbe, *arg, *exps, *nexps;
2709 48963 : sql_exp *distinct = NULL, *darg, *found;
2710 48963 : sql_rel *l = NULL;
2711 :
2712 177812 : for (n=rel->exps->h; n && nr <= 2; n = n->next) {
2713 128849 : sql_exp *e = n->data;
2714 128849 : if (need_distinct(e)) {
2715 393 : distinct = n->data;
2716 393 : nr++;
2717 : }
2718 128849 : anr += is_aggr(e->type);
2719 : }
2720 48963 : if (nr < 1 || distinct->type != e_aggr)
2721 : return rel;
2722 386 : if (nr > 1 || anr > nr)
2723 : return rel;//rel_groupby_distinct2(v, rel);
2724 117 : arg = distinct->l;
2725 117 : if (list_length(arg) != 1 || list_length(rel->r) + nr != list_length(rel->exps))
2726 2 : return rel;
2727 :
2728 115 : gbe = rel->r;
2729 115 : ngbe = sa_list(v->sql->sa);
2730 115 : exps = sa_list(v->sql->sa);
2731 115 : nexps = sa_list(v->sql->sa);
2732 362 : for (n=rel->exps->h; n; n = n->next) {
2733 247 : sql_exp *e = n->data;
2734 247 : if (e != distinct) {
2735 132 : if (e->type == e_aggr) { /* copy the arguments to the aggregate */
2736 0 : list *args = e->l;
2737 0 : if (args) {
2738 0 : for (node *n = args->h ; n ; n = n->next) {
2739 0 : sql_exp *e = n->data;
2740 0 : list_append(ngbe, exp_copy(v->sql, e));
2741 0 : list_append(exps, exp_copy(v->sql, e));
2742 : }
2743 : }
2744 : } else {
2745 132 : e = exp_ref(v->sql, e);
2746 132 : append(ngbe, e);
2747 132 : append(exps, e);
2748 : }
2749 132 : if (e->type == e_aggr) /* aggregates must be copied */
2750 0 : e = exp_copy(v->sql, e);
2751 : else
2752 132 : e = exp_ref(v->sql, e);
2753 132 : append(nexps, e);
2754 : }
2755 : }
2756 :
2757 115 : darg = arg->h->data;
2758 115 : if ((found = exps_find_exp(exps, darg)) == NULL) { /* not already in the groups projection list */
2759 112 : if ((found = exps_find_exp(gbe, darg))) { /* first find if the aggregate argument already exists in the grouping list */
2760 0 : darg = exp_ref(v->sql, found);
2761 : } else {
2762 112 : list_append(gbe, darg = exp_copy(v->sql, darg));
2763 112 : exp_label(v->sql->sa, darg, ++v->sql->label);
2764 112 : darg = exp_ref(v->sql, darg);
2765 : }
2766 112 : list_append(exps, darg);
2767 112 : darg = exp_ref(v->sql, darg);
2768 : } else {
2769 3 : darg = exp_ref(v->sql, found);
2770 : }
2771 115 : arg->h->data = darg;
2772 115 : l = rel->l = rel_groupby(v->sql, rel->l, gbe);
2773 115 : l->exps = exps;
2774 115 : set_processed(l);
2775 115 : rel->r = ngbe;
2776 115 : rel->exps = nexps;
2777 115 : set_nodistinct(distinct);
2778 115 : append(nexps, distinct);
2779 115 : v->changes++;
2780 : }
2781 : return rel;
2782 : }
2783 :
2784 : /*
2785 : * Push Count inside crossjoin down, and multiply the results
2786 : *
2787 : * project ( project(
2788 : * group by ( crossproduct (
2789 : * crossproduct( project (
2790 : * L, => group by (
2791 : * R L
2792 : * ) [ ] [ count NOT NULL ] ) [ ] [ count NOT NULL ]
2793 : * ) ),
2794 : * ) [ NOT NULL ] project (
2795 : * group by (
2796 : * R
2797 : * ) [ ] [ count NOT NULL ]
2798 : * )
2799 : * ) [ sql_mul(.., .. NOT NULL) ]
2800 : * )
2801 : */
2802 : static inline sql_rel *
2803 97531 : rel_push_count_down(visitor *v, sql_rel *rel)
2804 : {
2805 97531 : sql_rel *r = rel->l;
2806 :
2807 97531 : if (is_groupby(rel->op) && !rel_is_ref(rel) && list_empty(rel->r) &&
2808 25374 : r && !r->exps && r->op == op_join && !(rel_is_ref(r)) &&
2809 : /* currently only single count aggregation is handled, no other projects or aggregation */
2810 56 : list_length(rel->exps) == 1 && exp_aggr_is_count(rel->exps->h->data)) {
2811 12 : sql_exp *nce, *oce, *cnt1 = NULL, *cnt2 = NULL;
2812 12 : sql_rel *gbl = NULL, *gbr = NULL; /* Group By */
2813 12 : sql_rel *cp = NULL; /* Cross Product */
2814 12 : sql_rel *srel;
2815 :
2816 12 : oce = rel->exps->h->data;
2817 12 : if (oce->l) /* we only handle COUNT(*) */
2818 : return rel;
2819 :
2820 10 : srel = r->l;
2821 : {
2822 10 : sql_subfunc *cf = sql_bind_func(v->sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
2823 10 : sql_exp *e = exp_aggr(v->sql->sa, NULL, cf, need_distinct(oce), need_no_nil(oce), oce->card, 0);
2824 :
2825 10 : exp_label(v->sql->sa, e, ++v->sql->label);
2826 10 : cnt1 = exp_ref(v->sql, e);
2827 10 : gbl = rel_groupby(v->sql, rel_dup(srel), NULL);
2828 10 : set_processed(gbl);
2829 10 : rel_groupby_add_aggr(v->sql, gbl, e);
2830 : }
2831 :
2832 10 : srel = r->r;
2833 : {
2834 10 : sql_subfunc *cf = sql_bind_func(v->sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
2835 10 : sql_exp *e = exp_aggr(v->sql->sa, NULL, cf, need_distinct(oce), need_no_nil(oce), oce->card, 0);
2836 :
2837 10 : exp_label(v->sql->sa, e, ++v->sql->label);
2838 10 : cnt2 = exp_ref(v->sql, e);
2839 10 : gbr = rel_groupby(v->sql, rel_dup(srel), NULL);
2840 10 : set_processed(gbr);
2841 10 : rel_groupby_add_aggr(v->sql, gbr, e);
2842 : }
2843 :
2844 10 : cp = rel_crossproduct(v->sql->sa, gbl, gbr, op_join);
2845 10 : set_processed(cp);
2846 :
2847 10 : if (!(nce = rel_binop_(v->sql, NULL, cnt1, cnt2, "sys", "sql_mul", card_value, true))) {
2848 0 : v->sql->session->status = 0;
2849 0 : v->sql->errstr[0] = '\0';
2850 0 : return rel; /* error, fallback to original expression */
2851 : }
2852 : /* because of remote plans, make sure "sql_mul" returns bigint. The cardinality is atomic, so no major performance penalty */
2853 10 : if (subtype_cmp(exp_subtype(oce), exp_subtype(nce)) != 0)
2854 10 : nce = exp_convert(v->sql, nce, exp_subtype(nce), exp_subtype(oce));
2855 10 : if (exp_name(oce))
2856 10 : exp_prop_alias(v->sql->sa, nce, oce);
2857 :
2858 10 : rel_destroy(rel);
2859 10 : rel = rel_project(v->sql->sa, cp, append(new_exp_list(v->sql->sa), nce));
2860 10 : set_processed(rel);
2861 :
2862 10 : v->changes++;
2863 : }
2864 :
2865 : return rel;
2866 : }
2867 :
2868 : static inline sql_rel *
2869 42783 : rel_basecount(visitor *v, sql_rel *rel)
2870 : {
2871 53177 : if (is_groupby(rel->op) && !rel_is_ref(rel) && rel->l && list_empty(rel->r) &&
2872 19751 : list_length(rel->exps) == 1 && exp_aggr_is_count(rel->exps->h->data)) {
2873 4489 : sql_rel *bt = rel->l;
2874 4489 : sql_exp *e = rel->exps->h->data;
2875 4489 : if (is_basetable(bt->op) && list_empty(e->l)) { /* count(*) */
2876 : /* change into select cnt('schema','table') */
2877 684 : sql_table *t = bt->l;
2878 : /* I need to get the declared table's frame number to make this work correctly for those */
2879 684 : if (!isTable(t) || isDeclaredTable(t))
2880 : return rel;
2881 605 : sql_subfunc *cf = sql_bind_func(v->sql, "sys", "cnt", sql_bind_localtype("str"), sql_bind_localtype("str"), F_FUNC, true, true);
2882 605 : list *exps = sa_list(v->sql->sa);
2883 605 : append(exps, exp_atom_str(v->sql->sa, t->s->base.name, sql_bind_localtype("str")));
2884 605 : append(exps, exp_atom_str(v->sql->sa, t->base.name, sql_bind_localtype("str")));
2885 605 : sql_exp *ne = exp_op(v->sql->sa, exps, cf);
2886 :
2887 605 : ne = exp_propagate(v->sql->sa, ne, e);
2888 605 : exp_setalias(ne, e->alias.label, exp_find_rel_name(e), exp_name(e));
2889 605 : rel_destroy(rel);
2890 605 : rel = rel_project(v->sql->sa, NULL, append(sa_list(v->sql->sa), ne));
2891 605 : v->changes++;
2892 : }
2893 : }
2894 : return rel;
2895 : }
2896 :
2897 : static inline sql_rel *
2898 42783 : rel_simplify_count(visitor *v, sql_rel *rel)
2899 : {
2900 42783 : if (is_groupby(rel->op) && !list_empty(rel->exps)) {
2901 42700 : mvc *sql = v->sql;
2902 42700 : int ncountstar = 0;
2903 :
2904 : /* Convert count(no null) into count(*) */
2905 134474 : for (node *n = rel->exps->h; n ; n = n->next) {
2906 91774 : sql_exp *e = n->data;
2907 :
2908 91774 : if (exp_aggr_is_count(e) && !need_distinct(e)) {
2909 15641 : if (list_length(e->l) == 0) {
2910 12881 : ncountstar++;
2911 2760 : } else if (list_length(e->l) == 1 && !has_nil((sql_exp*)((list*)e->l)->h->data)) {
2912 2600 : sql_subfunc *cf = sql_bind_func(sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
2913 2600 : sql_exp *ne = exp_aggr(sql->sa, NULL, cf, 0, 0, e->card, 0);
2914 2600 : if (exp_name(e))
2915 2600 : exp_prop_alias(sql->sa, ne, e);
2916 2600 : n->data = ne;
2917 2600 : ncountstar++;
2918 2600 : v->changes++;
2919 : }
2920 : }
2921 : }
2922 : /* With multiple count(*), use exp_ref to reduce the number of calls to this aggregate */
2923 42700 : if (ncountstar > 1) {
2924 56 : sql_exp *count_star = NULL;
2925 56 : sql_rel *nrel = rel_project(v->sql->sa, rel, NULL);
2926 56 : list *aexps = sa_list(v->sql->sa), *nexps = sa_list(v->sql->sa);
2927 56 : nrel->exps = nexps;
2928 247 : for (node *n = rel->exps->h; n ; n = n->next) {
2929 191 : sql_exp *e = n->data;
2930 :
2931 191 : if (exp_aggr_is_count(e) && !need_distinct(e) && list_length(e->l) == 0) {
2932 126 : if (!count_star) {
2933 56 : count_star = e;
2934 56 : append(aexps, e);
2935 56 : append(nexps, exp_ref(sql, e));
2936 : } else {
2937 70 : sql_exp *ne = exp_ref(sql, count_star);
2938 :
2939 70 : if (exp_name(e))
2940 70 : exp_prop_alias(sql->sa, ne, e);
2941 70 : v->changes++;
2942 70 : append(nexps, ne);
2943 : }
2944 : } else {
2945 65 : append(aexps, e);
2946 65 : append(nexps, exp_ref(sql, e));
2947 : }
2948 : }
2949 56 : rel->exps = aexps;
2950 56 : return nrel;
2951 : }
2952 : }
2953 : return rel;
2954 : }
2955 :
2956 : static sql_rel *
2957 42783 : rel_groupjoin(visitor *v, sql_rel *rel)
2958 : {
2959 42783 : if (!rel || rel_is_ref(rel) || !is_groupby(rel->op) || list_empty(rel->r))
2960 14620 : return rel;
2961 :
2962 28163 : sql_rel *j = rel->l;
2963 28163 : if (!j || rel_is_ref(j) /*|| !is_left(j->op)*/ || j->op != op_join || list_length(rel->exps) > 1 /* only join because left joins aren't optimized jet (TODO), only length 1 as implementation of groupjoins is missing */ || !list_empty(rel->attr))
2964 27446 : return rel;
2965 : /* check group by exps == equi join exps */
2966 717 : list *gbes = rel->r;
2967 717 : if (list_length(gbes) != list_length(j->exps))
2968 : return rel;
2969 618 : int nr = 0;
2970 1233 : for(node *n = gbes->h; n; n = n->next) {
2971 618 : sql_exp *gbe = n->data;
2972 1233 : for(node *m = j->exps->h; m; m = m->next) {
2973 618 : sql_exp *je = m->data;
2974 618 : if (je->type != e_cmp || je->flag != cmp_equal)
2975 : return rel;
2976 : /* check if its a join exp (ie not a selection) */
2977 619 : if (!( (!rel_has_exp(j->l, je->l, false) && !rel_has_exp(j->r, je->r, false)) ||
2978 8 : (!rel_has_exp(j->l, je->r, false) && !rel_has_exp(j->r, je->l, false))))
2979 0 : return rel;
2980 615 : if (exp_match(je->l, gbe)) {
2981 76 : nr++;
2982 539 : } else if (exp_match(je->r, gbe)) {
2983 10 : nr++;
2984 : }
2985 : }
2986 : }
2987 615 : if (nr == list_length(gbes)) {
2988 : // printf("#group by converted\n");
2989 86 : j = rel_dup(j);
2990 86 : j->attr = rel->exps;
2991 86 : v->changes++;
2992 86 : rel_destroy(rel);
2993 86 : return j;
2994 : }
2995 : return rel;
2996 : }
2997 :
2998 : /* select k1 from bla where k1 = const -> select const from bla where k1 = const */
2999 : static sql_rel *
3000 3874523 : rel_project_select_exp(visitor *v, sql_rel *rel)
3001 : {
3002 3874523 : if (is_simple_project(rel->op) && rel->exps && rel->l) {
3003 1267071 : sql_rel *l = rel->l;
3004 1267071 : if (is_select(l->op) && l->exps) {
3005 1295550 : for(node *n = rel->exps->h; n; n = n->next) {
3006 975166 : sql_exp *col = n->data;
3007 975166 : if (col->type == e_column) {
3008 1755090 : for(node *m = l->exps->h; m; m = m->next) {
3009 1045042 : sql_exp *cmp = m->data;
3010 1045042 : if (cmp->type == e_cmp && cmp->flag == cmp_equal && !is_anti(cmp) && !is_semantics(cmp) && exp_is_atom(cmp->r)) {
3011 632604 : sql_exp *l = cmp->l;
3012 632604 : if(l->type == e_column && col->nid == l->nid) {
3013 : /* replace column with the constant */
3014 13617 : sql_exp *e = n->data = exp_copy(v->sql, cmp->r);
3015 13617 : exp_setalias(e, col->alias.label, exp_relname(col), exp_name(col));
3016 13617 : exp_propagate(v->sql->sa, e, col);
3017 13617 : list_hash_clear(rel->exps);
3018 : }
3019 : }
3020 : }
3021 : }
3022 : }
3023 : }
3024 : }
3025 3874522 : return rel;
3026 : }
3027 :
3028 : static sql_rel *
3029 3874523 : rel_optimize_projections_(visitor *v, sql_rel *rel)
3030 : {
3031 3874523 : rel = rel_project_cse(v, rel);
3032 3874523 : rel = rel_project_select_exp(v, rel);
3033 :
3034 3874535 : if (!rel || !is_groupby(rel->op))
3035 : return rel;
3036 :
3037 97531 : rel = rel_remove_const_aggr(v, rel);
3038 97531 : if (v->value_based_opt) {
3039 42783 : rel = rel_simplify_sum(v, rel);
3040 42783 : rel = rel_simplify_groupby_columns(v, rel);
3041 : }
3042 97531 : rel = rel_groupby_cse(v, rel);
3043 97531 : rel = rel_push_aggr_down(v, rel);
3044 97531 : rel = rel_push_groupby_down(v, rel);
3045 97531 : rel = rel_reduce_groupby_exps(v, rel);
3046 97531 : rel = rel_distinct_aggregate_on_unique_values(v, rel);
3047 97531 : rel = rel_groupby_distinct(v, rel);
3048 97531 : rel = rel_push_count_down(v, rel);
3049 : /* only when value_based_opt is on, ie not for dependency resolution */
3050 97531 : if (v->value_based_opt) {
3051 42783 : rel = rel_simplify_count(v, rel);
3052 42783 : rel = rel_basecount(v, rel);
3053 :
3054 42783 : rel = rel_groupjoin(v, rel);
3055 : }
3056 : return rel;
3057 : }
3058 :
3059 : static sql_rel *
3060 185698 : rel_optimize_projections(visitor *v, global_props *gp, sql_rel *rel)
3061 : {
3062 185698 : (void) gp;
3063 185698 : return rel_visitor_topdown(v, rel, &rel_optimize_projections_);
3064 : }
3065 :
3066 : run_optimizer
3067 629453 : bind_optimize_projections(visitor *v, global_props *gp)
3068 : {
3069 629453 : int flag = v->sql->sql_optimizer;
3070 629249 : return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_project] || gp->cnt[op_union] || gp->cnt[op_munion]
3071 1258702 : || gp->cnt[op_inter] || gp->cnt[op_except]) && (flag & optimize_projections) ? rel_optimize_projections : NULL;
3072 : }
3073 :
3074 :
3075 : static inline sql_rel *
3076 2737916 : rel_push_project_down_union(visitor *v, sql_rel *rel)
3077 : {
3078 : /* first remove distinct if already unique */
3079 2737916 : if (rel->op == op_project && need_distinct(rel) && rel->exps && exps_unique(v->sql, rel, rel->exps) && !have_nil(rel->exps)) {
3080 109 : set_nodistinct(rel);
3081 109 : if (exps_card(rel->exps) <= CARD_ATOM && rel->card > CARD_ATOM) { /* if the projection just contains constants, then no topN is needed */
3082 2 : sql_rel *nl = rel->l = rel_topn(v->sql->sa, rel->l, append(sa_list(v->sql->sa), exp_atom_lng(v->sql->sa, 1)));
3083 2 : set_processed(nl);
3084 : }
3085 109 : v->changes++;
3086 : }
3087 :
3088 2737916 : if (rel->op == op_project && rel->l && rel->exps && list_empty(rel->r)) {
3089 927093 : int need_distinct = need_distinct(rel);
3090 927093 : sql_rel *u = rel->l;
3091 927093 : sql_rel *p = rel;
3092 :
3093 927093 : if (!u || !(is_union(u->op) || is_munion(u->op)) || need_distinct(u) || is_recursive(u) || !u->exps || rel_is_ref(u) || project_unsafe(rel, false))
3094 898027 : return rel;
3095 :
3096 29066 : sql_rel *r;
3097 :
3098 29066 : assert(!is_union(u->op));
3099 29066 : if (is_recursive(u))
3100 : return rel;
3101 :
3102 : /* don't push project down union of single values */
3103 87363 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
3104 58319 : r = n->data;
3105 : // TODO: does this check make sense?
3106 58319 : if (is_project(r->op) && !r->l)
3107 : return rel;
3108 : }
3109 :
3110 87341 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
3111 58297 : r = rel_dup(n->data);
3112 :
3113 : /* introduce projection around each operand if needed */
3114 58297 : if (!is_project(r->op))
3115 270 : r = rel_project(v->sql->sa, r,
3116 : rel_projections(v->sql, r, NULL, 1, 1));
3117 : /* check if we need distinct */
3118 116594 : need_distinct &=
3119 58297 : (!exps_unique(v->sql, r, r->exps) || have_nil(r->exps));
3120 58297 : rel_rename_exps(v->sql, u->exps, r->exps);
3121 :
3122 58297 : rel_destroy(n->data);
3123 58297 : n->data = r;
3124 : }
3125 :
3126 : /* once we have checked for need_distinct in every rel we can
3127 : * introduce the projects under the munion which are gonna be
3128 : * copies of the single project above munion */
3129 87341 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
3130 58297 : r = rel_dup(n->data);
3131 :
3132 58297 : r = rel_project(v->sql->sa, r, NULL);
3133 58297 : if (need_distinct)
3134 0 : set_distinct(r);
3135 58297 : r->exps = exps_copy(v->sql, p->exps);
3136 58297 : set_processed(r);
3137 :
3138 58297 : rel_destroy(n->data);
3139 58297 : n->data = r;
3140 : }
3141 :
3142 : /* turn the project-munion on top into munion. incr operand
3143 : * rels count to make sure that they are not deleted by the
3144 : * subsequent rel_inplace_setop_n_ary */
3145 87341 : for (node *n = ((list*)u->l)->h; n; n = n->next)
3146 58297 : rel_dup(n->data);
3147 29044 : rel = rel_inplace_setop_n_ary(v->sql, rel, u->l, op_munion,
3148 : rel_projections(v->sql, rel, NULL, 1, 1));
3149 29044 : if (need_distinct)
3150 0 : set_distinct(rel);
3151 29044 : if (is_single(u))
3152 2 : set_single(rel);
3153 :
3154 29044 : v->changes++;
3155 :
3156 : /* if any operand has two project above then squash them */
3157 87341 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
3158 58297 : r = rel_dup(n->data);
3159 58297 : r = rel_merge_projects_(v, r);
3160 58297 : rel_destroy(n->data);
3161 58297 : n->data = r;
3162 : }
3163 :
3164 : return rel;
3165 : }
3166 : return rel;
3167 : }
3168 :
3169 : static inline sql_rel *
3170 2737916 : rel_merge_unions(visitor *v, sql_rel *rel)
3171 : {
3172 : /* stacked munion flattening e.g.
3173 : * munion( munion(a, b, c), munion(d, e)) => munion(a,b,c,d,e)
3174 : */
3175 2737916 : if (rel && is_munion(rel->op) && !is_recursive(rel)) {
3176 80486 : list *l = rel->l;
3177 518449 : for(node *n = l->h; n; ) {
3178 357477 : node *next = n->next;
3179 357477 : sql_rel *oc = n->data;
3180 357477 : sql_rel *c = oc;
3181 :
3182 : /* account for any group-bys pushed down between stacked munions */
3183 357477 : if (oc->op == op_groupby)
3184 3285 : c = oc->l;
3185 :
3186 357477 : if (is_munion(c->op)) {
3187 10599 : c = rel_dup(c);
3188 10599 : list_remove_node(l, NULL, n);
3189 10599 : l = list_merge(l, c->l, (fdup)NULL);
3190 10599 : c->l = NULL;
3191 10599 : rel_destroy(oc);
3192 10599 : if (!next)
3193 3 : next = l->h;
3194 10599 : v->changes++;
3195 : }
3196 : n = next;
3197 : }
3198 80486 : rel->l = l;
3199 : }
3200 2737916 : return rel;
3201 : }
3202 :
3203 :
3204 : /*
3205 : * Push (semi)joins down unions, this is basically for merge tables, where
3206 : * we know that the fk-indices are split over two clustered merge tables.
3207 : */
3208 : static inline sql_rel *
3209 2737916 : rel_push_join_down_munion(visitor *v, sql_rel *rel)
3210 : {
3211 2737916 : if ((is_join(rel->op) && !is_outerjoin(rel->op) && !is_single(rel)) || is_semi(rel->op)) {
3212 337453 : sql_rel *l = rel->l, *r = rel->r, *ol = l, *or = r;
3213 337453 : list *exps = rel->exps, *attr = rel->attr;
3214 337453 : sql_exp *je = NULL;
3215 :
3216 337453 : if (is_recursive(l) || is_recursive(r))
3217 : return rel;
3218 : /* we would like to optimize in place reference rels which point
3219 : * to replica tables and let the replica optimizer handle those
3220 : * later. otherwise we miss the push join down optimization due
3221 : * to the rel_is_ref bailout
3222 : */
3223 337449 : if (rel_is_ref(l) && is_basetable(l->op) && l->l && isReplicaTable((sql_table*)l->l)) {
3224 1 : rel->l = rel_copy(v->sql, l, true);
3225 1 : rel_destroy(l);
3226 : }
3227 337449 : if (rel_is_ref(r) && is_basetable(r->op) && r->l && isReplicaTable((sql_table*)r->l)) {
3228 4 : rel->r = rel_copy(v->sql, r, true);
3229 4 : rel_destroy(r);
3230 : }
3231 :
3232 : // TODO: do we need to check if it's l/r are refs?
3233 337449 : if (!l || !r || need_distinct(l) || need_distinct(r) || rel_is_ref(l) || rel_is_ref(r))
3234 : return rel;
3235 284939 : if (l->op == op_project)
3236 23240 : l = l->l;
3237 284939 : if (r->op == op_project)
3238 26878 : r = r->l;
3239 :
3240 : /* both sides only if we have a join index ASSUMING pkey-fkey are aligned */
3241 : // TODO: we could also check if the join cols are (not) unique
3242 284939 : bool aligned_pk_fk = true;
3243 284939 : if (!l || !r || (is_munion(l->op) && is_munion(r->op) &&
3244 173 : !(je = rel_is_join_on_pkey(rel, aligned_pk_fk))))
3245 861 : return rel;
3246 :
3247 : // TODO: why? bailout for union semijoin without pkey joins expressions
3248 284078 : if (is_semi(rel->op) && is_munion(l->op) && !je)
3249 : return rel;
3250 :
3251 : /* if both sides are munions we assume that they will have the same number of children */
3252 283212 : if (is_munion(l->op) && is_munion(r->op) && list_length(l->l) != list_length(r->l))
3253 : return rel;
3254 :
3255 283212 : if (is_munion(l->op) && !need_distinct(l) && !is_single(l) && !is_recursive(l) &&
3256 4561 : !is_munion(r->op)){
3257 : /* join(munion(a,b,c), d) -> munion(join(a,d), join(b,d), join(c,d)) */
3258 4561 : list *js = sa_list(v->sql->sa);
3259 13693 : for (node *n = ((list*)l->l)->h; n; n = n->next) {
3260 9132 : sql_rel *pc = rel_dup(n->data);
3261 9132 : if (!is_project(pc->op))
3262 16 : pc = rel_project(v->sql->sa, pc, rel_projections(v->sql, pc, NULL, 1, 1));
3263 9132 : rel_rename_exps(v->sql, l->exps, pc->exps);
3264 9132 : if (l != ol) {
3265 12 : pc = rel_project(v->sql->sa, pc, NULL);
3266 12 : pc->exps = exps_copy(v->sql, ol->exps);
3267 12 : set_processed(pc);
3268 : }
3269 9132 : pc = rel_crossproduct(v->sql->sa, pc, rel_dup(or), rel->op);
3270 9132 : pc->exps = exps_copy(v->sql, exps);
3271 9132 : pc->attr = exps_copy(v->sql, attr);
3272 9132 : set_processed(pc);
3273 9132 : pc = rel_project(v->sql->sa, pc, rel_projections(v->sql, pc, NULL, 1, 1));
3274 9132 : js = append(js, pc);
3275 : }
3276 4561 : v->changes++;
3277 4561 : return rel_inplace_setop_n_ary(v->sql, rel, js, op_munion,
3278 : rel_projections(v->sql, rel, NULL, 1, 1));
3279 278651 : } else if (is_munion(l->op) && !need_distinct(l) && !is_single(l) && !is_recursive(l) &&
3280 0 : is_munion(r->op) && !need_distinct(r) && !is_single(r) && !is_recursive(r) &&
3281 : je) {
3282 : /* join(munion(a,b,c), munion(d,e,f)) -> munion(join(a,d), join(b,e), join(c,f)) */
3283 0 : list *cps = sa_list(v->sql->sa);
3284 : /* create pairwise joins between left and right parts. assume eq num of parts (see earlier bailout) */
3285 0 : for (node *n = ((list*)l->l)->h, *m=((list*)r->l)->h; n && m; n = n->next, m = m->next) {
3286 : /* left part */
3287 0 : sql_rel *lp = rel_dup(n->data);
3288 0 : if (!is_project(lp->op))
3289 0 : lp = rel_project(v->sql->sa, lp, rel_projections(v->sql, lp, NULL, 1, 1));
3290 0 : rel_rename_exps(v->sql, l->exps, lp->exps);
3291 0 : if (l != ol) {
3292 0 : lp = rel_project(v->sql->sa, lp, NULL);
3293 0 : lp->exps = exps_copy(v->sql, ol->exps);
3294 0 : set_processed(lp);
3295 : }
3296 : /* right part */
3297 0 : sql_rel *rp = rel_dup(m->data);
3298 0 : if (!is_project(rp->op))
3299 0 : rp = rel_project(v->sql->sa, rp, rel_projections(v->sql, rp, NULL, 1, 1));
3300 0 : rel_rename_exps(v->sql, r->exps, rp->exps);
3301 0 : if (r != or) {
3302 0 : rp = rel_project(v->sql->sa, rp, NULL);
3303 0 : rp->exps = exps_copy(v->sql, or->exps);
3304 0 : set_processed(rp);
3305 : }
3306 : /* combine them */
3307 0 : sql_rel *cp = rel_crossproduct(v->sql->sa, lp, rp, rel->op);
3308 0 : cp->exps = exps_copy(v->sql, exps);
3309 0 : cp->attr = exps_copy(v->sql, attr);
3310 0 : set_processed(cp);
3311 0 : cp = rel_project(v->sql->sa, cp, rel_projections(v->sql, cp, NULL, 1, 1));
3312 0 : cps = append(cps, cp);
3313 : }
3314 0 : v->changes++;
3315 0 : return rel_inplace_setop_n_ary(v->sql, rel, cps, op_munion,
3316 : rel_projections(v->sql, rel, NULL, 1, 1));
3317 278651 : } else if (!is_munion(l->op) &&
3318 278643 : is_munion(r->op) && !need_distinct(r) && !is_single(r) && !is_recursive(r) &&
3319 2945 : !is_semi(rel->op)) {
3320 : /* join(a, munion(b,c,d)) -> munion(join(a,b), join(a,c), join(a,d)) */
3321 1574 : list *js = sa_list(v->sql->sa);
3322 4725 : for (node *n = ((list*)r->l)->h; n; n = n->next) {
3323 3151 : sql_rel *pc = rel_dup(n->data);
3324 3151 : if (!is_project(pc->op))
3325 13 : pc = rel_project(v->sql->sa, pc, rel_projections(v->sql, pc, NULL, 1, 1));
3326 3151 : rel_rename_exps(v->sql, r->exps, pc->exps);
3327 3151 : if (r != or) {
3328 125 : pc = rel_project(v->sql->sa, pc, NULL);
3329 125 : pc->exps = exps_copy(v->sql, or->exps);
3330 125 : set_processed(pc);
3331 : }
3332 3151 : pc = rel_crossproduct(v->sql->sa, rel_dup(ol), pc, rel->op);
3333 3151 : pc->exps = exps_copy(v->sql, exps);
3334 3151 : pc->attr = exps_copy(v->sql, attr);
3335 3151 : set_processed(pc);
3336 3151 : pc = rel_project(v->sql->sa, pc, rel_projections(v->sql, pc, NULL, 1, 1));
3337 3151 : js = append(js, pc);
3338 : }
3339 1574 : v->changes++;
3340 1574 : return rel_inplace_setop_n_ary(v->sql, rel, js, op_munion,
3341 : rel_projections(v->sql, rel, NULL, 1, 1));
3342 277077 : } else if (!is_munion(l->op) &&
3343 277069 : is_munion(r->op) && !need_distinct(r) && !is_single(r) && !is_recursive(r) &&
3344 1371 : is_semi(rel->op) && je) {
3345 : /* {semi}join ( A1, munion (B, A2a, C, A2b)) [A1.partkey = A2.partkey] ->
3346 : * {semi}join ( A1, munion (A2a, A2b))
3347 : * (ie some parts of an n-th munion operand)
3348 : *
3349 : * How to detect that a relation isn't matching?
3350 : * partitioning is currently done only on pkey/fkey's
3351 : * ie only matching per part if join is on pkey/fkey (parts)
3352 : * and part numbers should match.
3353 : * */
3354 0 : int lpnr = rel_part_nr(l, je);
3355 0 : if (lpnr < 0)
3356 : return rel;
3357 :
3358 0 : list *ups = sa_list(v->sql->sa);
3359 0 : for (node *n = ((list*)r->l)->h; n; n = n->next) {
3360 0 : if (rel_uses_part_nr(n->data, je, lpnr)) {
3361 0 : sql_rel *pc = rel_dup(n->data);
3362 0 : ups = append(ups, pc);
3363 : }
3364 : }
3365 0 : v->changes++;
3366 0 : return rel_inplace_setop_n_ary(v->sql, r, ups, op_munion,
3367 : rel_projections(v->sql, rel, NULL, 1, 1));
3368 : }
3369 : }
3370 : return rel;
3371 : }
3372 :
3373 : static sql_rel *
3374 2737916 : rel_optimize_unions_topdown_(visitor *v, sql_rel *rel)
3375 : {
3376 2737916 : rel = rel_push_project_down_union(v, rel);
3377 2737916 : rel = rel_merge_unions(v, rel);
3378 2737916 : rel = rel_push_join_down_munion(v, rel);
3379 2737916 : return rel;
3380 : }
3381 :
3382 : static sql_rel *
3383 28976 : rel_optimize_unions_topdown(visitor *v, global_props *gp, sql_rel *rel)
3384 : {
3385 28976 : (void) gp;
3386 28976 : return rel_visitor_topdown(v, rel, &rel_optimize_unions_topdown_);
3387 : }
3388 :
3389 : run_optimizer
3390 629759 : bind_optimize_unions_topdown(visitor *v, global_props *gp)
3391 : {
3392 629759 : (void) v;
3393 629759 : return gp->opt_level == 1 && gp->cnt[op_munion] ? rel_optimize_unions_topdown : NULL;
3394 : }
3395 :
3396 :
3397 : static sql_column *
3398 0 : is_fk_column_of_pk(mvc *sql, sql_rel *rel, sql_column *pkc, sql_exp *e) /* test if e is a foreign key column for the pk on pkc */
3399 : {
3400 0 : sql_trans *tr = sql->session->tr;
3401 0 : sql_column *c = exp_find_column(rel, e, -2);
3402 :
3403 0 : if (c) {
3404 0 : sql_table *t = c->t;
3405 :
3406 0 : for (node *n = ol_first_node(t->idxs); n; n = n->next) {
3407 0 : sql_idx *li = n->data;
3408 :
3409 0 : if (li->type == join_idx) {
3410 0 : for (node *m = li->columns->h ; m ; m = m->next) {
3411 0 : sql_kc *fkc = m->data;
3412 :
3413 0 : if (strcmp(fkc->c->base.name, c->base.name) == 0) { /* same fkey column */
3414 0 : sql_key *fkey = (sql_key*)os_find_id(tr->cat->objects, tr, ((sql_fkey*)li->key)->rkey);
3415 :
3416 0 : if (strcmp(fkey->t->base.name, pkc->t->base.name) == 0) { /* to same pk table */
3417 0 : for (node *o = fkey->columns->h ; o ; o = n->next) {
3418 0 : sql_kc *kc = m->data;
3419 :
3420 0 : if (strcmp(kc->c->base.name, pkc->base.name) == 0) /* to same pk table column */
3421 : return c;
3422 : }
3423 : }
3424 : }
3425 : }
3426 : }
3427 : }
3428 : }
3429 : return NULL;
3430 : }
3431 :
3432 : static bool
3433 0 : has_no_selectivity(mvc *sql, sql_rel *rel)
3434 : {
3435 0 : if (!rel)
3436 : return true;
3437 :
3438 0 : switch(rel->op){
3439 : case op_basetable:
3440 : case op_truncate:
3441 : case op_table:
3442 : return true;
3443 0 : case op_topn:
3444 : case op_sample:
3445 : case op_project:
3446 : case op_groupby:
3447 0 : return has_no_selectivity(sql, rel->l);
3448 : case op_ddl:
3449 : case op_insert:
3450 : case op_update:
3451 : case op_delete:
3452 : case op_merge:
3453 : case op_join:
3454 : case op_left:
3455 : case op_right:
3456 : case op_full:
3457 : case op_semi:
3458 : case op_anti:
3459 : case op_union:
3460 : case op_inter:
3461 : case op_except:
3462 : case op_munion:
3463 : case op_select:
3464 : return false;
3465 : }
3466 : return true;
3467 : }
3468 :
3469 : static sql_rel *
3470 706304 : rel_distinct_project2groupby_(visitor *v, sql_rel *rel)
3471 : {
3472 706304 : sql_rel *l = rel->l;
3473 :
3474 : /* rewrite distinct project (table) [ constant ] -> project [ constant ] */
3475 714112 : if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
3476 7808 : exps_card(rel->exps) <= CARD_ATOM) {
3477 479 : set_nodistinct(rel);
3478 479 : if (rel->card > CARD_ATOM) { /* if the projection just contains constants, then no topN is needed */
3479 193 : sql_rel *nl = rel->l = rel_topn(v->sql->sa, rel->l, append(sa_list(v->sql->sa), exp_atom_lng(v->sql->sa, 1)));
3480 193 : set_processed(nl);
3481 : }
3482 479 : v->changes++;
3483 : }
3484 :
3485 : /* rewrite distinct project [ pk ] ( select ( table ) [ e op val ])
3486 : * into project [ pk ] ( select/semijoin ( table ) */
3487 706304 : if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
3488 7541 : (l->op == op_select || l->op == op_semi) && exps_unique(v->sql, rel, rel->exps) &&
3489 212 : (!have_semantics(l->exps) || !have_nil(rel->exps))) {
3490 212 : set_nodistinct(rel);
3491 212 : v->changes++;
3492 : }
3493 :
3494 : /* rewrite distinct project ( join(p,f) [ p.pk = f.fk ] ) [ p.pk ]
3495 : * into project( (semi)join(p,f) [ p.pk = f.fk ] ) [ p.pk ] */
3496 706304 : if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
3497 7117 : l && (is_select(l->op) || l->op == op_join) && rel_is_join_on_pkey(l, true) /* [ pk == fk ] */) {
3498 0 : sql_exp *found = NULL, *pk = NULL, *fk = NULL;
3499 0 : bool all_exps_atoms = true;
3500 0 : sql_column *pkc = NULL;
3501 :
3502 0 : for (node *m = l->exps->h ; m ; m = m->next) { /* find a primary key join */
3503 0 : sql_exp *je = (sql_exp *) m->data;
3504 0 : sql_exp *le = je->l, *re = je->r;
3505 :
3506 0 : if (!find_prop(je->p, PROP_JOINIDX)) /* must be a pk-fk join expression */
3507 0 : continue;
3508 :
3509 0 : if ((pkc = exp_is_pkey(l, le))) { /* le is the primary key */
3510 0 : all_exps_atoms = true;
3511 :
3512 0 : for (node *n = rel->exps->h; n && all_exps_atoms; n = n->next) {
3513 0 : sql_exp *e = (sql_exp *) n->data;
3514 :
3515 0 : if (exp_match(e, le) || exp_refers(e, le))
3516 : found = e;
3517 0 : else if (e->card > CARD_ATOM)
3518 0 : all_exps_atoms = false;
3519 : }
3520 : pk = le;
3521 : fk = re;
3522 : }
3523 0 : if (!found && (pkc = exp_is_pkey(l, re))) { /* re is the primary key */
3524 0 : all_exps_atoms = true;
3525 :
3526 0 : for (node *n = rel->exps->h; n && all_exps_atoms; n = n->next) {
3527 0 : sql_exp *e = (sql_exp *) n->data;
3528 :
3529 0 : if (exp_match(e, re) || exp_refers(e, re))
3530 : found = e;
3531 0 : else if (e->card > CARD_ATOM)
3532 0 : all_exps_atoms = false;
3533 : }
3534 : pk = re;
3535 : fk = le;
3536 : }
3537 : }
3538 :
3539 0 : if (all_exps_atoms && found) { /* rel must have the same primary key on the projection list */
3540 : /* if the foreign key has no selectivity, the join can be removed */
3541 0 : if (!(rel_is_ref(l)) && ((rel_find_exp(l->l, fk) && is_fk_column_of_pk(v->sql, l->l, pkc, fk) && has_no_selectivity(v->sql, l->l)) ||
3542 0 : (l->r && rel_find_exp(l->r, fk) && is_fk_column_of_pk(v->sql, l->r, pkc, fk) && has_no_selectivity(v->sql, l->r)))) {
3543 0 : sql_rel *side = (rel_find_exp(l->l, pk) != NULL)?l->l:l->r;
3544 :
3545 0 : rel->l = rel_dup(side);
3546 0 : rel_destroy(l);
3547 0 : v->changes++;
3548 0 : set_nodistinct(rel);
3549 0 : return rel;
3550 : }
3551 : /* if the join has no multiple references it can be re-written into a semijoin */
3552 0 : if (l->op == op_join && !(rel_is_ref(l)) && list_length(rel->exps) == 1) { /* other expressions may come from the other side */
3553 0 : if (l->r && rel_find_exp(l->r, pk)) {
3554 0 : sql_rel *temp = l->l;
3555 0 : l->l = l->r;
3556 0 : l->r = temp;
3557 :
3558 0 : l->op = op_semi;
3559 0 : } else if (rel_find_exp(l->l, pk)) {
3560 0 : l->op = op_semi;
3561 : }
3562 : }
3563 0 : v->changes++;
3564 0 : set_nodistinct(rel);
3565 0 : return rel;
3566 : }
3567 : }
3568 : /* rewrite distinct project [ gbe ] ( select ( groupby [ gbe ] [ gbe, e ] )[ e op val ])
3569 : * into project [ gbe ] ( select ( group etc ) */
3570 706304 : if (rel->op == op_project && rel->l && !rel->r /* no order by */ &&
3571 7117 : need_distinct(rel) && l->op == op_select){
3572 2590 : sql_rel *g = l->l;
3573 2590 : if (is_groupby(g->op)) {
3574 4 : list *used = sa_list(v->sql->sa);
3575 4 : list *gbe = g->r;
3576 4 : node *n;
3577 4 : int fnd = 1;
3578 :
3579 8 : for (n = rel->exps->h; n && fnd; n = n->next) {
3580 4 : sql_exp *e = n->data;
3581 :
3582 4 : if (e->card > CARD_ATOM) {
3583 : /* find e in gbe */
3584 4 : sql_exp *ne = list_find_exp(g->exps, e);
3585 :
3586 4 : if (ne)
3587 3 : ne = list_find_exp( gbe, ne);
3588 3 : if (ne && !list_find_exp(used, ne)) {
3589 2 : fnd++;
3590 2 : list_append(used, ne);
3591 : }
3592 3 : if (!ne)
3593 : fnd = 0;
3594 : }
3595 : }
3596 4 : if (fnd == (list_length(gbe)+1)) {
3597 0 : v->changes++;
3598 0 : set_nodistinct(rel);
3599 : }
3600 : }
3601 : }
3602 706304 : if (rel->op == op_project && rel->l &&
3603 7117 : need_distinct(rel) && exps_card(rel->exps) > CARD_ATOM) {
3604 7117 : node *n;
3605 7117 : list *exps = new_exp_list(v->sql->sa), *gbe = new_exp_list(v->sql->sa);
3606 7117 : list *obe = rel->r; /* we need to read the ordering later */
3607 :
3608 7117 : if (obe) {
3609 0 : int fnd = 0;
3610 :
3611 0 : for(n = obe->h; n && !fnd; n = n->next) {
3612 0 : sql_exp *e = n->data;
3613 :
3614 0 : if (e->type != e_column)
3615 : fnd = 1;
3616 0 : else if (exps_bind_nid(rel->exps, e->nid) == NULL)
3617 0 : fnd = 1;
3618 : }
3619 0 : if (fnd)
3620 : return rel;
3621 : }
3622 7117 : rel->l = rel_project(v->sql->sa, rel->l, rel->exps);
3623 :
3624 21748 : for (n = rel->exps->h; n; n = n->next) {
3625 14631 : sql_exp *e = n->data, *ne;
3626 :
3627 14631 : set_nodistinct(e);
3628 14631 : ne = exp_ref(v->sql, e);
3629 14631 : if (e->card > CARD_ATOM && !list_find_exp(gbe, ne)) { /* no need to group by on constants, or the same column multiple times */
3630 14259 : append(gbe, ne);
3631 14259 : ne = exp_ref(v->sql, ne);
3632 : }
3633 14631 : append(exps, ne);
3634 : }
3635 7117 : rel->op = op_groupby;
3636 7117 : rel->exps = exps;
3637 7117 : rel->r = gbe;
3638 7117 : set_nodistinct(rel);
3639 7117 : if (obe) {
3640 : /* add order again */
3641 0 : rel = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
3642 0 : rel->r = obe;
3643 : }
3644 7117 : v->changes++;
3645 : }
3646 : return rel;
3647 : }
3648 :
3649 : static sql_rel *
3650 5666 : rel_distinct_project2groupby(visitor *v, global_props *gp, sql_rel *rel)
3651 : {
3652 5666 : (void) gp;
3653 5666 : return rel_visitor_bottomup(v, rel, &rel_distinct_project2groupby_);
3654 : }
3655 :
3656 : run_optimizer
3657 629519 : bind_distinct_project2groupby(visitor *v, global_props *gp)
3658 : {
3659 629519 : int flag = v->sql->sql_optimizer;
3660 552974 : return gp->opt_cycle == 0 && gp->opt_level == 1 && gp->needs_distinct && gp->cnt[op_project] &&
3661 635185 : (flag & distinct_project2groupby)? rel_distinct_project2groupby : NULL;
3662 : }
|