Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024, 2025 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer_private.h"
15 : #include "rel_basetable.h"
16 : #include "rel_exp.h"
17 : #include "rel_select.h"
18 : #include "rel_rewriter.h"
19 :
20 : static int
21 2807902 : exp_is_rename(sql_exp *e)
22 : {
23 2807902 : return (e->type == e_column);
24 : }
25 :
26 : static int
27 1397654 : exp_is_useless_rename(sql_exp *e)
28 : {
29 1397654 : return (e->type == e_column && e->alias.label == e->nid);
30 : }
31 :
32 : static list *
33 20708 : rel_used_projections(mvc *sql, list *exps, list *users)
34 : {
35 20708 : list *nexps = sa_list(sql->sa);
36 20706 : bool *used = SA_ZNEW_ARRAY(sql->ta, bool, list_length(exps));
37 20708 : int i = 0;
38 :
39 135735 : for(node *n = users->h; n; n = n->next) {
40 115028 : sql_exp *e = n->data, *ne = NULL;
41 115028 : assert(e->nid && exps_bind_nid(exps, e->nid));
42 115028 : if (e->nid && (ne = exps_bind_nid(exps, e->nid))) {
43 115028 : used[list_position(exps, ne)] = 1;
44 : }
45 : }
46 146515 : for(node *n = exps->h; n; n = n->next, i++) {
47 125808 : sql_exp *e = n->data;
48 125808 : if (is_intern(e) || used[i])
49 125672 : append(nexps, e);
50 : }
51 20707 : return nexps;
52 : }
53 :
54 : /* move projects down with the goal op removing them completely (ie push renames/reduced lists into basetable)
55 : * for some cases we can directly remove iff renames rename into same alias
56 : * */
57 : static sql_rel *
58 2149904 : rel_push_project_down_(visitor *v, sql_rel *rel)
59 : {
60 : /* for now only push down renames */
61 2149904 : if (v->depth > 1 && is_simple_project(rel->op) && !need_distinct(rel) && !rel_is_ref(rel) && rel->l && !rel->r &&
62 657196 : v->parent &&
63 657188 : !is_modify(v->parent->op) && !is_topn(v->parent->op) && !is_sample(v->parent->op) &&
64 1071831 : !is_ddl(v->parent->op) && !is_set(v->parent->op) && !is_munion(v->parent->op) &&
65 451475 : list_check_prop_all(rel->exps, (prop_check_func)&exp_is_rename)) {
66 224727 : sql_rel *l = rel->l;
67 :
68 224727 : if (rel_is_ref(l))
69 : return rel;
70 216158 : if (is_basetable(l->op)) {
71 35911 : if (list_check_prop_all(rel->exps, (prop_check_func)&exp_is_useless_rename)) {
72 : /* TODO reduce list (those in the project + internal) */
73 20708 : rel->l = NULL;
74 20708 : l->exps = rel_used_projections(v->sql, l->exps, rel->exps);
75 20707 : rel_destroy(rel);
76 20706 : v->changes++;
77 20706 : return l;
78 : }
79 : return rel;
80 180247 : } else if (list_check_prop_all(rel->exps, (prop_check_func)&exp_is_useless_rename)) {
81 79843 : if ((is_project(l->op) && list_length(l->exps) == list_length(rel->exps)) ||
82 63208 : ((v->parent && is_project(v->parent->op)) &&
83 34641 : (is_mset(l->op) || is_set(l->op) || is_select(l->op) || is_join(l->op) || is_semi(l->op) || is_topn(l->op) || is_sample(l->op)))) {
84 50727 : rel->l = NULL;
85 50727 : rel_destroy(rel);
86 50727 : v->changes++;
87 50727 : return l;
88 : }
89 : }
90 : }
91 : /* ToDo handle useful renames, ie new relation name and unique set of attribute names (could reduce set of * attributes) */
92 : /* handle both useless and useful with project [ group by ] */
93 : return rel;
94 : }
95 :
96 : static sql_rel *
97 192069 : rel_push_project_down(visitor *v, global_props *gp, sql_rel *rel)
98 : {
99 192069 : (void) gp;
100 192069 : return rel_visitor_bottomup(v, rel, &rel_push_project_down_);
101 : }
102 :
103 : run_optimizer
104 645183 : bind_push_project_down(visitor *v, global_props *gp)
105 : {
106 645183 : int flag = v->sql->sql_optimizer;
107 644987 : return gp->opt_level == 1 && (flag & push_project_down) &&
108 1290170 : (gp->cnt[op_project] || gp->cnt[op_groupby]) ? rel_push_project_down : NULL;
109 : }
110 :
111 :
112 : static bool exp_shares_exps(sql_exp *e, list *shared, uint64_t *uses);
113 :
114 : static bool
115 109263 : exps_shares_exps(list *exps, list *shared, uint64_t *uses)
116 : {
117 109263 : if (!exps || !shared)
118 : return false;
119 384265 : for (node *n = exps->h; n; n = n->next) {
120 275411 : sql_exp *e = n->data;
121 :
122 275411 : if (exp_shares_exps(e, shared, uses))
123 : return true;
124 : }
125 : return false;
126 : }
127 :
128 : static bool
129 3026523 : exp_shares_exps(sql_exp *e, list *shared, uint64_t *uses)
130 : {
131 5539661 : switch(e->type) {
132 83 : case e_cmp:
133 83 : if (e->flag == cmp_or || e->flag == cmp_filter)
134 61 : return exps_shares_exps(e->l, shared, uses) || exps_shares_exps(e->r, shared, uses);
135 22 : else if (e->flag == cmp_in || e->flag == cmp_notin)
136 0 : return exp_shares_exps(e->l, shared, uses) || exps_shares_exps(e->r, shared, uses);
137 : else
138 44 : return exp_shares_exps(e->l, shared, uses) || exp_shares_exps(e->r, shared, uses) || (e->f && exp_shares_exps(e->f, shared, uses));
139 113291 : case e_atom:
140 113291 : if (e->f)
141 0 : return exps_shares_exps(e->f, shared, uses);
142 : return false;
143 5042801 : case e_column:
144 : {
145 5042801 : sql_exp *ne = NULL;
146 5042801 : assert(e->nid);
147 5042801 : if (e->nid)
148 5042801 : ne = exps_bind_nid(shared, e->nid);
149 5042801 : if (!ne)
150 : return false;
151 3873414 : if (ne->type != e_column) {
152 557683 : int i = list_position(shared, ne);
153 557683 : if (i < 0)
154 : return false;
155 557683 : uint64_t used = (uint64_t) 1 << i;
156 557683 : if (used & *uses)
157 : return true;
158 557292 : *uses |= used;
159 557292 : return false;
160 : }
161 : /* todo better check nested */
162 3315731 : if (ne != e && !ne->nid)
163 : return true;
164 3315718 : if (ne != e && (list_position(shared, e) < 0 || list_position(shared, e) > list_position(shared, ne)))
165 : /* maybe ne refers to a local complex exp */
166 2238793 : return exp_shares_exps(ne, shared, uses);
167 : return false;
168 : }
169 274345 : case e_convert:
170 274345 : return exp_shares_exps(e->l, shared, uses);
171 109141 : case e_aggr:
172 : case e_func:
173 109141 : return exps_shares_exps(e->l, shared, uses);
174 : case e_psm:
175 0 : assert(0); /* not in projection list */
176 : }
177 : return false;
178 : }
179 :
180 : static bool
181 364871 : exps_share_expensive_exp(list *exps, list *shared )
182 : {
183 364871 : uint64_t uses = 0;
184 :
185 364871 : if (list_empty(exps) || list_empty(shared))
186 0 : return false;
187 364871 : if (list_length(shared) > 64)
188 : return true;
189 3114801 : for (node *n = exps->h; n; n = n->next) {
190 2751054 : sql_exp *e = n->data;
191 :
192 2751054 : if (exp_shares_exps(e, shared, &uses))
193 : return true;
194 : }
195 : return false;
196 : }
197 :
198 : /* merge 2 projects into the lower one */
199 : static sql_rel *
200 2406060 : rel_merge_projects_(visitor *v, sql_rel *rel)
201 : {
202 2477521 : list *exps = rel->exps;
203 2477521 : sql_rel *prj = rel->l;
204 2477521 : node *n;
205 :
206 2477521 : if (rel->op == op_project &&
207 1151782 : prj && prj->op == op_project && !(rel_is_ref(prj)) && list_empty(prj->r)) {
208 568187 : int all = 1;
209 :
210 568187 : if (project_unsafe(rel, false) || project_unsafe(prj, false) || exps_share_expensive_exp(rel->exps, prj->exps))
211 204438 : return rel;
212 :
213 : /* here we try to fix aliases */
214 363747 : list *nexps = NULL;
215 : /* for each exp check if we can rename it */
216 2483927 : for (n = exps->h; n && all; n = n->next) {
217 2120180 : sql_exp *e = n->data, *ne = NULL;
218 :
219 2120180 : ne = exp_push_down_prj(v->sql, e, prj, prj->l);
220 2120180 : if (ne) {
221 1827894 : if (exp_name(e))
222 1818100 : exp_prop_alias(v->sql->sa, ne, e);
223 1827894 : if (!nexps)
224 355430 : nexps = new_exp_list(v->sql->sa);
225 1827894 : list_append(nexps, ne);
226 : } else {
227 : all = 0;
228 : }
229 : }
230 363747 : if (all) {
231 71461 : rel->exps = nexps;
232 : /* we can now remove the intermediate project */
233 : /* push order by expressions */
234 71461 : if (!list_empty(rel->r)) {
235 0 : list *nr = new_exp_list(v->sql->sa), *res = rel->r;
236 0 : for (n = res->h; n; n = n->next) {
237 0 : sql_exp *e = n->data, *ne = NULL;
238 :
239 0 : ne = exp_push_down_prj(v->sql, e, prj, prj->l);
240 0 : if (ne) {
241 0 : if (exp_name(e))
242 0 : exp_prop_alias(v->sql->sa, ne, e);
243 0 : list_append(nr, ne);
244 : } else {
245 : all = 0;
246 : }
247 : }
248 0 : if (all) {
249 0 : rel->r = nr;
250 : } else {
251 : /* leave as is */
252 0 : rel->exps = exps;
253 0 : return rel;
254 : }
255 : }
256 71461 : rel->l = prj->l;
257 71461 : prj->l = NULL;
258 71461 : rel_destroy(prj);
259 71461 : v->changes++;
260 71461 : return rel_merge_projects_(v, rel);
261 : } else {
262 : /* leave as is */
263 292286 : rel->exps = exps;
264 : }
265 292286 : return rel;
266 : }
267 : return rel;
268 : }
269 :
270 : static sql_rel *
271 192058 : rel_merge_projects(visitor *v, global_props *gp, sql_rel *rel)
272 : {
273 192058 : (void) gp;
274 192058 : return rel_visitor_bottomup(v, rel, &rel_merge_projects_);
275 : }
276 :
277 : run_optimizer
278 645173 : bind_merge_projects(visitor *v, global_props *gp)
279 : {
280 645173 : int flag = v->sql->sql_optimizer;
281 644977 : return gp->opt_level == 1 && (flag & merge_projects) &&
282 1290142 : (gp->cnt[op_project] || gp->cnt[op_groupby]) ? rel_merge_projects : NULL;
283 : }
284 :
285 :
286 : static sql_exp *split_aggr_and_project(mvc *sql, list *aexps, sql_exp *e);
287 :
288 : static void
289 0 : list_split_aggr_and_project(mvc *sql, list *aexps, list *exps)
290 : {
291 0 : if (list_empty(exps))
292 : return ;
293 0 : for(node *n = exps->h; n; n = n->next)
294 0 : n->data = split_aggr_and_project(sql, aexps, n->data);
295 : }
296 :
297 : static sql_exp *
298 0 : split_aggr_and_project(mvc *sql, list *aexps, sql_exp *e)
299 : {
300 0 : switch(e->type) {
301 0 : case e_aggr:
302 : /* add to the aggrs */
303 0 : if (!exp_name(e))
304 0 : exp_label(sql->sa, e, ++sql->label);
305 0 : list_append(aexps, e);
306 0 : return exp_ref(sql, e);
307 : case e_cmp:
308 : /* e_cmp's shouldn't exist in an aggr expression list */
309 0 : assert(0);
310 : case e_convert:
311 0 : e->l = split_aggr_and_project(sql, aexps, e->l);
312 0 : return e;
313 0 : case e_func:
314 0 : list_split_aggr_and_project(sql, aexps, e->l);
315 0 : return e;
316 : case e_atom:
317 : case e_column: /* constants and columns shouldn't be rewritten */
318 : case e_psm:
319 : return e;
320 : }
321 : return NULL;
322 : }
323 :
324 : /* exp_rename */
325 : static sql_exp * exp_rename(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t);
326 :
327 : static list *
328 53720 : exps_rename(mvc *sql, list *l, sql_rel *f, sql_rel *t)
329 : {
330 53720 : if (list_empty(l))
331 : return l;
332 47684 : for (node *n=l->h; n; n=n->next)
333 30652 : n->data = exp_rename(sql, n->data, f, t);
334 : return l;
335 : }
336 :
337 : /* exp_rename */
338 : static sql_exp *
339 182804 : exp_rename(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t)
340 : {
341 182804 : sql_exp *ne = NULL;
342 :
343 182804 : assert(is_project(f->op));
344 :
345 182804 : switch(e->type) {
346 81835 : case e_column:
347 81835 : assert(e->nid);
348 81835 : ne = exps_bind_nid(f->exps, e->nid);
349 81835 : if (!ne)
350 : return e;
351 53739 : sql_exp *oe = e;
352 53739 : e = NULL;
353 53739 : if (ne && ne->nid)
354 53580 : e = rel_find_exp(t, ne);
355 53580 : if (!e) {
356 159 : sql->session->status = 0;
357 159 : sql->errstr[0] = 0;
358 159 : if (exp_is_atom(ne))
359 : return ne;
360 : return oe;
361 : }
362 53580 : ne = exp_ref(sql, e);
363 53580 : if (oe)
364 53580 : exp_propagate(sql->sa, ne, oe);
365 53580 : return ne;
366 56312 : case e_cmp:
367 56312 : if (e->flag == cmp_or || e->flag == cmp_filter) {
368 3849 : e->l = exps_rename(sql, e->l, f, t);
369 3849 : e->r = exps_rename(sql, e->r, f, t);
370 52463 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
371 6475 : e->l = exp_rename(sql, e->l, f, t);
372 6475 : e->r = exps_rename(sql, e->r, f, t);
373 : } else {
374 45988 : e->l = exp_rename(sql, e->l, f, t);
375 45988 : e->r = exp_rename(sql, e->r, f, t);
376 45988 : if (e->f)
377 225 : e->f = exp_rename(sql, e->f, f, t);
378 : }
379 : break;
380 5110 : case e_convert:
381 5110 : e->l = exp_rename(sql, e->l, f, t);
382 5110 : break;
383 2874 : case e_aggr:
384 : case e_func:
385 2874 : e->l = exps_rename(sql, e->l, f, t);
386 2874 : break;
387 36673 : case e_atom:
388 36673 : e->f = exps_rename(sql, e->f, f, t);
389 36673 : break;
390 : case e_psm:
391 : break;
392 : }
393 : return e;
394 : }
395 :
396 : static int
397 5969883 : exp_match_exp_cmp( sql_exp *e1, sql_exp *e2)
398 : {
399 5969883 : if (exp_match_exp(e1,e2))
400 61393 : return 0;
401 : return -1;
402 : }
403 :
404 : /* Pushing projects up the tree. Done very early in the optimizer.
405 : * Makes later steps easier.
406 : */
407 : extern void _rel_print(mvc *sql, sql_rel *rel);
408 : static sql_rel *
409 2059739 : rel_push_project_up_(visitor *v, sql_rel *rel)
410 : {
411 2059739 : if (is_simple_project(rel->op) && rel->l && !rel_is_ref(rel)) {
412 731877 : sql_rel *l = rel->l;
413 731877 : if (is_simple_project(l->op))
414 269601 : return rel_merge_projects_(v, rel);
415 : /* find equal column references, later cases are rewritten into references back to the first
416 : * project () [ i.i L1, i.i L2 ] -> project() [ i.i L1, L1 L2 ] */
417 462276 : if (list_length(rel->exps) > 1) {
418 356396 : list *exps = rel->exps;
419 356396 : node *n = exps->h;
420 356396 : bool needed = false;
421 2478152 : for(n = n->next; n && !needed; n = n->next) {
422 2121755 : sql_exp *e = n->data;
423 2121755 : if (e->type == e_column && !is_selfref(e)) {
424 16542352 : for(node *m = exps->h; m && m != n && !needed; m = m->next) {
425 14849560 : sql_exp *h = m->data;
426 14849560 : if (exp_match_exp(h,e))
427 9404 : needed = true;
428 : }
429 : }
430 : }
431 356397 : if (needed) {
432 9404 : rel->exps = sa_list(v->sql->sa);
433 9404 : node *n = exps->h;
434 9404 : list_append(rel->exps, n->data);
435 120374 : for(n = n->next; n; n = n->next) {
436 110970 : sql_exp *e = n->data;
437 110970 : if (e->type == e_column && !is_selfref(e)) {
438 69259 : node *m = list_find(rel->exps, e, (fcmp)&exp_match_exp_cmp);
439 69259 : if (m) {
440 11851 : sql_exp *me = m->data;
441 11851 : if (me->alias.label != me->nid) {
442 790 : sql_exp *ne = exp_ref(v->sql, m->data);
443 790 : exp_setalias(ne, e->alias.label, exp_relname(e), exp_name(e));
444 790 : exp_propagate(v->sql->sa, ne, e);
445 790 : set_selfref(ne);
446 790 : e = ne;
447 : }
448 : }
449 : }
450 110970 : list_append(rel->exps, e);
451 : }
452 : }
453 : }
454 : }
455 :
456 : /* project/project cleanup is done later */
457 1790138 : if (is_join(rel->op) || is_select(rel->op)) {
458 515463 : node *n;
459 515463 : list *exps = NULL, *l_exps, *r_exps;
460 515463 : sql_rel *l = rel->l;
461 515463 : sql_rel *r = rel->r;
462 515463 : sql_rel *t;
463 515463 : int nlexps = 0, i = 0;
464 :
465 : /* Don't rewrite refs, non projections or constant or
466 : order by projections */
467 515463 : if (!l || rel_is_ref(l) || is_topn(l->op) || is_sample(l->op) ||
468 486935 : (is_join(rel->op) && !list_empty(rel->attr)) ||
469 477608 : (is_join(rel->op) && (!r || rel_is_ref(r))) ||
470 455634 : (is_left(rel->op) && (rel->flag&MERGE_LEFT) /* can't push projections above merge statements left joins */) ||
471 455632 : (is_select(rel->op) && l->op != op_project) ||
472 247746 : (is_join(rel->op) && ((l->op != op_project && r->op != op_project) || is_topn(r->op) || is_sample(r->op))) ||
473 92135 : ((l->op == op_project && (!l->l || l->r || project_unsafe(l, is_select(rel->op)))) ||
474 76566 : (is_join(rel->op) && (r->op == op_project && (!r->l || r->r || project_unsafe(r, false))))))
475 443106 : return rel;
476 :
477 72357 : if (l->op == op_project && l->l) {
478 988324 : for (n = l->exps->h; n; n = n->next) {
479 947192 : sql_exp *e = n->data;
480 947192 : if (!(is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) &&
481 946204 : !(e->type == e_column && !has_label(e)))
482 : return rel;
483 : }
484 : }
485 61633 : if (is_join(rel->op) && r->op == op_project && r->l) {
486 65218 : for (n = r->exps->h; n; n = n->next) {
487 59966 : sql_exp *e = n->data;
488 59966 : if (!(is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) &&
489 59571 : !(e->type == e_column && !has_label(e)))
490 : return rel;
491 : }
492 : }
493 :
494 44199 : if (l->op == op_project && l->l) {
495 : /* Go through the list of project expressions.
496 : Check if they can be pushed up, ie are they not
497 : changing or introducing any columns used
498 : by the upper operator. */
499 :
500 40317 : exps = new_exp_list(v->sql->sa);
501 915970 : for (n = l->exps->h; n; n = n->next) {
502 875653 : sql_exp *e = n->data;
503 :
504 : /* we cannot rewrite projection with atomic values from outer joins */
505 875653 : if (is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) {
506 661 : list_append(exps, e);
507 874992 : } else if (e->type == e_column) {
508 874992 : if (has_label(e))
509 : return rel;
510 874992 : list_append(exps, e);
511 : } else {
512 : return rel;
513 : }
514 : }
515 : } else {
516 3882 : exps = rel_projections(v->sql, l, NULL, 1, 1);
517 : }
518 44199 : nlexps = list_length(exps);
519 : /* also handle right hand of join */
520 44199 : if (is_join(rel->op) && r->op == op_project && r->l && list_empty(rel->attr)) {
521 : /* Here we also check all expressions of r like above
522 : but also we need to check for ambiguous names. */
523 :
524 45121 : for (n = r->exps->h; n; n = n->next) {
525 40107 : sql_exp *e = n->data;
526 :
527 : /* we cannot rewrite projection with atomic values from outer joins */
528 40107 : if (is_column(e->type) && exp_is_atom(e) && !(is_left(rel->op) || is_full(rel->op))) {
529 148 : list_append(exps, e);
530 39959 : } else if (e->type == e_column) {
531 39721 : if (has_label(e))
532 : return rel;
533 39721 : list_append(exps, e);
534 : } else {
535 : return rel;
536 : }
537 : }
538 38947 : } else if (is_join(rel->op) && list_empty(rel->attr)) {
539 21369 : list *r_exps = rel_projections(v->sql, r, NULL, 1, 1);
540 21369 : list_merge(exps, r_exps, (fdup)NULL);
541 : }
542 43961 : if (!list_empty(rel->attr))
543 0 : append(exps, exp_ref(v->sql, rel->attr->h->data));
544 : /* Here we should check for ambiguous names ? */
545 43961 : if (is_join(rel->op) && r && list_empty(rel->attr)) {
546 26383 : t = (l->op == op_project && l->l)?l->l:l;
547 26383 : l_exps = rel_projections(v->sql, t, NULL, 1, 1);
548 : /* conflict with old right expressions */
549 26383 : r_exps = rel_projections(v->sql, r, NULL, 1, 1);
550 26383 : if (rel->attr)
551 0 : append(r_exps, exp_ref(v->sql, rel->attr->h->data));
552 26383 : t = (r->op == op_project && r->l)?r->l:r;
553 26383 : r_exps = rel_projections(v->sql, t, NULL, 1, 1);
554 : /* conflict with new right expressions */
555 702844 : for(n = l_exps->h; n; n = n->next) {
556 676827 : sql_exp *e = n->data;
557 :
558 676827 : if (exp_is_atom(e))
559 0 : continue;
560 676827 : if (e->nid && exps_bind_nid(r_exps, e->nid))
561 : return rel;
562 676461 : if (e->alias.label && exps_bind_nid(r_exps, e->alias.label))
563 : return rel;
564 : }
565 : /* conflict with new left expressions */
566 180335 : for(n = r_exps->h; n; n = n->next) {
567 154318 : sql_exp *e = n->data;
568 :
569 154318 : if (exp_is_atom(e))
570 0 : continue;
571 154318 : if (e->nid && exps_bind_nid(l_exps, e->nid))
572 : return rel;
573 154318 : if (e->alias.label && exps_bind_nid(l_exps, e->alias.label))
574 : return rel;
575 : }
576 : }
577 :
578 : /* rename operator expressions */
579 43595 : if (l->op == op_project) {
580 : /* rewrite rel from rel->l into rel->l->l */
581 39920 : if (rel->exps) {
582 82299 : for (n = rel->exps->h; n; n = n->next) {
583 43778 : sql_exp *e = n->data, *ne;
584 :
585 43778 : ne = exp_rename(v->sql, e, l, l->l);
586 43778 : assert(ne);
587 43778 : if (ne != e && exp_name(e))
588 0 : exp_propagate(v->sql->sa, ne, e);
589 43778 : n->data = ne;
590 : }
591 : }
592 39920 : rel->l = l->l;
593 39920 : l->l = NULL;
594 39920 : rel_destroy(l);
595 : }
596 43595 : if (is_join(rel->op) && r->op == op_project && list_empty(rel->attr)) {
597 : /* rewrite rel from rel->r into rel->r->l */
598 4658 : if (rel->exps) {
599 8585 : for (n = rel->exps->h; n; n = n->next) {
600 4588 : sql_exp *e = n->data, *ne;
601 :
602 4588 : ne = exp_rename(v->sql, e, r, r->l);
603 4588 : assert(ne);
604 4588 : if (ne != e && exp_name(e))
605 0 : exp_propagate(v->sql->sa, ne, e);
606 4588 : n->data = ne;
607 : }
608 : }
609 4658 : rel->r = r->l;
610 4658 : r->l = NULL;
611 4658 : rel_destroy(r);
612 : }
613 : /* Done, ie introduce new project */
614 43595 : exps_fix_card(exps, rel->card);
615 : /* Fix nil flag */
616 43595 : if (!list_empty(exps)) {
617 938977 : for (n = exps->h ; n && i < nlexps ; n = n->next, i++) {
618 895382 : sql_exp *e = n->data;
619 :
620 895382 : if (is_right(rel->op) || is_full(rel->op))
621 458 : set_has_nil(e);
622 895382 : set_not_unique(e);
623 : }
624 190620 : for (; n ; n = n->next) {
625 147025 : sql_exp *e = n->data;
626 :
627 147025 : if (is_left(rel->op) || is_full(rel->op))
628 45640 : set_has_nil(e);
629 147025 : set_not_unique(e);
630 : }
631 : }
632 43595 : v->changes++;
633 43595 : return rel_inplace_project(v->sql->sa, rel, NULL, exps);
634 : }
635 1274675 : if (is_groupby(rel->op) && !rel_is_ref(rel) && rel->exps && list_length(rel->exps) > 1) {
636 24184 : node *n;
637 24184 : int fnd = 0;
638 24184 : list *aexps, *pexps;
639 :
640 : /* check if some are expressions aren't e_aggr */
641 103392 : for (n = rel->exps->h; n && !fnd; n = n->next) {
642 79208 : sql_exp *e = n->data;
643 :
644 79208 : if (e->type != e_aggr && e->type != e_column && e->type != e_atom && e->card > CARD_ATOM) {
645 79208 : fnd = 1;
646 : }
647 : }
648 : /* only aggr, no rewrite needed */
649 24184 : if (!fnd)
650 : return rel;
651 :
652 0 : aexps = sa_list(v->sql->sa);
653 0 : pexps = sa_list(v->sql->sa);
654 0 : for (n = rel->exps->h; n; n = n->next) {
655 0 : sql_exp *e = n->data, *ne = NULL;
656 :
657 0 : switch (e->type) {
658 0 : case e_atom: /* move over to the projection */
659 0 : list_append(pexps, e);
660 0 : break;
661 0 : case e_func:
662 0 : list_append(pexps, e);
663 0 : list_split_aggr_and_project(v->sql, aexps, e->l);
664 0 : break;
665 0 : case e_convert:
666 0 : list_append(pexps, e);
667 0 : e->l = split_aggr_and_project(v->sql, aexps, e->l);
668 0 : break;
669 0 : default: /* simple alias */
670 0 : list_append(aexps, e);
671 0 : ne = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
672 0 : assert(0);
673 : list_append(pexps, ne);
674 : break;
675 : }
676 : }
677 0 : v->changes++;
678 0 : rel->exps = aexps;
679 0 : return rel_inplace_project( v->sql->sa, rel, NULL, pexps);
680 : }
681 : return rel;
682 : }
683 :
684 : static sql_rel *
685 191350 : rel_push_project_up(visitor *v, global_props *gp, sql_rel *rel)
686 : {
687 191350 : (void) gp;
688 191350 : return rel_visitor_bottomup(v, rel, &rel_push_project_up_);
689 : }
690 :
691 : run_optimizer
692 645094 : bind_push_project_up(visitor *v, global_props *gp)
693 : {
694 645094 : int flag = v->sql->sql_optimizer;
695 644898 : return gp->opt_level == 1 && (flag & push_project_up) &&
696 1289992 : gp->cnt[op_project] ? rel_push_project_up : NULL;
697 : }
698 :
699 :
700 : static void split_exps(mvc *sql, list *exps, sql_rel *rel);
701 :
702 : static int
703 3206354 : exp_refers_cmp( sql_exp *e1, sql_exp *e2)
704 : {
705 3206354 : if (exp_refers(e1,e2))
706 0 : return 0;
707 : return -1;
708 : }
709 :
710 : sql_exp *
711 135338 : add_exp_too_project(mvc *sql, sql_exp *e, sql_rel *rel)
712 : {
713 135338 : node *n = list_find(rel->exps, e, (fcmp)&exp_match_exp_cmp);
714 :
715 : /* if not matching we may refer to an older expression */
716 135338 : if (!n)
717 85796 : n = list_find(rel->exps, e, (fcmp)&exp_refers_cmp);
718 85796 : if (!n) {
719 85796 : exp_label(sql->sa, e, ++sql->label);
720 85796 : append(rel->exps, e);
721 : } else {
722 49542 : sql_exp *ne = n->data;
723 :
724 49542 : if (rel && rel->l) {
725 49542 : if (ne && ne->alias.label && rel_find_nid(rel->l, ne->alias.label)) {
726 0 : exp_label(sql->sa, e, ++sql->label);
727 0 : append(rel->exps, e);
728 0 : ne = e;
729 : }
730 : }
731 : e = ne;
732 : }
733 135338 : e = exp_ref(sql, e);
734 135338 : return e;
735 : }
736 :
737 : static void
738 156808 : add_exps_too_project(mvc *sql, list *exps, sql_rel *rel)
739 : {
740 156808 : node *n;
741 :
742 156808 : if (!exps)
743 : return;
744 485614 : for(n=exps->h; n; n = n->next) {
745 328805 : sql_exp *e = n->data;
746 :
747 328805 : if (e->type != e_column && !exp_is_atom(e))
748 135055 : n->data = add_exp_too_project(sql, e, rel);
749 : }
750 : }
751 :
752 : static sql_exp *
753 517586 : split_exp(mvc *sql, sql_exp *e, sql_rel *rel)
754 : {
755 517586 : if (exp_is_atom(e))
756 : return e;
757 384640 : switch(e->type) {
758 : case e_column:
759 : return e;
760 17140 : case e_convert:
761 17140 : e->l = split_exp(sql, e->l, rel);
762 17141 : return e;
763 166719 : case e_aggr:
764 : case e_func:
765 166719 : if (!is_analytic(e) && !exp_has_sideeffect(e)) {
766 164769 : sql_subfunc *f = e->f;
767 164769 : if (e->type == e_func && !f->func->s && is_caselike_func(f) /*is_ifthenelse_func(f)*/) {
768 : return e;
769 : } else {
770 156808 : split_exps(sql, e->l, rel);
771 156808 : add_exps_too_project(sql, e->l, rel);
772 : }
773 : }
774 : return e;
775 494 : case e_cmp:
776 494 : if (e->flag == cmp_or || e->flag == cmp_filter) {
777 471 : split_exps(sql, e->l, rel);
778 471 : split_exps(sql, e->r, rel);
779 23 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
780 0 : e->l = split_exp(sql, e->l, rel);
781 0 : split_exps(sql, e->r, rel);
782 : } else {
783 23 : e->l = split_exp(sql, e->l, rel);
784 23 : e->r = split_exp(sql, e->r, rel);
785 23 : if (e->f)
786 21 : e->f = split_exp(sql, e->f, rel);
787 : }
788 : return e;
789 : case e_atom:
790 : case e_psm:
791 : return e;
792 : }
793 : return e;
794 : }
795 :
796 : static void
797 157756 : split_exps(mvc *sql, list *exps, sql_rel *rel)
798 : {
799 157756 : if (list_empty(exps))
800 : return;
801 487524 : for(node *n=exps->h; n; n = n->next){
802 329767 : sql_exp *e = n->data;
803 :
804 329767 : e = split_exp(sql, e, rel);
805 329768 : n->data = e;
806 : }
807 : }
808 :
809 : sql_rel *
810 649997 : rel_split_project_(visitor *v, sql_rel *rel, int top)
811 : {
812 1300013 : if (mvc_highwater(v->sql))
813 : return rel;
814 :
815 650004 : if (!rel)
816 : return NULL;
817 650004 : if (is_project(rel->op) && list_length(rel->exps) && (is_groupby(rel->op) || rel->l) && !need_distinct(rel) && !is_single(rel)) {
818 181508 : list *exps = rel->exps;
819 181508 : node *n;
820 181508 : int funcs = 0;
821 181508 : sql_rel *nrel;
822 :
823 : /* are there functions */
824 1407346 : for (n=exps->h; n && !funcs; n = n->next) {
825 1225840 : sql_exp *e = n->data;
826 :
827 1225840 : funcs = exp_has_func(e);
828 : }
829 : /* introduce extra project */
830 181506 : if (funcs && rel->op != op_project) {
831 7 : nrel = rel_project(v->sql->sa, rel->l,
832 7 : rel_projections(v->sql, rel->l, NULL, 1, 1));
833 7 : rel->l = nrel;
834 : /* recursively split all functions and add those to the projection list */
835 7 : split_exps(v->sql, rel->exps, nrel);
836 14 : if (nrel->l && !(nrel->l = rel_split_project_(v, nrel->l, (is_topn(rel->op)||is_sample(rel->op))?top:0)))
837 : return NULL;
838 7 : return rel;
839 181499 : } else if (funcs && !top && list_empty(rel->r)) {
840 : /* projects can have columns point back into the expression list, ie
841 : * create a new list including the split expressions */
842 17380 : node *n;
843 17380 : list *exps = rel->exps;
844 :
845 17380 : rel->exps = sa_list(v->sql->sa);
846 187992 : for (n=exps->h; n; n = n->next)
847 170611 : append(rel->exps, split_exp(v->sql, n->data, rel));
848 164119 : } else if (funcs && top && rel_is_ref(rel) && list_empty(rel->r)) {
849 : /* inplace */
850 9 : list *exps = rel_projections(v->sql, rel, NULL, 1, 1);
851 9 : sql_rel *l = rel_project(v->sql->sa, rel->l, NULL);
852 9 : rel->l = l;
853 9 : l->exps = rel->exps;
854 9 : set_processed(l);
855 9 : rel->exps = exps;
856 : }
857 : }
858 650000 : if (is_mset(rel->op) || is_set(rel->op) || is_basetable(rel->op))
859 : return rel;
860 425493 : if (rel->l && (rel->op != op_table || rel->flag != TRIGGER_WRAPPER)) {
861 445416 : rel->l = rel_split_project_(v, rel->l, (is_topn(rel->op)||is_sample(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0);
862 414848 : if (!rel->l)
863 : return NULL;
864 : }
865 425504 : if ((is_join(rel->op) || is_semi(rel->op)) && rel->r) {
866 122332 : rel->r = rel_split_project_(v, rel->r, (is_topn(rel->op)||is_sample(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0);
867 122332 : if (!rel->r)
868 : return NULL;
869 : }
870 : return rel;
871 : }
872 :
873 : static sql_rel *
874 112821 : rel_split_project(visitor *v, global_props *gp, sql_rel *rel)
875 : {
876 112821 : (void) gp;
877 112821 : return rel_split_project_(v, rel, 1);
878 : }
879 :
880 : run_optimizer
881 645091 : bind_split_project(visitor *v, global_props *gp)
882 : {
883 645091 : int flag = v->sql->sql_optimizer;
884 565639 : return gp->opt_cycle == 0 && gp->opt_level == 1 && (flag & split_project) &&
885 1210722 : (gp->cnt[op_project] || gp->cnt[op_groupby]) ? rel_split_project : NULL;
886 : }
887 :
888 :
889 : static sql_rel *
890 191547 : rel_project_reduce_casts(visitor *v, global_props *gp, sql_rel *rel)
891 : {
892 191547 : if (!rel)
893 : return NULL;
894 191547 : (void) gp;
895 191547 : if (gp->opt_level == 1 && v->value_based_opt && is_simple_project(rel->op) && list_length(rel->exps)) {
896 104109 : list *exps = rel->exps;
897 104109 : node *n;
898 :
899 526361 : for (n=exps->h; n; n = n->next) {
900 422252 : sql_exp *e = n->data;
901 :
902 422252 : if (e && e->type == e_func) {
903 30690 : sql_subfunc *f = e->f;
904 30690 : sql_subtype *res = f->res->h->data;
905 :
906 30690 : if (!f->func->s && !strcmp(f->func->base.name, "sql_mul") && res->scale > 0) {
907 6 : list *args = e->l;
908 6 : sql_exp *h = args->h->data;
909 6 : sql_exp *t = args->t->data;
910 6 : atom *ha = exp_value(v->sql, h), *ta = exp_value(v->sql, t);
911 :
912 6 : if (ha || ta) {
913 5 : atom *a = ha ? ha : ta;
914 5 : atom *na = reduce_scale(v->sql, a);
915 :
916 5 : if (na && na != a) {
917 2 : int rs = a->tpe.scale - na->tpe.scale;
918 2 : res->scale -= rs;
919 2 : if (ha) {
920 2 : h->r = NULL;
921 2 : h->l = na;
922 : } else {
923 0 : t->r = NULL;
924 0 : t->l = na;
925 : }
926 2 : v->changes++;
927 : }
928 : }
929 : }
930 : }
931 : }
932 : }
933 : return rel;
934 : }
935 :
936 : run_optimizer
937 645176 : bind_project_reduce_casts(visitor *v, global_props *gp)
938 : {
939 645176 : int flag = v->sql->sql_optimizer;
940 645176 : return gp->cnt[op_project] && (flag & project_reduce_casts) ? rel_project_reduce_casts : NULL;
941 : }
942 :
943 : static sql_column *
944 74267 : exp_find_column_( sql_rel *rel, sql_exp *exp, int pnr, sql_rel **bt )
945 : {
946 74267 : if (exp->type == e_column)
947 74244 : return name_find_column(rel, exp->l, exp->r, pnr, bt);
948 : return NULL;
949 : }
950 :
951 : static int
952 0 : rel_part_nr( sql_rel *rel, sql_exp *e )
953 : {
954 0 : sql_column *c = NULL;
955 0 : sql_rel *bt = NULL;
956 0 : assert(e->type == e_cmp);
957 :
958 0 : c = exp_find_column_(rel, e->l, -1, &bt);
959 0 : if (!c)
960 0 : c = exp_find_column_(rel, e->r, -1, &bt);
961 0 : if (!c && e->f)
962 0 : c = exp_find_column_(rel, e->f, -1, &bt);
963 0 : if (!c || !bt || !rel_base_get_mergetable(bt))
964 0 : return -1;
965 0 : sql_table *pp = c->t;
966 0 : sql_table *mt = rel_base_get_mergetable(bt);
967 0 : return find_member_pos(mt->members, pp);
968 : }
969 :
970 : static int
971 0 : rel_uses_part_nr( sql_rel *rel, sql_exp *e, int pnr )
972 : {
973 0 : sql_column *c = NULL;
974 0 : sql_rel *bt = NULL;
975 0 : assert(e->type == e_cmp);
976 :
977 : /*
978 : * following case fails.
979 : *
980 : * semijoin( A1, union [A1, A2] )
981 : * The union will never return proper column (from A2).
982 : * ie need different solution (probably pass pnr).
983 : */
984 0 : c = exp_find_column_(rel, e->l, pnr, &bt);
985 0 : if (!c)
986 0 : c = exp_find_column_(rel, e->r, pnr, &bt);
987 0 : if (c && bt && rel_base_get_mergetable(bt)) {
988 0 : sql_table *pp = c->t;
989 0 : sql_table *mt = rel_base_get_mergetable(bt);
990 0 : if (find_member_pos(mt->members, pp) == pnr)
991 : return 1;
992 : }
993 : /* for projects we may need to do a rename! */
994 0 : if (is_project(rel->op) || is_topn(rel->op) || is_sample(rel->op))
995 0 : return rel_uses_part_nr( rel->l, e, pnr);
996 :
997 0 : if (is_munion(rel->op)) {
998 : list *l = rel->l;
999 : if (rel_uses_part_nr( l->h->data, e, pnr))
1000 : return 1;
1001 0 : } else if (is_union(rel->op) || is_join(rel->op) || is_semi(rel->op)) {
1002 0 : if (rel_uses_part_nr( rel->l, e, pnr))
1003 : return 1;
1004 0 : if (!is_semi(rel->op) && rel_uses_part_nr( rel->r, e, pnr))
1005 : return 1;
1006 : }
1007 : return 0;
1008 : }
1009 :
1010 : static sql_column *
1011 3009 : exp_is_pkey(sql_rel *rel, sql_exp *e)
1012 : {
1013 3009 : if (find_prop(e->p, PROP_HASHCOL)) { /* aligned PKEY JOIN */
1014 251 : fcmp cmp = (fcmp)&kc_column_cmp;
1015 251 : sql_column *c = exp_find_column(rel, e, -2);
1016 :
1017 251 : if (c && c->t->pkey && list_find(c->t->pkey->k.columns, c, cmp) != NULL)
1018 : return c;
1019 : }
1020 : return NULL;
1021 : }
1022 :
1023 : static sql_exp *
1024 3520 : rel_is_join_on_pkey(sql_rel *rel, bool pk_fk) /* pk_fk is used to verify is a join on pk-fk */
1025 : {
1026 3520 : if (!rel || !rel->exps)
1027 : return NULL;
1028 4858 : for (node *n = rel->exps->h; n; n = n->next) {
1029 2579 : sql_exp *je = n->data;
1030 :
1031 3900 : if (je->type == e_cmp && je->flag == cmp_equal &&
1032 2642 : (exp_is_pkey(rel, je->l) || exp_is_pkey(rel, je->r)) &&
1033 250 : (!pk_fk || find_prop(je->p, PROP_JOINIDX)))
1034 0 : return je;
1035 : }
1036 : return NULL;
1037 : }
1038 :
1039 : static int
1040 0 : exps_has_predicate( list *l )
1041 : {
1042 0 : node *n;
1043 :
1044 0 : for( n = l->h; n; n = n->next){
1045 0 : sql_exp *e = n->data;
1046 :
1047 0 : if (e->card <= CARD_ATOM)
1048 : return 1;
1049 : }
1050 : return 0;
1051 : }
1052 :
1053 : static sql_rel *
1054 367 : rel_find_select( sql_rel *r)
1055 : {
1056 1297 : while (!is_select(r->op) && r->l && is_project(r->op))
1057 : r = r->l;
1058 367 : if (is_select(r->op))
1059 15 : return r;
1060 : return NULL;
1061 : }
1062 :
1063 : static bool
1064 73394 : rels_share_rel(list *l)
1065 : {
1066 73394 : sql_rel *ref = NULL;
1067 104540 : for (node *n = l->h; n; n = n->next) {
1068 104173 : sql_rel *r = n->data;
1069 104173 : if(!ref) {
1070 73394 : if ((ref = rel_find_ref(r)) == NULL)
1071 : return false;
1072 30779 : } else if (ref != rel_find_ref(r)) {
1073 : return false;
1074 : }
1075 : }
1076 : return true;
1077 : }
1078 :
1079 : static inline sql_rel *
1080 992293 : rel_merge_munion(visitor *v, sql_rel *rel)
1081 : {
1082 992293 : if (is_munion(rel->op) && rels_share_rel(rel->l)) {
1083 367 : list *rels = rel->l, *nrels = NULL;
1084 367 : sql_rel *cur = NULL, *curs = NULL;
1085 :
1086 : /* Find selects and try to merge */
1087 367 : for(node *n = rels->h; n; n = n->next) {
1088 367 : sql_rel *r = n->data;
1089 367 : sql_rel *s = rel_find_select(r);
1090 :
1091 367 : if (!s)
1092 : return rel;
1093 15 : if (cur) {
1094 0 : if (s != r->l || curs->l != s->l || !rel_is_ref(s->l) ||
1095 : /* for now only union(project*(select(R),project*(select(R))) */
1096 0 : !s->exps || !curs->exps || exps_has_predicate(s->exps) || exps_has_predicate(curs->exps)) {
1097 0 : if (nrels)
1098 0 : append(nrels, r);
1099 : else
1100 : return rel;
1101 : }
1102 :
1103 : /* merge, ie. add 'or exp' */
1104 0 : curs->exps = append(new_exp_list(v->sql->sa), exp_or(v->sql->sa, cur->exps, s->exps, 0));
1105 0 : if (!nrels) {
1106 0 : nrels = sa_list(v->sql->sa);
1107 0 : append(nrels, cur);
1108 : }
1109 : } else {
1110 15 : if (s != r->l || !rel_is_ref(s->l)) {
1111 15 : if (nrels) {
1112 0 : append(nrels, r);
1113 : } else {
1114 : return rel;
1115 : }
1116 : }
1117 : cur = r;
1118 : curs = s;
1119 : }
1120 : }
1121 0 : if (nrels) {
1122 0 : v->changes++;
1123 0 : rel->l = nrels;
1124 : }
1125 0 : return rel;
1126 : }
1127 : return rel;
1128 : }
1129 :
1130 : static sql_rel *
1131 992293 : rel_optimize_munions_bottomup_(visitor *v, sql_rel *rel)
1132 : {
1133 992293 : rel = rel_merge_munion(v, rel);
1134 992293 : return rel;
1135 : }
1136 :
1137 : static sql_rel *
1138 29040 : rel_optimize_munions_bottomup(visitor *v, global_props *gp, sql_rel *rel)
1139 : {
1140 29040 : (void) gp;
1141 29040 : return rel_visitor_bottomup(v, rel, &rel_optimize_munions_bottomup_);
1142 : }
1143 :
1144 : run_optimizer
1145 645123 : bind_optimize_unions_bottomup(visitor *v, global_props *gp)
1146 : {
1147 645123 : int flag = v->sql->sql_optimizer;
1148 644927 : return gp->opt_level == 1 && gp->cnt[op_munion] && (flag & optimize_unions_bottomup)
1149 645123 : ? rel_optimize_munions_bottomup : NULL;
1150 : }
1151 :
1152 :
1153 : static inline sql_rel *
1154 2036154 : rel_project_cse(visitor *v, sql_rel *rel)
1155 : {
1156 2036154 : if (is_project(rel->op) && rel->exps) {
1157 880743 : node *n, *m;
1158 880743 : list *nexps;
1159 880743 : int needed = 0;
1160 :
1161 6419429 : for (n=rel->exps->h; n && !needed; n = n->next) {
1162 5538681 : sql_exp *e1 = n->data;
1163 :
1164 5538681 : if (e1->type != e_column && !exp_is_atom(e1) && exp_name(e1)) {
1165 16305924 : for (m=n->next; m && !needed; m = m->next){
1166 15702613 : sql_exp *e2 = m->data;
1167 :
1168 15702613 : if (exp_name(e2) && exp_match_exp(e1, e2))
1169 15702613 : needed = 1;
1170 : }
1171 : }
1172 : }
1173 :
1174 880748 : if (!needed)
1175 : return rel;
1176 :
1177 743 : nexps = new_exp_list(v->sql->sa);
1178 8661 : for (n=rel->exps->h; n; n = n->next) {
1179 7918 : sql_exp *e1 = n->data;
1180 :
1181 7918 : if (e1->type != e_column && !exp_is_atom(e1) && exp_name(e1)) {
1182 33874 : for (m=nexps->h; m; m = m->next){
1183 29258 : sql_exp *e2 = m->data;
1184 :
1185 29258 : if (exp_name(e2) && exp_match_exp(e1, e2)) {
1186 783 : assert(e2->alias.label);
1187 783 : sql_exp *ne = exp_ref(v->sql, e2);
1188 :
1189 783 : ne = exp_propagate(v->sql->sa, ne, e1);
1190 783 : assert(!e1->ref);
1191 783 : exp_setalias(ne, e1->alias.label, exp_relname(e1), exp_name(e1));
1192 783 : set_selfref(ne);
1193 783 : assert(ne->nid);
1194 : e1 = ne;
1195 : break;
1196 : }
1197 : }
1198 : }
1199 7918 : append(nexps, e1);
1200 : }
1201 743 : rel->exps = nexps;
1202 : }
1203 : return rel;
1204 : }
1205 :
1206 : static int exp_is_const_op(sql_exp *exp, sql_exp *tope, sql_rel *expr);
1207 :
1208 : static int
1209 37 : exps_are_const_op(list *exps, sql_exp *tope, sql_rel *expr)
1210 : {
1211 37 : int ok = 1;
1212 :
1213 37 : if (list_empty(exps))
1214 : return 1;
1215 74 : for (node *n = exps->h; n && ok; n = n->next)
1216 37 : ok &= exp_is_const_op(n->data, tope, expr);
1217 : return ok;
1218 : }
1219 :
1220 : static int
1221 339 : exp_is_const_op(sql_exp *exp, sql_exp *tope, sql_rel *expr)
1222 : {
1223 449 : switch (exp->type) {
1224 30 : case e_atom:
1225 30 : return exp->f ? 0 : 1;
1226 2 : case e_convert:
1227 2 : return exp_is_const_op(exp->l, tope, expr);
1228 37 : case e_func:
1229 : case e_aggr: {
1230 37 : sql_subfunc *f = exp->f;
1231 37 : if (f->func->side_effect || IS_ANALYTIC(f->func))
1232 : return 0;
1233 37 : return exps_are_const_op(exp->l, tope, expr);
1234 : }
1235 0 : case e_cmp:
1236 0 : if (exp->flag == cmp_or || exp->flag == cmp_filter)
1237 0 : return exps_are_const_op(exp->l, tope, expr) && exps_are_const_op(exp->r, tope, expr);
1238 0 : if (exp->flag == cmp_in || exp->flag == cmp_notin)
1239 0 : return exp_is_const_op(exp->l, tope, expr) && exps_are_const_op(exp->r, tope, expr);
1240 0 : return exp_is_const_op(exp->l, tope, expr) && exp_is_const_op(exp->r, tope, expr) && (!exp->f || exp_is_const_op(exp->f, tope, expr));
1241 380 : case e_column: {
1242 380 : if (is_simple_project(expr->op) || is_groupby(expr->op)) {
1243 : /* in a simple projection, self-references may occur */
1244 380 : sql_exp *nexp = exps_bind_nid(expr->exps, exp->nid);
1245 380 : if (nexp && list_position(expr->exps, nexp) < list_position(expr->exps, tope))
1246 : return exp_is_const_op(nexp, exp, expr);
1247 : }
1248 : return 0;
1249 : }
1250 : default:
1251 : return 0;
1252 : }
1253 : }
1254 :
1255 : static sql_exp *
1256 25 : rel_groupby_add_count_star(mvc *sql, sql_rel *rel, sql_exp *count_star_exp, bool *count_added)
1257 : {
1258 25 : if (count_star_exp)
1259 : return count_star_exp;
1260 18 : if (!list_empty(rel->exps)) {
1261 49 : for (node *n=rel->exps->h; n ; n = n->next) {
1262 35 : sql_exp *e = n->data;
1263 :
1264 35 : if (exp_aggr_is_count(e) && !need_distinct(e) && list_length(e->l) == 0)
1265 4 : return e;
1266 : }
1267 : }
1268 14 : sql_subfunc *cf = sql_bind_func(sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
1269 14 : *count_added = true;
1270 14 : return rel_groupby_add_aggr(sql, rel, exp_aggr(sql->sa, NULL, cf, 0, 0, rel->card, 0));
1271 : }
1272 :
1273 : /* optimize sum(x + 12) into sum(x) + 12*count(*) */
1274 : static inline sql_rel *
1275 31095 : rel_simplify_sum(visitor *v, sql_rel *rel)
1276 : {
1277 31095 : if (is_groupby(rel->op) && !list_empty(rel->exps)) {
1278 31021 : sql_rel *upper = NULL, *groupby = rel, *l = groupby->l;
1279 31021 : sql_exp *count_star_exp = NULL;
1280 31021 : bool count_added = false;
1281 :
1282 93737 : for (node *n=groupby->exps->h; n ; n = n->next) {
1283 62716 : sql_exp *e = n->data;
1284 62716 : list *el = e->l;
1285 62716 : sql_subfunc *sf = e->f;
1286 :
1287 62716 : if (e->type == e_aggr && !need_distinct(e) && sf->func->type == F_AGGR && !sf->func->s && !strcmp(sf->func->base.name, "sum")) {
1288 5532 : sql_rel *expr = groupby;
1289 5532 : sql_exp *exp = (sql_exp*) el->h->data, *oexp = exp;
1290 :
1291 5532 : while (is_numeric_upcast(exp))
1292 0 : exp = exp->l;
1293 : /* we want to find a +/- call, so expect them to appear only on simple projections */
1294 12191 : while (exp && exp->type == e_column && (is_simple_project(expr->op) || is_groupby(expr->op)) && expr->l) {
1295 7521 : sql_rel *nexpr = NULL;
1296 7521 : sql_exp *nexp = rel_find_exp_and_corresponding_rel(l, exp, false, &nexpr, NULL);
1297 :
1298 : /* break when it loops on the same relation */
1299 7521 : if (nexpr == expr && list_position(expr->exps, nexp) >= list_position(expr->exps, exp))
1300 : break;
1301 6659 : expr = nexpr;
1302 6659 : exp = oexp = nexp;
1303 6661 : while (exp && is_numeric_upcast(exp))
1304 2 : exp = exp->l;
1305 : }
1306 :
1307 5532 : list *expl = exp ? exp->l : NULL;
1308 62716 : sql_subfunc *expf = exp ? exp->f : NULL;
1309 : /* found a candidate function */
1310 4337 : if (exp && exp->type == e_func && expf->func->type == F_FUNC && !expf->func->s &&
1311 894 : (!strcmp(expf->func->base.name, "sql_sub") || !strcmp(expf->func->base.name, "sql_add"))) {
1312 151 : sql_exp *e1 = (sql_exp*) expl->h->data, *e2 = (sql_exp*) expl->h->next->data;
1313 151 : int e1ok = exp_is_const_op(e1, oexp, expr), e2ok = exp_is_const_op(e2, oexp, expr);
1314 :
1315 151 : if ((!e1ok && e2ok) || (e1ok && !e2ok)) {
1316 26 : sql_exp *ocol = e1ok ? e2 : e1, *constant = e1ok ? e1 : e2, *mul, *colref, *naggr, *newop, *col = ocol, *match;
1317 26 : bool add_col = true, prepend = false;
1318 :
1319 : /* if 'col' is a projection from the under relation, then use it */
1320 26 : while (is_numeric_upcast(col))
1321 0 : col = col->l;
1322 26 : if (col->type == e_column) {
1323 24 : sql_exp *colf = exps_find_exp(l->exps, col);
1324 :
1325 : /* col is already found in the inner relation. Also look for a new reference for col, eg sql_add(col, 1), 1 as col */
1326 24 : if (colf && list_position(l->exps, colf) < list_position(l->exps, oexp)) {
1327 : add_col = false;
1328 11 : } else if (!colf && is_simple_project(l->op) && list_empty(l->r) && !rel_is_ref(l) && !need_distinct(l)) {
1329 : prepend = true;
1330 : add_col = false;
1331 1 : } else if (!colf && (is_simple_project(l->op) || is_groupby(l->op))) {
1332 : /* on these scenarios the new column expression will be ordered/(grouped for distinct) or create potential ambiguity (multiple ref), so skip */
1333 1 : continue;
1334 : }
1335 2 : } else if ((is_simple_project(l->op) && (!list_empty(l->r) || rel_is_ref(l) || need_distinct(l))) || is_groupby(l->op)) {
1336 0 : continue;
1337 : }
1338 :
1339 : /* add count star */
1340 25 : count_star_exp = rel_groupby_add_count_star(v->sql, groupby, count_star_exp, &count_added);
1341 : /* multiply constant by count star */
1342 25 : if (!(mul = rel_binop_(v->sql, NULL, constant, exp_ref(v->sql, count_star_exp), "sys", "sql_mul", card_value, true))) {
1343 0 : v->sql->session->status = 0;
1344 0 : v->sql->errstr[0] = '\0';
1345 0 : continue;
1346 : }
1347 25 : if (!has_label(mul))
1348 25 : exp_label(v->sql->sa, mul, ++v->sql->label);
1349 :
1350 25 : colref = exp_ref(v->sql, ocol);
1351 25 : if (add_col) /* if 'col' will be added, then make sure it has an unique label */
1352 3 : exp_label(v->sql->sa, colref, ++v->sql->label);
1353 :
1354 : /* 'oexp' contains the type for the input for the 'sum' aggregate */
1355 25 : if (!(colref = exp_check_type(v->sql, exp_subtype(oexp), groupby, colref, type_equal))) {
1356 0 : v->sql->session->status = 0;
1357 0 : v->sql->errstr[0] = '\0';
1358 0 : continue;
1359 : }
1360 : /* update sum to use the column side only */
1361 25 : sql_subfunc *a = sql_bind_func(v->sql, "sys", "sum", exp_subtype(colref), NULL, F_AGGR, true, true);
1362 25 : if (!a)
1363 0 : continue;
1364 25 : naggr = exp_aggr(v->sql->sa, list_append(sa_list(v->sql->sa), colref), a, need_distinct(e), need_no_nil(e), groupby->card, has_nil(e));
1365 25 : if ((match = exps_any_match(groupby->exps, naggr)) && list_position(groupby->exps, match) < list_position(groupby->exps, e)) { /* found a matching aggregate, use it */
1366 9 : naggr = exp_ref(v->sql, match);
1367 9 : exp_label(v->sql->sa, naggr, ++v->sql->label);
1368 16 : } else if (!has_label(naggr)) { /* otherwise use the new one */
1369 16 : exp_label(v->sql->sa, naggr, ++v->sql->label);
1370 : }
1371 :
1372 : /* generate addition/subtraction. subtraction is not commutative, so keep original order! */
1373 25 : if (!(newop = rel_binop_(v->sql, NULL, e1 == constant ? mul : exp_ref(v->sql, naggr), e1 == constant ? exp_ref(v->sql, naggr) : mul, "sys", expf->func->base.name, card_value, true))) {
1374 0 : v->sql->session->status = 0;
1375 0 : v->sql->errstr[0] = '\0';
1376 0 : continue;
1377 : }
1378 25 : if (!(newop = exp_check_type(v->sql, exp_subtype(e), groupby, newop, type_equal))) {
1379 0 : v->sql->session->status = 0;
1380 0 : v->sql->errstr[0] = '\0';
1381 0 : continue;
1382 : }
1383 :
1384 : /* a column reference can be prepended to the inner relation, add it after all the check type calls succeed */
1385 25 : if (prepend)
1386 10 : list_prepend(l->exps, col);
1387 :
1388 : /* the new generate function calls are valid, update relations */
1389 : /* we need a new relation for the multiplication and addition/subtraction */
1390 25 : if (!upper) {
1391 : /* be careful with relations with more than 1 reference, so do in-place replacement */
1392 18 : list *projs = rel_projections(v->sql, rel, NULL, 1, 1);
1393 18 : sql_rel *nrel = rel_groupby(v->sql, rel->l, NULL);
1394 18 : nrel->exps = rel->exps;
1395 18 : nrel->r = rel->r;
1396 18 : nrel->card = rel->card;
1397 18 : nrel->nrcols = list_length(rel->exps);
1398 18 : set_processed(nrel);
1399 18 : rel_dup(rel->l);
1400 18 : upper = rel = rel_inplace_project(v->sql->sa, rel, nrel, projs);
1401 18 : rel->card = exps_card(projs);
1402 18 : groupby = nrel; /* update pointers :) */
1403 18 : l = groupby->l;
1404 : }
1405 98 : for (node *n = upper->exps->h ; n ; ) {
1406 73 : node *next = n->next;
1407 73 : sql_exp *re = n->data;
1408 :
1409 : /* remove the old reference to the aggregate because we will use the addition/subtraction instead,
1410 : as well as the count star if count_added */
1411 73 : if (exp_refers(e, re) || (count_added && exp_refers(count_star_exp, re)))
1412 39 : list_remove_node(upper->exps, NULL, n);
1413 : n = next;
1414 : }
1415 :
1416 : /* update sum aggregate with new aggregate or reference to an existing one */
1417 25 : n->data = naggr;
1418 25 : list_hash_clear(groupby->exps);
1419 :
1420 : /* add column reference with new label, if 'col' was not found */
1421 25 : if (add_col) {
1422 3 : if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l))
1423 0 : groupby->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
1424 3 : list_append(l->exps, ocol);
1425 : }
1426 :
1427 : /* propagate alias and add new expression */
1428 25 : exp_prop_alias(v->sql->sa, newop, e);
1429 25 : list_append(upper->exps, newop);
1430 25 : v->changes++;
1431 : }
1432 : }
1433 : }
1434 : }
1435 : }
1436 31095 : return rel;
1437 : }
1438 :
1439 : /* optimize group by x+1,(y-2)*3,2-z into group by x,y,z */
1440 : static inline sql_rel *
1441 31095 : rel_simplify_groupby_columns(visitor *v, sql_rel *rel)
1442 : {
1443 31095 : if (is_groupby(rel->op) && !list_empty(rel->r)) {
1444 22239 : sql_rel *l = rel->l;
1445 :
1446 57011 : for (node *n=((list*)rel->r)->h; n ; n = n->next) {
1447 34772 : sql_exp *e = n->data;
1448 34772 : e->used = 0; /* we need to use this flag, clean it first */
1449 : }
1450 57011 : for (node *n=((list*)rel->r)->h; n ; n = n->next) {
1451 34772 : sql_exp *e = n->data;
1452 :
1453 34772 : if (e->type == e_column) {
1454 34738 : bool searching = true;
1455 34738 : sql_rel *efrel = NULL;
1456 34738 : sql_exp *exp = rel_find_exp_and_corresponding_rel(l, e, false, &efrel, NULL), *col = NULL, *tope = exp;
1457 :
1458 69494 : while (searching && !col) {
1459 34756 : sql_exp *exp_col = exp;
1460 :
1461 34756 : if (exp && is_numeric_upcast(exp))
1462 9 : exp = exp->l;
1463 34756 : if (exp && exp->type == e_func) {
1464 639 : list *el = exp->l;
1465 639 : sql_subfunc *sf = exp->f;
1466 : /* At the moment look only at injective math functions */
1467 639 : if (sf->func->type == F_FUNC && !sf->func->s &&
1468 627 : (!strcmp(sf->func->base.name, "sql_sub") || !strcmp(sf->func->base.name, "sql_add") || !strcmp(sf->func->base.name, "sql_mul"))) {
1469 107 : sql_exp *e1 = (sql_exp*) el->h->data, *e2 = (sql_exp*) el->h->next->data;
1470 : /* the optimization cannot be done if side-effect calls (e.g. rand()) are present */
1471 194 : int e1ok = exp_is_atom(e1) && !exp_unsafe(e1, true, false) && !exp_has_sideeffect(e1), e2ok = exp_is_atom(e2) && !exp_unsafe(e2, true, false) && !exp_has_sideeffect(e2);
1472 :
1473 107 : if ((!e1ok && e2ok) || (e1ok && !e2ok)) {
1474 59 : sql_exp *c = e1ok ? e2 : e1;
1475 59 : bool done = false;
1476 59 : exp_col = exps_find_exp(efrel->exps, c);
1477 59 : if (exp_col)
1478 28 : c = exp_col;
1479 :
1480 120 : while (!done) {
1481 61 : if (is_numeric_upcast(c))
1482 0 : c = c->l;
1483 61 : if (c->type == e_column) {
1484 43 : if (is_simple_project(efrel->op) || is_groupby(efrel->op)) {
1485 : /* in a simple projection, self-references may occur */
1486 43 : sql_exp *nc = exps_find_exp(efrel->exps, c);
1487 43 : if (nc && list_position(efrel->exps, nc) < list_position(efrel->exps, exp_col)) {
1488 2 : exp_col = c;
1489 2 : c = nc;
1490 2 : continue;
1491 : }
1492 : }
1493 : col = c; /* 'c' is a column reference from the left relation */
1494 : done = true;
1495 : } else {
1496 : exp = c; /* maybe a nested function call, let's continue searching */
1497 : done = true;
1498 : }
1499 : }
1500 : } else {
1501 : searching = false;
1502 : }
1503 : } else {
1504 : searching = false;
1505 : }
1506 : } else {
1507 : searching = false;
1508 : }
1509 : }
1510 34738 : if (col) { /* a column reference was found */
1511 41 : sql_alias *rname = exp_relname(e);
1512 41 : const char *name = exp_name(e);
1513 :
1514 : /* the grouping column has an alias, we have to keep it */
1515 41 : if ((rname && name && (!a_match(rname, e->l) || strcmp(name, e->r) != 0)) || (!rname && name && strcmp(name, e->r) != 0)) {
1516 2 : if (!has_label(e)) /* dangerous to merge, skip it */
1517 0 : continue;
1518 2 : if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l))
1519 0 : rel->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
1520 2 : list_append(l->exps, e);
1521 2 : n->data = e = exp_ref(v->sql, e);
1522 2 : list_hash_clear(rel->r);
1523 : }
1524 :
1525 41 : sql_exp *f = exps_find_exp(rel->r, col);
1526 :
1527 41 : if (f && list_position(rel->r, f) < list_position(rel->r, e)) { /* if already present, remove it */
1528 10 : e->used = 1;
1529 : } else {
1530 : /* Use an unique reference to the column found. If there's another grouping column label pointing into it,
1531 : rel_groupby_cse will hopefully remove it */
1532 31 : sql_exp *colf = exps_find_exp(l->exps, col);
1533 :
1534 : /* col is already found in the inner relation. Also look for a new reference for col, eg sql_add(col, 1), 1 as col */
1535 31 : if (colf && list_position(l->exps, colf) < list_position(l->exps, tope)) {
1536 2 : n->data = exp_ref(v->sql, col);
1537 25 : } else if (!colf && is_simple_project(l->op) && list_empty(l->r) && !rel_is_ref(l) && !need_distinct(l)) { /* trivial case, it can be added */
1538 25 : sql_exp *ne = col;
1539 25 : list_prepend(l->exps, ne);
1540 25 : n->data = exp_ref(v->sql, ne);
1541 0 : } else if (!colf && (is_simple_project(l->op) || is_groupby(l->op))) {
1542 : /* on these scenarios the new column expression will be ordered/(grouped for distinct) or create potential ambiguity (multiple ref), so skip */
1543 0 : continue;
1544 : } else {
1545 4 : sql_exp *ne = exp_ref(v->sql, col);
1546 :
1547 4 : if (colf) /* a col reference is already there, add a new label */
1548 4 : exp_label(v->sql->sa, ne, ++v->sql->label);
1549 4 : if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l))
1550 0 : rel->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
1551 4 : list_append(l->exps, ne);
1552 4 : n->data = exp_ref(v->sql, ne);
1553 : }
1554 31 : list_hash_clear(rel->r);
1555 : }
1556 41 : v->changes++;
1557 : }
1558 : }
1559 : }
1560 57011 : for (node *n=((list*)rel->r)->h; n ; ) {
1561 34772 : node *next = n->next;
1562 34772 : sql_exp *e = n->data;
1563 :
1564 34772 : if (e->used) /* remove unnecessary grouping columns */
1565 10 : list_remove_node(rel->r, NULL, n);
1566 : n = next;
1567 : }
1568 : }
1569 31095 : return rel;
1570 : }
1571 :
1572 : /* remove identical grouping columns */
1573 : static inline sql_rel *
1574 55794 : rel_groupby_cse(visitor *v, sql_rel *rel)
1575 : {
1576 55794 : if (is_groupby(rel->op) && !list_empty(rel->r)) {
1577 32023 : sql_rel *l = rel->l;
1578 :
1579 : /* for every group expression e1 */
1580 93464 : for (node *n=((list*)rel->r)->h; n ; n = n->next) {
1581 61441 : sql_exp *e1 = n->data;
1582 : /* it's good to examine the same expression in the subrelation e.g. in case it's an alias */
1583 : /* TODO maybe cover more cases? Here I only look at the left relation */
1584 122882 : sql_exp *e1_sub = e1->type == e_column ?
1585 61441 : e1->nid ? exps_bind_nid(l->exps, e1->nid) : exps_find_exp(l->exps, e1) : NULL;
1586 :
1587 : /* for every other group expression */
1588 119608 : for (node *m=n->next; m; m = m->next) {
1589 58167 : sql_exp *e2 = m->data;
1590 116334 : sql_exp *e2_sub = e2->type == e_column ?
1591 58167 : e2->nid ? exps_bind_nid(l->exps, e2->nid) : exps_find_exp(l->exps, e2) : NULL;
1592 :
1593 : /* check if the expression are the same */
1594 58167 : if (exp_match_exp(e1, e2) || exp_refers(e1, e2) || (e1_sub && e2_sub && (exp_match_exp(e1_sub, e2_sub) || exp_refers(e1_sub, e2_sub)))) {
1595 :
1596 : /* use e2 from rel->exps instead of e2 from the rel->r as it can have an alias from the higher rel */
1597 41 : sql_exp *e2_in_exps = exps_uses_nid(rel->exps, e2->alias.label);
1598 41 : assert(e2_in_exps);
1599 :
1600 : /* same as e2 */
1601 41 : sql_exp *e1_in_exps = exps_uses_nid(rel->exps, e1->alias.label);
1602 41 : if (!e1_in_exps)
1603 6 : continue;
1604 :
1605 : /* write e2 as an e1 alias since the expressions are the same */
1606 35 : sql_exp* e2_as_e1_alias = exp_copy(v->sql, e1_in_exps);
1607 : /* NOTE: it is important to get the rname (exp->l) and name (exp->r) from e2 IN the exps
1608 : * (e2_in_exps), and not from e2, since it could carry an alias from the higher rel */
1609 35 : exp_setalias(e2_as_e1_alias, e2_as_e1_alias->alias.label, e2_in_exps->l, e2_in_exps->r);
1610 35 : e2_as_e1_alias->alias.label = e2->alias.label;
1611 :
1612 : /* replace e2 with e2_as_e1_alias in expressions list */
1613 35 : node *e2_exps_node = list_find(rel->exps, e2_in_exps, NULL);
1614 35 : list_append_before(rel->exps, e2_exps_node, e2_as_e1_alias);
1615 35 : list_remove_node(rel->exps, NULL, e2_exps_node);
1616 :
1617 : /* finally remove e2 from the groups' list (->r) since it's redundant */
1618 35 : node *e2_r_node = list_find(rel->r, e2, NULL);
1619 35 : list_remove_node(rel->r, NULL, e2_r_node);
1620 :
1621 35 : v->changes++;
1622 : }
1623 : }
1624 : }
1625 : }
1626 55793 : return rel;
1627 : }
1628 :
1629 : sql_exp *list_exps_uses_exp(list *exps, sql_alias *rname, const char *name);
1630 :
1631 : static sql_exp*
1632 17586 : exp_uses_exp(sql_exp *e, sql_alias *rname, const char *name)
1633 : {
1634 17884 : sql_exp *res = NULL;
1635 :
1636 17884 : switch (e->type) {
1637 : case e_psm:
1638 : break;
1639 1951 : case e_atom: {
1640 1951 : if (e->f)
1641 0 : return list_exps_uses_exp(e->f, rname, name);
1642 : } break;
1643 297 : case e_convert:
1644 297 : return exp_uses_exp(e->l, rname, name);
1645 9186 : case e_column: {
1646 9186 : if (e->l && rname && a_match(e->l, rname) &&
1647 2262 : e->r && name && strcmp(e->r, name) == 0)
1648 : return e;
1649 7088 : if (!e->l && !rname &&
1650 242 : e->r && name && strcmp(e->r, name) == 0)
1651 : return e;
1652 : } break;
1653 6031 : case e_func:
1654 : case e_aggr: {
1655 6031 : if (e->l)
1656 4899 : return list_exps_uses_exp(e->l, rname, name);
1657 : } break;
1658 419 : case e_cmp: {
1659 419 : if (e->flag == cmp_in || e->flag == cmp_notin) {
1660 0 : if ((res = exp_uses_exp(e->l, rname, name)))
1661 : return res;
1662 0 : return list_exps_uses_exp(e->r, rname, name);
1663 419 : } else if (e->flag == cmp_or || e->flag == cmp_filter) {
1664 2 : if ((res = list_exps_uses_exp(e->l, rname, name)))
1665 : return res;
1666 2 : return list_exps_uses_exp(e->r, rname, name);
1667 : } else {
1668 417 : if ((res = exp_uses_exp(e->l, rname, name)))
1669 : return res;
1670 415 : if ((res = exp_uses_exp(e->r, rname, name)))
1671 : return res;
1672 409 : if (e->f)
1673 : return exp_uses_exp(e->f, rname, name);
1674 : }
1675 : } break;
1676 : }
1677 : return NULL;
1678 : }
1679 :
1680 : sql_exp *
1681 9755 : list_exps_uses_exp(list *exps, sql_alias *rname, const char *name)
1682 : {
1683 9755 : sql_exp *res = NULL;
1684 :
1685 9755 : if (!exps)
1686 : return NULL;
1687 26509 : for (node *n = exps->h; n && !res; n = n->next) {
1688 16754 : sql_exp *e = n->data;
1689 16754 : res = exp_uses_exp(e, rname, name);
1690 : }
1691 : return res;
1692 : }
1693 :
1694 : /* find in the list of expression an expression which uses e */
1695 : sql_exp *
1696 868 : exps_uses_exp(list *exps, sql_exp *e)
1697 : {
1698 868 : return list_exps_uses_exp(exps, exp_relname(e), exp_name(e));
1699 : }
1700 : /*
1701 : * Rewrite aggregations over munion all.
1702 : * groupby ([ union all (a, b, c) ], [gbe], [ count, sum ] )
1703 : *
1704 : * into
1705 : * groupby ([ union all( groupby( a, [gbe], [ count, sum] ),
1706 : * groupby( b, [gbe], [ count, sum] ),
1707 : * groupby( c, [gbe], [ count, sum] ) ],
1708 : * [gbe], [sum, sum] )
1709 : */
1710 : static inline bool
1711 1719 : rel_has_groupby(sql_rel *r)
1712 : {
1713 3045 : if (r) {
1714 2876 : if (is_groupby(r->op))
1715 : return true;
1716 2319 : if (is_simple_project(r->op))
1717 1326 : return rel_has_groupby(r->l);
1718 : }
1719 : return false;
1720 : }
1721 :
1722 : static inline sql_rel *
1723 1121 : rel_push_aggr_down_n_arry(visitor *v, sql_rel *rel)
1724 : {
1725 1121 : sql_rel *g = rel;
1726 1121 : sql_rel *u = rel->l, *ou = u;
1727 1121 : sql_rel *r = NULL;
1728 1121 : list *rgbe = NULL, *gbe = NULL, *exps = NULL;
1729 1121 : node *n, *m;
1730 :
1731 : // TODO why?
1732 1121 : if (u->op == op_project && !need_distinct(u))
1733 313 : u = u->l;
1734 :
1735 1121 : if (is_recursive(u))
1736 : return rel;
1737 :
1738 : /* make sure we don't create group by on group by's */
1739 2277 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
1740 1719 : r = n->data;
1741 3438 : if (rel_has_groupby(r))
1742 : return rel;
1743 : }
1744 :
1745 : /* distinct should be done over the full result */
1746 1622 : for (n = g->exps->h; n; n = n->next) {
1747 1097 : sql_exp *e = n->data;
1748 1097 : sql_subfunc *af = e->f;
1749 :
1750 1097 : if (e->type == e_atom ||
1751 1097 : e->type == e_func ||
1752 478 : (e->type == e_aggr &&
1753 478 : ((strcmp(af->func->base.name, "sum") &&
1754 405 : strcmp(af->func->base.name, "count") &&
1755 92 : strcmp(af->func->base.name, "min") &&
1756 478 : strcmp(af->func->base.name, "max")) ||
1757 : need_distinct(e))))
1758 : return rel;
1759 : }
1760 :
1761 525 : list *nl = sa_list(v->sql->sa);
1762 1586 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
1763 1061 : r = rel_dup(n->data);
1764 : //n->data = NULL; /* clean list as we steal the relation r, stealing is needed else (with multiple references) double project cleanup fails */
1765 1061 : if (!is_project(r->op))
1766 47 : r = rel_project(v->sql->sa, r,
1767 : rel_projections(v->sql, r, NULL, 1, 1));
1768 1061 : rel_rename_exps(v->sql, u->exps, r->exps);
1769 1061 : if (u != ou) {
1770 598 : bool isproject = is_project(r->op);
1771 598 : r = rel_project(v->sql->sa, r, NULL);
1772 598 : r->exps = exps_copy(v->sql, ou->exps);
1773 598 : rel_rename_exps(v->sql, ou->exps, r->exps);
1774 598 : set_processed(r);
1775 598 : if (isproject)
1776 598 : r = rel_push_project_down_(v, r); /* cleanup any double projects */
1777 : }
1778 1061 : if (g->r && list_length(g->r) > 0) {
1779 843 : list *gbe = g->r;
1780 843 : rgbe = exps_copy(v->sql, gbe);
1781 : }
1782 1061 : r = rel_groupby(v->sql, r, NULL);
1783 1061 : r->r = rgbe;
1784 1061 : r->nrcols = g->nrcols;
1785 1061 : r->card = g->card;
1786 1061 : r->exps = exps_copy(v->sql, g->exps);
1787 1061 : r->nrcols = list_length(r->exps);
1788 1061 : set_processed(r);
1789 :
1790 1061 : assert(r);
1791 1061 : append(nl, r);
1792 : }
1793 :
1794 : /* group by on primary keys which define the partitioning scheme
1795 : * don't need a finalizing group by */
1796 : /* how to check if a partition is based on some primary key ?
1797 : * */
1798 525 : if (!list_empty(rel->r)) {
1799 1031 : for (node *n = ((list*)rel->r)->h; n; n = n->next) {
1800 611 : sql_exp *e = n->data;
1801 611 : sql_column *c = NULL;
1802 :
1803 611 : if ((c = exp_is_pkey(rel, e)) && partition_find_part(v->sql->session->tr, c->t, NULL)) {
1804 : /* check if key is partition key */
1805 0 : v->changes++;
1806 0 : return rel_inplace_setop_n_ary(v->sql, rel, nl, op_munion,
1807 : rel_projections(v->sql, rel, NULL, 1, 1));
1808 : }
1809 : }
1810 : }
1811 :
1812 525 : if (!list_empty(rel->r)) {
1813 420 : list *ogbe = rel->r;
1814 :
1815 420 : gbe = new_exp_list(v->sql->sa);
1816 1031 : for (n = ogbe->h; n; n = n->next) {
1817 611 : sql_exp *e = n->data, *ne;
1818 :
1819 : /* group by in aggregation list */
1820 611 : ne = exps_uses_exp( rel->exps, e);
1821 611 : if (ne) {
1822 605 : sql_rel *first_munion_rel = nl->h->data;
1823 605 : ne = list_find_exp(first_munion_rel->exps, ne);
1824 : }
1825 605 : if (!ne) {
1826 : /* e only in the u1,u2,...un->r (group by list) */
1827 24 : for (node *n = nl->h; n; n = n->next) {
1828 16 : ne = exp_ref(v->sql, e);
1829 16 : list_append(((sql_rel*)n->data)->exps, ne);
1830 : }
1831 : }
1832 8 : assert(ne);
1833 611 : ne = exp_ref(v->sql, ne);
1834 611 : append(gbe, ne);
1835 : }
1836 : }
1837 :
1838 525 : u = rel_setop_n_ary(v->sql->sa, nl, op_munion);
1839 525 : rel_setop_n_ary_set_exps(v->sql, u,
1840 525 : rel_projections(v->sql, nl->h->data, NULL, 1, 1), false);
1841 525 : set_processed(u);
1842 :
1843 525 : exps = new_exp_list(v->sql->sa);
1844 1586 : for (n = u->exps->h, m = rel->exps->h; n && m; n = n->next, m = m->next) {
1845 1061 : sql_exp *ne, *e = n->data, *oa = m->data;
1846 :
1847 1061 : if (oa->type == e_aggr) {
1848 445 : sql_subfunc *f = oa->f;
1849 445 : int cnt = exp_aggr_is_count(oa);
1850 445 : sql_subfunc *a = sql_bind_func(v->sql, "sys", (cnt)?"sum":f->func->base.name, exp_subtype(e), NULL, F_AGGR, true, true);
1851 :
1852 445 : assert(a);
1853 : /* munion of aggr result may have nils
1854 : * because sum/count of empty set */
1855 445 : set_has_nil(e);
1856 445 : e = exp_ref(v->sql, e);
1857 445 : ne = exp_aggr1(v->sql->sa, e, a, need_distinct(e), 1, e->card, 1);
1858 : } else {
1859 616 : ne = exp_copy(v->sql, oa);
1860 : }
1861 1061 : exp_setalias(ne, oa->alias.label, exp_find_rel_name(oa), exp_name(oa));
1862 1061 : append(exps, ne);
1863 : }
1864 525 : v->changes++;
1865 525 : return rel_inplace_groupby(rel, u, gbe, exps);
1866 : }
1867 :
1868 : /*
1869 : * Rewrite aggregations over union all.
1870 : * groupby ([ union all (a, b) ], [gbe], [ count, sum ] )
1871 : *
1872 : * into
1873 : * groupby ( [ union all( groupby( a, [gbe], [ count, sum] ), [ groupby( b, [gbe], [ count, sum] )) , [gbe], [sum, sum] )
1874 : */
1875 : static inline sql_rel *
1876 55794 : rel_push_aggr_down(visitor *v, sql_rel *rel)
1877 : {
1878 55794 : if (rel->op == op_groupby && rel->l) {
1879 55704 : sql_rel *u = rel->l, *ou = u;
1880 55704 : sql_rel *g = rel;
1881 55704 : sql_rel *ul = u->l;
1882 55704 : sql_rel *ur = u->r;
1883 55704 : node *n, *m;
1884 55704 : list *lgbe = NULL, *rgbe = NULL, *gbe = NULL, *exps = NULL;
1885 :
1886 55704 : if (u->op == op_project && !need_distinct(u))
1887 18601 : u = u->l;
1888 :
1889 55704 : if (!u || !(is_union(u->op) || is_munion(u->op)) || need_distinct(u) || is_single(u) || !u->exps || rel_is_ref(u))
1890 : return rel;
1891 :
1892 1121 : if (is_munion(u->op))
1893 1121 : return rel_push_aggr_down_n_arry(v, rel);
1894 :
1895 0 : ul = u->l;
1896 0 : ur = u->r;
1897 :
1898 : /* make sure we don't create group by on group by's */
1899 0 : if (ul->op == op_groupby || ur->op == op_groupby)
1900 : return rel;
1901 :
1902 : /* distinct should be done over the full result */
1903 0 : for (n = g->exps->h; n; n = n->next) {
1904 0 : sql_exp *e = n->data;
1905 0 : sql_subfunc *af = e->f;
1906 :
1907 0 : if (e->type == e_atom ||
1908 0 : e->type == e_func ||
1909 0 : (e->type == e_aggr &&
1910 0 : ((strcmp(af->func->base.name, "sum") &&
1911 0 : strcmp(af->func->base.name, "count") &&
1912 0 : strcmp(af->func->base.name, "min") &&
1913 0 : strcmp(af->func->base.name, "max")) ||
1914 : need_distinct(e))))
1915 : return rel;
1916 : }
1917 :
1918 0 : ul = rel_dup(ul);
1919 0 : ur = rel_dup(ur);
1920 0 : if (!is_project(ul->op))
1921 0 : ul = rel_project(v->sql->sa, ul,
1922 : rel_projections(v->sql, ul, NULL, 1, 1));
1923 0 : if (!is_project(ur->op))
1924 0 : ur = rel_project(v->sql->sa, ur,
1925 : rel_projections(v->sql, ur, NULL, 1, 1));
1926 0 : rel_rename_exps(v->sql, u->exps, ul->exps);
1927 0 : rel_rename_exps(v->sql, u->exps, ur->exps);
1928 0 : if (u != ou) {
1929 0 : ul = rel_project(v->sql->sa, ul, NULL);
1930 0 : ul->exps = exps_copy(v->sql, ou->exps);
1931 0 : rel_rename_exps(v->sql, ou->exps, ul->exps);
1932 0 : set_processed(ul);
1933 0 : ur = rel_project(v->sql->sa, ur, NULL);
1934 0 : ur->exps = exps_copy(v->sql, ou->exps);
1935 0 : rel_rename_exps(v->sql, ou->exps, ur->exps);
1936 0 : set_processed(ur);
1937 : }
1938 :
1939 0 : if (g->r && list_length(g->r) > 0) {
1940 0 : list *gbe = g->r;
1941 :
1942 0 : lgbe = exps_copy(v->sql, gbe);
1943 0 : rgbe = exps_copy(v->sql, gbe);
1944 : }
1945 0 : ul = rel_groupby(v->sql, ul, NULL);
1946 0 : ul->r = lgbe;
1947 0 : ul->nrcols = g->nrcols;
1948 0 : ul->card = g->card;
1949 0 : ul->exps = exps_copy(v->sql, g->exps);
1950 0 : ul->nrcols = list_length(ul->exps);
1951 0 : set_processed(ul);
1952 :
1953 0 : ur = rel_groupby(v->sql, ur, NULL);
1954 0 : ur->r = rgbe;
1955 0 : ur->nrcols = g->nrcols;
1956 0 : ur->card = g->card;
1957 0 : ur->exps = exps_copy(v->sql, g->exps);
1958 0 : ur->nrcols = list_length(ur->exps);
1959 0 : set_processed(ur);
1960 :
1961 : /* group by on primary keys which define the partitioning scheme
1962 : * don't need a finalizing group by */
1963 : /* how to check if a partition is based on some primary key ?
1964 : * */
1965 0 : if (!list_empty(rel->r)) {
1966 0 : for (node *n = ((list*)rel->r)->h; n; n = n->next) {
1967 0 : sql_exp *e = n->data;
1968 0 : sql_column *c = NULL;
1969 :
1970 0 : if ((c = exp_is_pkey(rel, e)) && partition_find_part(v->sql->session->tr, c->t, NULL)) {
1971 : /* check if key is partition key */
1972 0 : v->changes++;
1973 0 : return rel_inplace_setop(v->sql, rel, ul, ur, op_union,
1974 : rel_projections(v->sql, rel, NULL, 1, 1));
1975 : }
1976 : }
1977 : }
1978 :
1979 0 : if (!list_empty(rel->r)) {
1980 0 : list *ogbe = rel->r;
1981 :
1982 0 : gbe = new_exp_list(v->sql->sa);
1983 0 : for (n = ogbe->h; n; n = n->next) {
1984 0 : sql_exp *e = n->data, *ne;
1985 :
1986 : /* group by in aggregation list */
1987 0 : ne = exps_uses_exp( rel->exps, e);
1988 0 : if (ne)
1989 0 : ne = list_find_exp( ul->exps, ne);
1990 0 : if (!ne) {
1991 : /* e only in the ul/ur->r (group by list) */
1992 0 : ne = exp_ref(v->sql, e);
1993 0 : list_append(ul->exps, ne);
1994 0 : ne = exp_ref(v->sql, e);
1995 0 : list_append(ur->exps, ne);
1996 : }
1997 0 : assert(ne);
1998 0 : ne = exp_ref(v->sql, ne);
1999 0 : append(gbe, ne);
2000 : }
2001 : }
2002 :
2003 0 : u = rel_setop(v->sql->sa, ul, ur, op_union);
2004 0 : rel_setop_set_exps(v->sql, u, rel_projections(v->sql, ul, NULL, 1, 1), false);
2005 0 : set_processed(u);
2006 :
2007 0 : exps = new_exp_list(v->sql->sa);
2008 0 : for (n = u->exps->h, m = rel->exps->h; n && m; n = n->next, m = m->next) {
2009 0 : sql_exp *ne, *e = n->data, *oa = m->data;
2010 :
2011 0 : if (oa->type == e_aggr) {
2012 0 : sql_subfunc *f = oa->f;
2013 0 : int cnt = exp_aggr_is_count(oa);
2014 0 : sql_subfunc *a = sql_bind_func(v->sql, "sys", (cnt)?"sum":f->func->base.name, exp_subtype(e), NULL, F_AGGR, true, true);
2015 :
2016 0 : assert(a);
2017 : /* union of aggr result may have nils
2018 : * because sum/count of empty set */
2019 0 : set_has_nil(e);
2020 0 : e = exp_ref(v->sql, e);
2021 0 : ne = exp_aggr1(v->sql->sa, e, a, need_distinct(e), 1, e->card, 1);
2022 : } else {
2023 0 : ne = exp_copy(v->sql, oa);
2024 : }
2025 0 : exp_setalias(ne, oa->alias.label, exp_find_rel_name(oa), exp_name(oa));
2026 0 : append(exps, ne);
2027 : }
2028 0 : v->changes++;
2029 0 : return rel_inplace_groupby( rel, u, gbe, exps);
2030 : }
2031 : return rel;
2032 : }
2033 :
2034 : /*
2035 : * More general
2036 : * groupby(
2037 : * [ outer ] join(
2038 : * project(
2039 : * table(A) [ c1, c2, .. ]
2040 : * ) [ c1, c2, identity(c2) as I, .. ],
2041 : * table(B) [ c1, c2, .. ]
2042 : * ) [ A.c1 = B.c1 ]
2043 : * ) [ I ] [ a1, a2, .. ]
2044 : *
2045 : * ->
2046 : *
2047 : * [ outer ] join(
2048 : * project(
2049 : * table(A) [ c1, c2, .. ]
2050 : * ) [ c1, c2, .. ],
2051 : * groupby (
2052 : * table(B) [ c1, c2, .. ]
2053 : * ) [ B.c1 ] [ a1, a2, .. ]
2054 : * ) [ A.c1 = B.c1 ]
2055 : */
2056 : static sql_rel *
2057 22985 : gen_push_groupby_down(mvc *sql, sql_rel *rel, int *changes)
2058 : {
2059 22985 : sql_rel *j = rel->l;
2060 22985 : list *gbe = rel->r;
2061 :
2062 22985 : if (rel->op == op_groupby && list_length(gbe) == 1 && j->op == op_join){
2063 5347 : sql_rel *jl = j->l, *jr = j->r, *cr, *cl;
2064 5347 : sql_exp *gb = gbe->h->data, *e;
2065 5347 : node *n;
2066 5347 : int left = 1;
2067 5347 : list *aggrs, *aliases, *gbe;
2068 :
2069 5347 : if (!is_identity(gb, jl) && !is_identity(gb, jr))
2070 : return rel;
2071 6 : if (jl->op == op_project &&
2072 4 : (e = list_find_exp( jl->exps, gb)) != NULL &&
2073 2 : find_prop(e->p, PROP_HASHCOL) != NULL) {
2074 : left = 0;
2075 : cr = jr;
2076 : cl = jl;
2077 4 : } else if (jr->op == op_project &&
2078 8 : (e = list_find_exp( jr->exps, gb)) != NULL &&
2079 4 : find_prop(e->p, PROP_HASHCOL) != NULL) {
2080 : left = 1;
2081 : cr = jl;
2082 : cl = jr;
2083 : } else {
2084 0 : return rel;
2085 : }
2086 :
2087 6 : if ((left && is_base(jl->op)) || (!left && is_base(jr->op))||
2088 0 : (left && is_select(jl->op)) || (!left && is_select(jr->op))
2089 0 : || rel_is_join_on_pkey(j, false))
2090 6 : return rel;
2091 :
2092 : /* only add aggr (based on left/right), and repeat the group by column */
2093 0 : aggrs = sa_list(sql->sa);
2094 0 : aliases = sa_list(sql->sa);
2095 0 : if (rel->exps) for (n = rel->exps->h; n; n = n->next) {
2096 0 : sql_exp *ce = n->data;
2097 :
2098 0 : if (exp_is_atom(ce))
2099 0 : list_append(aliases, ce);
2100 0 : else if (ce->type == e_column) {
2101 0 : if (rel_has_exp(cl, ce, false) == 0) /* collect aliases outside groupby */
2102 0 : list_append(aliases, ce);
2103 : else
2104 0 : list_append(aggrs, ce);
2105 0 : } else if (ce->type == e_aggr) {
2106 0 : list *args = ce->l;
2107 :
2108 : /* check args are part of left/right */
2109 0 : if (!list_empty(args) && rel_has_exps(cl, args, false) == 0)
2110 : return rel;
2111 0 : if (rel->op != op_join && exp_aggr_is_count(ce))
2112 0 : ce->p = prop_create(sql->sa, PROP_COUNT, ce->p);
2113 0 : list_append(aggrs, ce);
2114 : }
2115 : }
2116 : /* TODO move any column expressions (aliases) into the project list */
2117 :
2118 : /* find gb in left or right and should be unique */
2119 0 : gbe = sa_list(sql->sa);
2120 : /* push groupby to right, group on join exps */
2121 0 : if (j->exps) for (n = j->exps->h; n; n = n->next) {
2122 0 : sql_exp *ce = n->data, *l = ce->l, *r = ce->r, *e;
2123 :
2124 : /* get left/right hand of e_cmp */
2125 0 : assert(ce->type == e_cmp);
2126 0 : if (ce->flag == cmp_equal && is_alias(l->type) && is_alias(r->type) &&
2127 0 : (((e = rel_find_exp(cr, l)) && rel_find_exp(cl, r)) ||
2128 0 : ((e = rel_find_exp(cr, r)) && rel_find_exp(cl, l)))) {
2129 0 : e = exp_ref(sql, e);
2130 0 : list_append(gbe, e);
2131 : } else {
2132 0 : return rel;
2133 : }
2134 : }
2135 0 : if (!left)
2136 0 : cr = j->r = rel_groupby(sql, cr, gbe);
2137 : else
2138 0 : cr = j->l = rel_groupby(sql, cr, gbe);
2139 0 : cr->exps = list_merge(cr->exps, aggrs, (fdup)NULL);
2140 0 : set_processed(cr);
2141 0 : if (!is_project(cl->op))
2142 0 : cl = rel_project(sql->sa, cl,
2143 : rel_projections(sql, cl, NULL, 1, 1));
2144 0 : cl->exps = list_merge(cl->exps, aliases, (fdup)NULL);
2145 0 : set_processed(cl);
2146 0 : if (!left)
2147 0 : j->l = cl;
2148 : else
2149 0 : j->r = cl;
2150 0 : rel -> l = NULL;
2151 0 : rel_destroy(rel);
2152 :
2153 0 : if (list_empty(cr->exps) && list_empty(j->exps)) { /* remove crossproduct */
2154 0 : sql_rel *r = cl;
2155 0 : if (!left)
2156 0 : j->l = NULL;
2157 : else
2158 0 : j->r = NULL;
2159 0 : rel_destroy(j);
2160 0 : j = r;
2161 : }
2162 0 : (*changes)++;
2163 0 : return j;
2164 : }
2165 : return rel;
2166 : }
2167 :
2168 : /*
2169 : * Rewrite group(project(join(A,Dict)[a.i==dict.i])[...dict.n])[dict.n][ ... dict.n ]
2170 : * into
2171 : * project(join(groupby (A)[a.i],[a.i]), Dict)[a.i==dict.i])[dict.n]
2172 : *
2173 : */
2174 : static inline sql_rel *
2175 55794 : rel_push_groupby_down(visitor *v, sql_rel *rel)
2176 : {
2177 55794 : sql_rel *p = rel->l;
2178 55794 : list *gbe = rel->r;
2179 :
2180 55794 : if (rel->op == op_groupby && gbe && p && is_join(p->op))
2181 8742 : return gen_push_groupby_down(v->sql, rel, &v->changes);
2182 47052 : if (rel->op == op_groupby && gbe && p && p->op == op_project) {
2183 14243 : sql_rel *j = p->l;
2184 14243 : sql_rel *jl, *jr;
2185 14243 : node *n;
2186 :
2187 14243 : if (!j || j->op != op_join || list_length(j->exps) != 1)
2188 12443 : return gen_push_groupby_down(v->sql, rel, &v->changes);
2189 1800 : jl = j->l;
2190 1800 : jr = j->r;
2191 :
2192 : /* check if jr is a dict with index and var still used */
2193 1800 : if (jr->op != op_basetable || jr->l || !jr->r || list_length(jr->exps) != 2)
2194 1800 : return gen_push_groupby_down(v->sql, rel, &v->changes);
2195 :
2196 : /* check if group by is done on dict column */
2197 0 : for(n = gbe->h; n; n = n->next) {
2198 0 : sql_exp *ge = n->data, *pe = NULL, *e = NULL;
2199 :
2200 : /* find group by exp in project, then in dict */
2201 0 : pe = rel_find_exp(p, ge);
2202 0 : if (pe) /* find project exp in right hand of join, ie dict */
2203 0 : e = rel_find_exp(jr, pe);
2204 0 : if (pe && e) { /* Rewrite: join with dict after the group by */
2205 0 : list *pexps = rel_projections(v->sql, rel, NULL, 1, 1), *npexps;
2206 0 : node *m;
2207 0 : sql_exp *ne = j->exps->h->data; /* join exp */
2208 0 : p->l = jl; /* Project now only on the left side of the join */
2209 :
2210 0 : ne = ne->l; /* The left side of the compare is the index of the left */
2211 :
2212 : /* find ge reference in new projection list */
2213 0 : npexps = sa_list(v->sql->sa);
2214 0 : for (m = pexps->h; m; m = m->next) {
2215 0 : sql_exp *a = m->data;
2216 :
2217 0 : if (exp_refers(ge, a)) {
2218 0 : sql_exp *sc = jr->exps->t->data;
2219 0 : sql_exp *e = exp_ref(v->sql, sc);
2220 0 : if (exp_name(a))
2221 0 : exp_prop_alias(v->sql->sa, e, a);
2222 : a = e;
2223 : }
2224 0 : append(npexps, a);
2225 : }
2226 :
2227 : /* find ge in aggr list */
2228 0 : for (m = rel->exps->h; m; m = m->next) {
2229 0 : sql_exp *a = m->data;
2230 :
2231 0 : if (exp_match_exp(a, ge) || exp_refers(ge, a)) {
2232 0 : a = exp_ref(v->sql, ne);
2233 0 : if (exp_name(ne))
2234 0 : exp_prop_alias(v->sql->sa, a, ne);
2235 0 : m->data = a;
2236 : }
2237 : }
2238 :
2239 : /* change alias pe, ie project out the index */
2240 0 : pe->l = (void*)exp_relname(ne);
2241 0 : pe->r = (void*)exp_name(ne);
2242 0 : if (exp_name(ne))
2243 0 : exp_prop_alias(v->sql->sa, pe, ne);
2244 :
2245 : /* change alias ge */
2246 0 : ge->l = (void*)exp_relname(pe);
2247 0 : ge->r = (void*)exp_name(pe);
2248 0 : if (exp_name(pe))
2249 0 : exp_prop_alias(v->sql->sa, ge, pe);
2250 :
2251 : /* zap both project and groupby name hash tables (as we changed names above) */
2252 0 : list_hash_clear(rel->exps);
2253 0 : list_hash_clear((list*)rel->r);
2254 0 : list_hash_clear(p->exps);
2255 :
2256 : /* add join */
2257 0 : j->l = rel;
2258 0 : rel = rel_project(v->sql->sa, j, npexps);
2259 0 : v->changes++;
2260 : }
2261 : }
2262 : }
2263 : return rel;
2264 : }
2265 :
2266 : /* reduce group by expressions based on pkey info
2267 : *
2268 : * The reduced group by and (derived) aggr expressions are restored via
2269 : * extra (new) aggregate columns.
2270 : */
2271 : static inline sql_rel *
2272 55796 : rel_reduce_groupby_exps(visitor *v, sql_rel *rel)
2273 : {
2274 55796 : list *gbe = rel->r;
2275 :
2276 55796 : if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel) && list_length(gbe)) {
2277 30464 : node *n, *m;
2278 30464 : int k, j, i, ngbe = list_length(gbe);
2279 30464 : int8_t *scores = SA_NEW_ARRAY(v->sql->ta, int8_t, ngbe);
2280 30464 : sql_column *c;
2281 30464 : sql_table **tbls = SA_NEW_ARRAY(v->sql->ta, sql_table*, ngbe);
2282 30464 : sql_rel **bts = SA_NEW_ARRAY(v->sql->ta, sql_rel*, ngbe), *bt = NULL;
2283 :
2284 30463 : gbe = rel->r;
2285 88189 : for (k = 0, i = 0, n = gbe->h; n; n = n->next, k++) {
2286 57727 : sql_exp *e = n->data;
2287 :
2288 57727 : c = exp_find_column_(rel, e, -2, &bt);
2289 57726 : if (c) {
2290 67449 : for(j = 0; j < i; j++)
2291 30605 : if (c->t == tbls[j] && bts[j] == bt)
2292 : break;
2293 50896 : tbls[j] = c->t;
2294 50896 : bts[j] = bt;
2295 50896 : i += (j == i);
2296 : }
2297 : }
2298 30462 : if (i) { /* forall tables find pkey and
2299 : remove useless other columns */
2300 : /* TODO also remove group by columns which are related to
2301 : * the other columns using a foreign-key join (n->1), ie 1
2302 : * on the to be removed side.
2303 : */
2304 64355 : for(j = 0; j < i; j++) {
2305 36833 : int l, nr = 0, cnr = 0;
2306 :
2307 36833 : k = list_length(gbe);
2308 36832 : memset(scores, 0, list_length(gbe));
2309 36833 : if (tbls[j]->pkey) {
2310 22061 : for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
2311 16542 : fcmp cmp = (fcmp)&kc_column_cmp;
2312 16542 : sql_exp *e = n->data;
2313 :
2314 16542 : c = exp_find_column_(rel, e, -2, &bt);
2315 23375 : if (c && c->t == tbls[j] && bts[j] == bt &&
2316 6833 : list_find(tbls[j]->pkey->k.columns, c, cmp) != NULL) {
2317 648 : scores[l] = 1;
2318 648 : nr ++;
2319 14463 : } else if (c && c->t == tbls[j] && bts[j] == bt) {
2320 : /* Okay we can cleanup a group by column */
2321 6185 : scores[l] = -1;
2322 6185 : cnr ++;
2323 : }
2324 : }
2325 : }
2326 5519 : if (nr) {
2327 639 : int all = (list_length(tbls[j]->pkey->k.columns) == nr);
2328 639 : sql_kc *kc = tbls[j]->pkey->k.columns->h->data;
2329 :
2330 639 : c = kc->c;
2331 1683 : for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
2332 1044 : sql_exp *e = n->data;
2333 :
2334 : /* pkey based group by */
2335 1160 : if (scores[l] == 1 && ((all ||
2336 : /* first of key */
2337 740 : (c == exp_find_column(rel, e, -2))) && !find_prop(e->p, PROP_HASHCOL)))
2338 13 : e->p = prop_create(v->sql->sa, PROP_HASHCOL, e->p);
2339 : }
2340 2233 : for (m = rel->exps->h; m; m = m->next ){
2341 1594 : sql_exp *e = m->data;
2342 :
2343 5417 : for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
2344 3836 : sql_exp *gb = n->data;
2345 :
2346 : /* pkey based group by */
2347 3836 : if (scores[l] == 1 && exp_match_exp(e,gb) && find_prop(gb->p, PROP_HASHCOL) && !find_prop(e->p, PROP_HASHCOL)) {
2348 13 : e->p = prop_create(v->sql->sa, PROP_HASHCOL, e->p);
2349 13 : break;
2350 : }
2351 :
2352 : }
2353 : }
2354 : }
2355 36833 : if (cnr && nr && list_length(tbls[j]->pkey->k.columns) == nr) {
2356 108 : list *ngbe = new_exp_list(v->sql->sa);
2357 :
2358 392 : for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
2359 284 : sql_exp *e = n->data;
2360 :
2361 : /* keep the group by columns which form a primary key
2362 : * of this table. And those unrelated to this table. */
2363 284 : if (scores[l] != -1)
2364 139 : append(ngbe, e);
2365 : }
2366 108 : rel->r = ngbe;
2367 : /* rewrite gbe and aggr, in the aggr list */
2368 108 : if (0)
2369 : for (m = rel->exps->h; m; m = m->next ){
2370 : sql_exp *e = m->data;
2371 : int fnd = 0;
2372 :
2373 : for (l = 0, n = gbe->h; l < k && n && !fnd; l++, n = n->next) {
2374 : sql_exp *gb = n->data;
2375 :
2376 : if (scores[l] == -1 && exp_refers(gb, e)) {
2377 : /*
2378 : sql_exp *rs = exp_column(v->sql->sa, gb->l?gb->l:exp_relname(gb), gb->r?gb->r:exp_name(gb), exp_subtype(gb), rel->card, has_nil(gb), is_unique(gb), is_intern(gb));
2379 : exp_setalias(rs, e->alias.label, exp_find_rel_name(e), exp_name(e));
2380 : e = rs;
2381 : */
2382 : assert(e->alias.label == e->nid);
2383 : fnd = 1;
2384 : }
2385 : }
2386 : m->data = e;
2387 : }
2388 : /* new reduced aggr expression list */
2389 108 : assert(list_length(rel->exps)>0);
2390 : /* only one reduction at a time */
2391 108 : list_hash_clear(rel->exps);
2392 108 : v->changes++;
2393 108 : return rel;
2394 : }
2395 36725 : gbe = rel->r;
2396 : }
2397 : }
2398 : }
2399 : /* remove constants from group by list */
2400 55686 : if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel)) {
2401 30562 : int i;
2402 30562 : node *n;
2403 :
2404 88003 : for (i = 0, n = gbe->h; n; n = n->next) {
2405 57439 : sql_exp *e = n->data;
2406 :
2407 57439 : if (exp_is_atom(e))
2408 23 : i++;
2409 : }
2410 30564 : if (i) {
2411 23 : list *ngbe = new_exp_list(v->sql->sa);
2412 23 : list *dgbe = new_exp_list(v->sql->sa);
2413 :
2414 47 : for (n = gbe->h; n; n = n->next) {
2415 24 : sql_exp *e = n->data;
2416 :
2417 24 : if (!exp_is_atom(e))
2418 1 : append(ngbe, e);
2419 : /* we need at least one gbe */
2420 23 : else if (!n->next && list_empty(ngbe))
2421 22 : append(ngbe, e);
2422 : else
2423 1 : append(dgbe, e);
2424 : }
2425 23 : rel->r = ngbe;
2426 23 : if (!list_empty(dgbe)) {
2427 : /* use atom's directly in the aggr expr list */
2428 :
2429 3 : for (n = rel->exps->h; n; n = n->next) {
2430 2 : sql_exp *e = n->data, *ne = NULL;
2431 :
2432 2 : if (e->type == e_column) {
2433 2 : if (e->nid)
2434 2 : ne = exps_bind_nid(dgbe, e->nid);
2435 2 : if (ne) {
2436 1 : ne = exp_copy(v->sql, ne);
2437 1 : exp_prop_alias(v->sql->sa, ne, e);
2438 1 : e = ne;
2439 : }
2440 : }
2441 2 : n->data = e;
2442 : }
2443 1 : list_hash_clear(rel->exps);
2444 1 : v->changes++;
2445 : }
2446 : }
2447 : }
2448 : return rel;
2449 : }
2450 :
2451 : /* if all arguments to a distinct aggregate are unique, remove 'distinct' property */
2452 : static inline sql_rel *
2453 55796 : rel_distinct_aggregate_on_unique_values(visitor *v, sql_rel *rel)
2454 : {
2455 55796 : if (is_groupby(rel->op) && !list_empty(rel->exps)) {
2456 170035 : for (node *n = rel->exps->h; n; n = n->next) {
2457 114335 : sql_exp *exp = (sql_exp*) n->data;
2458 :
2459 114335 : if (exp->type == e_aggr && need_distinct(exp)) {
2460 489 : bool all_unique = true;
2461 489 : list *l = exp->l;
2462 :
2463 978 : for (node *m = l->h; m && all_unique; m = m->next) {
2464 489 : sql_exp *arg = (sql_exp*) m->data;
2465 :
2466 623 : all_unique &= arg->type == e_column && is_unique(arg) && (!is_semantics(exp) || !has_nil(arg));
2467 : }
2468 489 : if (!all_unique && exps_card(l) > CARD_ATOM)
2469 193 : all_unique = exps_unique(v->sql, rel, l) && (!is_semantics(exp) || !have_nil(l));
2470 : if (all_unique) {
2471 134 : set_nodistinct(exp);
2472 134 : v->changes++;
2473 : }
2474 : }
2475 : }
2476 : }
2477 55796 : return rel;
2478 : }
2479 :
2480 : static inline sql_rel *
2481 55795 : rel_remove_const_aggr(visitor *v, sql_rel *rel)
2482 : {
2483 55795 : if(!rel) {
2484 : return rel;
2485 : }
2486 :
2487 55795 : list *exps = rel->exps;
2488 :
2489 55795 : if(rel->op != op_groupby || list_empty(exps)) {
2490 6 : return rel;
2491 : }
2492 :
2493 55789 : if(!list_empty(rel->r)) {
2494 : /* in the general case in an expression of an aggregate over
2495 : * a constant can be rewritten as just the const e.g.
2496 : * aggr(const) -> const
2497 : */
2498 :
2499 121648 : for(node *n = exps->h; n; n = n->next) {
2500 89600 : sql_exp *e = n->data;
2501 :
2502 89600 : if(e->type != e_aggr) {
2503 61411 : continue;
2504 : }
2505 :
2506 28189 : sql_func *j = ((sql_subfunc *)e->f)->func;
2507 :
2508 : /* some aggregates with const values can only be eliminated
2509 : * under certain circumstances e.g.
2510 : * sum(NULL) -> NULL, sum(0) -> 0
2511 : * prod(NULL) -> NULL, prod(1) -> 1
2512 : * count(NULL) -> 0
2513 : */
2514 28189 : int sum = strcmp(j->base.name, "sum") == 0,
2515 28189 : prd = strcmp(j->base.name, "prod") == 0,
2516 28189 : cnt = strcmp(j->base.name, "count") == 0;
2517 :
2518 28189 : if(!j->s && j->system == 1) {
2519 23544 : list *se = e->l;
2520 :
2521 23544 : if(se == NULL) {
2522 8642 : continue;
2523 : }
2524 :
2525 29802 : for(node *m = se->h; m; m = m->next) {
2526 14900 : sql_exp *w = m->data;
2527 :
2528 14900 : if(w->type == e_atom && w->card == CARD_ATOM) {
2529 26 : atom *wa = w->l;
2530 :
2531 26 : if(sum && !(wa->isnull || atom_is_zero(wa))) {
2532 3 : continue;
2533 : }
2534 :
2535 23 : if(prd && !(wa->isnull || atom_is_one(wa))) {
2536 0 : continue;
2537 : }
2538 :
2539 23 : if(cnt) {
2540 5 : if(wa->isnull) {
2541 3 : list_remove_node(se, NULL, m);
2542 :
2543 3 : w=exp_atom_lng(v->sql->sa, 0);
2544 3 : list_append(se, w);
2545 : }
2546 : else {
2547 2 : continue;
2548 : }
2549 : }
2550 :
2551 21 : exp_setalias(w, e->alias.label, exp_relname(e) , exp_name(e));
2552 21 : n->data = w;
2553 21 : v->changes++;
2554 : }
2555 : }
2556 : }
2557 : }
2558 : }
2559 :
2560 : /*
2561 : * Below code replaces GROUP BY with PROJECT in some cases;
2562 : * Triggers on...
2563 : * select 1 having true; select 42 from foo group by x; select n from foo group by rollup(n);
2564 : */
2565 :
2566 55790 : if (!rel_is_ref(rel)) {
2567 54177 : int needed = 0;
2568 164732 : for (node *n = rel->exps->h; n; n = n->next) {
2569 110556 : sql_exp *exp = (sql_exp*) n->data;
2570 :
2571 110556 : if (exp_is_atom(exp) && exp->type != e_aggr)
2572 78 : needed++;
2573 : }
2574 54176 : if (needed) {
2575 72 : if (!list_empty(rel->r)) {
2576 21 : int atoms = 0;
2577 : /* corner case, all grouping columns are atoms */
2578 43 : for (node *n = ((list*)rel->r)->h; n; n = n->next) {
2579 22 : sql_exp *exp = (sql_exp*) n->data;
2580 :
2581 22 : if (exp_is_atom(exp))
2582 1 : atoms++;
2583 : }
2584 : /* possible edge case, never triggers in coverage tests */
2585 21 : if (atoms == list_length(rel->r)) {
2586 1 : list *nexps = sa_list(v->sql->sa);
2587 3 : for (node *n = rel->exps->h; n; ) {
2588 2 : node *next = n->next;
2589 2 : sql_exp *e = (sql_exp*) n->data;
2590 :
2591 : /* remove references to constant group by columns */
2592 2 : if (e->type == e_column) {
2593 1 : sql_exp *found = NULL;
2594 1 : found = exps_bind_nid(rel->r, e->nid);
2595 1 : if (found) {
2596 1 : list_append(nexps, found);
2597 1 : list_remove_node(rel->exps, NULL, n);
2598 : }
2599 : }
2600 : n = next;
2601 : }
2602 1 : rel->r = NULL; /* transform it into a global aggregate */
2603 1 : rel->exps = list_merge(nexps, rel->exps, (fdup) NULL); /* add grouping columns back as projections */
2604 : /* global aggregates may return 1 row, so filter it based on the count */
2605 1 : sql_subfunc *cf = sql_bind_func(v->sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
2606 1 : sql_exp *count = exp_aggr(v->sql->sa, NULL, cf, 0, 1, CARD_ATOM, 0);
2607 1 : count = rel_groupby_add_aggr(v->sql, rel, count);
2608 1 : sql_exp *cp = exp_compare(v->sql->sa, exp_ref(v->sql, count), exp_atom(v->sql->sa, atom_int(v->sql->sa, exp_subtype(count), 0)), cmp_notequal);
2609 1 : rel = rel_select(v->sql->sa, rel, cp);
2610 1 : set_processed(rel);
2611 1 : return rel;
2612 : }
2613 51 : } else if (list_length(rel->exps) == needed) { /* all are const */
2614 48 : sql_rel *ll = rel->l;
2615 48 : rel->op = op_project;
2616 : /* TODO check if l->l == const, else change that */
2617 48 : if (ll && ll->l) {
2618 44 : rel_destroy(ll);
2619 44 : rel->l = rel_project_exp(v->sql, exp_atom_bool(v->sql->sa, 1));
2620 : }
2621 48 : return rel;
2622 : }
2623 23 : sql_rel *nrel = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
2624 78 : for (node *n = nrel->exps->h; n; n = n->next) {
2625 55 : sql_exp *exp = (sql_exp*) n->data;
2626 55 : if (exp->type == e_column) {
2627 55 : sql_exp *e = rel_find_exp(rel, exp);
2628 :
2629 55 : if (e && exp_is_atom(e) && e->type == e_atom) {
2630 29 : sql_exp *ne = exp_copy(v->sql, e);
2631 29 : assert(ne->alias.label);
2632 29 : exp_setalias(ne, ne->alias.label, exp_find_rel_name(exp), exp_name(exp));
2633 29 : n->data = ne;
2634 29 : v->changes++;
2635 : }
2636 : }
2637 : }
2638 23 : list *nl = sa_list(v->sql->sa);
2639 78 : for (node *n = rel->exps->h; n; n = n->next) {
2640 55 : sql_exp *exp = (sql_exp*) n->data;
2641 :
2642 55 : if (!exp_is_atom(exp) || exp->type != e_atom)
2643 26 : append(nl, exp);
2644 : }
2645 23 : rel->exps = nl;
2646 23 : return nrel;
2647 : }
2648 : }
2649 :
2650 : return rel;
2651 : }
2652 :
2653 : #if 0
2654 : static sql_rel *
2655 : rel_groupby_distinct2(visitor *v, sql_rel *rel)
2656 : {
2657 : list *ngbes = sa_list(v->sql->sa), *gbes, *naggrs = sa_list(v->sql->sa), *aggrs = sa_list(v->sql->sa);
2658 : sql_rel *l;
2659 : node *n;
2660 :
2661 : gbes = rel->r;
2662 : if (!gbes)
2663 : return rel;
2664 :
2665 : /* check if each aggr is, rewritable (max,min,sum,count)
2666 : * and only has one argument */
2667 : for (n = rel->exps->h; n; n = n->next) {
2668 : sql_exp *e = n->data;
2669 : sql_subfunc *af = e->f;
2670 :
2671 : if (e->type == e_aggr &&
2672 : (strcmp(af->func->base.name, "sum") &&
2673 : strcmp(af->func->base.name, "count") &&
2674 : strcmp(af->func->base.name, "min") &&
2675 : strcmp(af->func->base.name, "max")))
2676 : return rel;
2677 : }
2678 :
2679 : for (n = gbes->h; n; n = n->next) {
2680 : sql_exp *e = n->data;
2681 :
2682 : e = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
2683 : append(ngbes, e);
2684 : }
2685 :
2686 : /* 1 for each aggr(distinct v) add the attribute expression v to gbes and aggrs list
2687 : * 2 for each aggr(z) add aggr_phase2('z') to the naggrs list
2688 : * 3 for each group by col, add also to the naggrs list
2689 : * */
2690 : for (n = rel->exps->h; n; n = n->next) {
2691 : sql_exp *e = n->data;
2692 :
2693 : if (e->type == e_aggr && need_distinct(e)) { /* 1 */
2694 : /* need column expression */
2695 : list *args = e->l;
2696 : sql_exp *v = args->h->data;
2697 : append(gbes, v);
2698 : if (!exp_name(v))
2699 : exp_label(v->sql->sa, v, ++v->sql->label);
2700 : v = exp_column(v->sql->sa, exp_find_rel_name(v), exp_name(v), exp_subtype(v), v->card, has_nil(v), is_unique(v), is_intern(v));
2701 : append(aggrs, v);
2702 : v = exp_aggr1(v->sql->sa, v, e->f, need_distinct(e), 1, e->card, 1);
2703 : exp_setalias(v, e->alias.label, exp_find_rel_name(e), exp_name(e));
2704 : append(naggrs, v);
2705 : } else if (e->type == e_aggr && !need_distinct(e)) {
2706 : sql_exp *v;
2707 : sql_subfunc *f = e->f;
2708 : int cnt = exp_aggr_is_count(e);
2709 : sql_subfunc *a = sql_bind_func(v->sql, "sys", (cnt)?"sum":f->func->base.name, exp_subtype(e), NULL, F_AGGR, true, true);
2710 :
2711 : append(aggrs, e);
2712 : if (!exp_name(e))
2713 : exp_label(v->sql->sa, e, ++v->sql->label);
2714 : set_has_nil(e);
2715 : v = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
2716 : v = exp_aggr1(v->sql->sa, v, a, 0, 1, e->card, 1);
2717 : if (cnt)
2718 : set_zero_if_empty(v);
2719 : exp_setalias(v, e->alias.label, exp_find_rel_name(e), exp_name(e));
2720 : append(naggrs, v);
2721 : } else { /* group by col */
2722 : if (list_find_exp(gbes, e) || !list_find_exp(naggrs, e)) {
2723 : append(aggrs, e);
2724 :
2725 : e = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
2726 : }
2727 : append(naggrs, e);
2728 : }
2729 : }
2730 :
2731 : l = rel->l = rel_groupby(v->sql, rel->l, gbes);
2732 : l->exps = aggrs;
2733 : rel->r = ngbes;
2734 : rel->exps = naggrs;
2735 : v->changes++;
2736 : return rel;
2737 : }
2738 : #endif
2739 :
2740 : /* Rewrite group by expressions with distinct
2741 : *
2742 : * ie select a, count(distinct b) from c where ... groupby a;
2743 : * No other aggregations should be present
2744 : *
2745 : * Rewrite the more general case, good for parallel execution
2746 : *
2747 : * groupby(R) [e,f] [ aggr1 a distinct, aggr2 b distinct, aggr3 c, aggr4 d]
2748 : *
2749 : * into
2750 : *
2751 : * groupby(
2752 : * groupby(R) [e,f,a,b] [ a, b, aggr3 c, aggr4 d]
2753 : * ) [e,f]( aggr1 a distinct, aggr2 b distinct, aggr3_phase2 c, aggr4_phase2 d)
2754 : */
2755 : static inline sql_rel *
2756 55795 : rel_groupby_distinct(visitor *v, sql_rel *rel)
2757 : {
2758 55795 : node *n;
2759 :
2760 55795 : if (is_groupby(rel->op)) {
2761 55706 : sql_rel *l = rel->l;
2762 55706 : if (!l || is_groupby(l->op))
2763 : return rel;
2764 : }
2765 55654 : if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel)) {
2766 30536 : int nr = 0, anr = 0;
2767 30536 : list *gbe, *ngbe, *arg, *exps, *nexps;
2768 30536 : sql_exp *distinct = NULL, *darg, *found;
2769 30536 : sql_rel *l = NULL;
2770 :
2771 116321 : for (n=rel->exps->h; n && nr <= 2; n = n->next) {
2772 85785 : sql_exp *e = n->data;
2773 85785 : if (need_distinct(e)) {
2774 188 : distinct = n->data;
2775 188 : nr++;
2776 : }
2777 85785 : anr += is_aggr(e->type);
2778 : }
2779 30536 : if (nr < 1 || distinct->type != e_aggr)
2780 : return rel;
2781 177 : if (nr > 1 || anr > nr)
2782 : return rel;//rel_groupby_distinct2(v, rel);
2783 117 : arg = distinct->l;
2784 117 : if (list_length(arg) != 1 || list_length(rel->r) + nr != list_length(rel->exps))
2785 2 : return rel;
2786 :
2787 115 : gbe = rel->r;
2788 115 : ngbe = sa_list(v->sql->sa);
2789 115 : exps = sa_list(v->sql->sa);
2790 115 : nexps = sa_list(v->sql->sa);
2791 362 : for (n=rel->exps->h; n; n = n->next) {
2792 247 : sql_exp *e = n->data;
2793 247 : if (e != distinct) {
2794 132 : if (e->type == e_aggr) { /* copy the arguments to the aggregate */
2795 0 : list *args = e->l;
2796 0 : if (args) {
2797 0 : for (node *n = args->h ; n ; n = n->next) {
2798 0 : sql_exp *e = n->data;
2799 0 : list_append(ngbe, exp_copy(v->sql, e));
2800 0 : list_append(exps, exp_copy(v->sql, e));
2801 : }
2802 : }
2803 : } else {
2804 132 : e = exp_ref(v->sql, e);
2805 132 : append(ngbe, e);
2806 132 : append(exps, e);
2807 : }
2808 132 : if (e->type == e_aggr) /* aggregates must be copied */
2809 0 : e = exp_copy(v->sql, e);
2810 : else
2811 132 : e = exp_ref(v->sql, e);
2812 132 : append(nexps, e);
2813 : }
2814 : }
2815 :
2816 115 : darg = arg->h->data;
2817 115 : if ((found = exps_find_exp(exps, darg)) == NULL) { /* not already in the groups projection list */
2818 112 : if ((found = exps_find_exp(gbe, darg))) { /* first find if the aggregate argument already exists in the grouping list */
2819 0 : darg = exp_ref(v->sql, found);
2820 : } else {
2821 112 : list_append(gbe, darg = exp_copy(v->sql, darg));
2822 112 : exp_label(v->sql->sa, darg, ++v->sql->label);
2823 112 : darg = exp_ref(v->sql, darg);
2824 : }
2825 112 : list_append(exps, darg);
2826 112 : darg = exp_ref(v->sql, darg);
2827 : } else {
2828 3 : darg = exp_ref(v->sql, found);
2829 : }
2830 115 : arg->h->data = darg;
2831 115 : l = rel->l = rel_groupby(v->sql, rel->l, gbe);
2832 115 : l->exps = exps;
2833 115 : set_processed(l);
2834 115 : rel->r = ngbe;
2835 115 : rel->exps = nexps;
2836 115 : set_nodistinct(distinct);
2837 115 : append(nexps, distinct);
2838 115 : v->changes++;
2839 : }
2840 : return rel;
2841 : }
2842 :
2843 : /*
2844 : * Push Count inside crossjoin down, and multiply the results
2845 : *
2846 : * project ( project(
2847 : * group by ( crossproduct (
2848 : * crossproduct( project (
2849 : * L, => group by (
2850 : * R L
2851 : * ) [ ] [ count NOT NULL ] ) [ ] [ count NOT NULL ]
2852 : * ) ),
2853 : * ) [ NOT NULL ] project (
2854 : * group by (
2855 : * R
2856 : * ) [ ] [ count NOT NULL ]
2857 : * )
2858 : * ) [ sql_mul(.., .. NOT NULL) ]
2859 : * )
2860 : */
2861 : static inline sql_rel *
2862 55794 : rel_push_count_down(visitor *v, sql_rel *rel)
2863 : {
2864 55794 : sql_rel *r = rel->l;
2865 :
2866 55794 : if (is_groupby(rel->op) && !rel_is_ref(rel) && list_empty(rel->r) &&
2867 23662 : r && !r->exps && r->op == op_join && !(rel_is_ref(r)) &&
2868 : /* currently only single count aggregation is handled, no other projects or aggregation */
2869 56 : list_length(rel->exps) == 1 && exp_aggr_is_count(rel->exps->h->data)) {
2870 12 : sql_exp *nce, *oce, *cnt1 = NULL, *cnt2 = NULL;
2871 12 : sql_rel *gbl = NULL, *gbr = NULL; /* Group By */
2872 12 : sql_rel *cp = NULL; /* Cross Product */
2873 12 : sql_rel *srel;
2874 :
2875 12 : oce = rel->exps->h->data;
2876 12 : if (oce->l) /* we only handle COUNT(*) */
2877 : return rel;
2878 :
2879 10 : srel = r->l;
2880 : {
2881 10 : sql_subfunc *cf = sql_bind_func(v->sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
2882 10 : sql_exp *e = exp_aggr(v->sql->sa, NULL, cf, need_distinct(oce), need_no_nil(oce), oce->card, 0);
2883 :
2884 10 : exp_label(v->sql->sa, e, ++v->sql->label);
2885 10 : cnt1 = exp_ref(v->sql, e);
2886 10 : gbl = rel_groupby(v->sql, rel_dup(srel), NULL);
2887 10 : set_processed(gbl);
2888 10 : rel_groupby_add_aggr(v->sql, gbl, e);
2889 : }
2890 :
2891 10 : srel = r->r;
2892 : {
2893 10 : sql_subfunc *cf = sql_bind_func(v->sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
2894 10 : sql_exp *e = exp_aggr(v->sql->sa, NULL, cf, need_distinct(oce), need_no_nil(oce), oce->card, 0);
2895 :
2896 10 : exp_label(v->sql->sa, e, ++v->sql->label);
2897 10 : cnt2 = exp_ref(v->sql, e);
2898 10 : gbr = rel_groupby(v->sql, rel_dup(srel), NULL);
2899 10 : set_processed(gbr);
2900 10 : rel_groupby_add_aggr(v->sql, gbr, e);
2901 : }
2902 :
2903 10 : cp = rel_crossproduct(v->sql->sa, gbl, gbr, op_join);
2904 10 : set_processed(cp);
2905 :
2906 10 : if (!(nce = rel_binop_(v->sql, NULL, cnt1, cnt2, "sys", "sql_mul", card_value, true))) {
2907 0 : v->sql->session->status = 0;
2908 0 : v->sql->errstr[0] = '\0';
2909 0 : return rel; /* error, fallback to original expression */
2910 : }
2911 : /* because of remote plans, make sure "sql_mul" returns bigint. The cardinality is atomic, so no major performance penalty */
2912 10 : if (subtype_cmp(exp_subtype(oce), exp_subtype(nce)) != 0)
2913 10 : nce = exp_convert(v->sql, nce, exp_subtype(nce), exp_subtype(oce));
2914 10 : if (exp_name(oce))
2915 10 : exp_prop_alias(v->sql->sa, nce, oce);
2916 :
2917 10 : rel_destroy(rel);
2918 10 : rel = rel_project(v->sql->sa, cp, append(new_exp_list(v->sql->sa), nce));
2919 10 : set_processed(rel);
2920 :
2921 10 : v->changes++;
2922 : }
2923 :
2924 : return rel;
2925 : }
2926 :
2927 : static inline sql_rel *
2928 31095 : rel_basecount(visitor *v, sql_rel *rel)
2929 : {
2930 39769 : if (is_groupby(rel->op) && !rel_is_ref(rel) && rel->l && list_empty(rel->r) &&
2931 16679 : list_length(rel->exps) == 1 && exp_aggr_is_count(rel->exps->h->data)) {
2932 4440 : sql_rel *bt = rel->l;
2933 4440 : sql_exp *e = rel->exps->h->data;
2934 4440 : if (is_basetable(bt->op) && list_empty(e->l)) { /* count(*) */
2935 : /* change into select cnt('schema','table') */
2936 659 : sql_table *t = bt->l;
2937 : /* I need to get the declared table's frame number to make this work correctly for those */
2938 659 : if (!isTable(t) || isDeclaredTable(t))
2939 : return rel;
2940 609 : sql_subfunc *cf = sql_bind_func(v->sql, "sys", "cnt", sql_bind_localtype("str"), sql_bind_localtype("str"), F_FUNC, true, true);
2941 609 : list *exps = sa_list(v->sql->sa);
2942 609 : append(exps, exp_atom_str(v->sql->sa, t->s->base.name, sql_bind_localtype("str")));
2943 609 : append(exps, exp_atom_str(v->sql->sa, t->base.name, sql_bind_localtype("str")));
2944 609 : sql_exp *ne = exp_op(v->sql->sa, exps, cf);
2945 :
2946 609 : ne = exp_propagate(v->sql->sa, ne, e);
2947 609 : exp_setalias(ne, e->alias.label, exp_find_rel_name(e), exp_name(e));
2948 609 : rel_destroy(rel);
2949 609 : rel = rel_project(v->sql->sa, NULL, append(sa_list(v->sql->sa), ne));
2950 609 : v->changes++;
2951 : }
2952 : }
2953 : return rel;
2954 : }
2955 :
2956 : static inline sql_rel *
2957 31095 : rel_simplify_count(visitor *v, sql_rel *rel)
2958 : {
2959 31095 : if (is_groupby(rel->op) && !list_empty(rel->exps)) {
2960 30993 : mvc *sql = v->sql;
2961 30993 : int ncountstar = 0;
2962 :
2963 : /* Convert count(no null) into count(*) */
2964 93650 : for (node *n = rel->exps->h; n ; n = n->next) {
2965 62657 : sql_exp *e = n->data;
2966 :
2967 62657 : if (exp_aggr_is_count(e) && !need_distinct(e)) {
2968 14928 : if (list_length(e->l) == 0) {
2969 12167 : ncountstar++;
2970 2761 : } else if (list_length(e->l) == 1 && !has_nil((sql_exp*)((list*)e->l)->h->data)) {
2971 2600 : sql_subfunc *cf = sql_bind_func(sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
2972 2600 : sql_exp *ne = exp_aggr(sql->sa, NULL, cf, 0, 0, e->card, 0);
2973 2600 : if (exp_name(e))
2974 2600 : exp_prop_alias(sql->sa, ne, e);
2975 2600 : n->data = ne;
2976 2600 : ncountstar++;
2977 2600 : v->changes++;
2978 : }
2979 : }
2980 : }
2981 : /* With multiple count(*), use exp_ref to reduce the number of calls to this aggregate */
2982 30993 : if (ncountstar > 1) {
2983 56 : sql_exp *count_star = NULL;
2984 56 : sql_rel *nrel = rel_project(v->sql->sa, rel, NULL);
2985 56 : list *aexps = sa_list(v->sql->sa), *nexps = sa_list(v->sql->sa);
2986 56 : nrel->exps = nexps;
2987 247 : for (node *n = rel->exps->h; n ; n = n->next) {
2988 191 : sql_exp *e = n->data;
2989 :
2990 191 : if (exp_aggr_is_count(e) && !need_distinct(e) && list_length(e->l) == 0) {
2991 126 : if (!count_star) {
2992 56 : count_star = e;
2993 56 : append(aexps, e);
2994 56 : append(nexps, exp_ref(sql, e));
2995 : } else {
2996 70 : sql_exp *ne = exp_ref(sql, count_star);
2997 :
2998 70 : if (exp_name(e))
2999 70 : exp_prop_alias(sql->sa, ne, e);
3000 70 : v->changes++;
3001 70 : append(nexps, ne);
3002 : }
3003 : } else {
3004 65 : append(aexps, e);
3005 65 : append(nexps, exp_ref(sql, e));
3006 : }
3007 : }
3008 56 : rel->exps = aexps;
3009 56 : return nrel;
3010 : }
3011 : }
3012 : return rel;
3013 : }
3014 :
3015 : static sql_rel *
3016 31095 : rel_groupjoin(visitor *v, sql_rel *rel)
3017 : {
3018 31095 : if (!rel || rel_is_ref(rel) || !is_groupby(rel->op) || list_empty(rel->r))
3019 9285 : return rel;
3020 :
3021 21810 : sql_rel *j = rel->l;
3022 21810 : if (!j || rel_is_ref(j) /*|| !is_left(j->op)*/ || j->op != op_join || list_length(rel->exps) > 1 /* only join because left joins aren't optimized yet (TODO), only length 1 as implementation of groupjoins is missing */ || !list_empty(rel->attr))
3023 21605 : return rel;
3024 : /* check group by exps == equi join exps */
3025 205 : list *gbes = rel->r;
3026 205 : if (list_length(gbes) != list_length(j->exps))
3027 : return rel;
3028 123 : int nr = 0;
3029 243 : for(node *n = gbes->h; n; n = n->next) {
3030 123 : sql_exp *gbe = n->data;
3031 243 : for(node *m = j->exps->h; m; m = m->next) {
3032 123 : sql_exp *je = m->data;
3033 123 : if (je->type != e_cmp || je->flag != cmp_equal)
3034 : return rel;
3035 : /* check if its a join exp (ie not a selection) */
3036 125 : if (!( (!rel_has_exp(j->l, je->l, false) && !rel_has_exp(j->r, je->r, false)) ||
3037 10 : (!rel_has_exp(j->l, je->r, false) && !rel_has_exp(j->r, je->l, false))))
3038 0 : return rel;
3039 120 : if (exp_match(je->l, gbe)) {
3040 76 : nr++;
3041 44 : } else if (exp_match(je->r, gbe)) {
3042 11 : nr++;
3043 : }
3044 : }
3045 : }
3046 120 : if (nr == list_length(gbes)) {
3047 : // printf("#group by converted\n");
3048 87 : j = rel_dup(j);
3049 87 : j->attr = rel->exps;
3050 87 : v->changes++;
3051 87 : rel_destroy(rel);
3052 87 : return j;
3053 : }
3054 : return rel;
3055 : }
3056 :
3057 : /* select k1 from bla where k1 = const -> select const from bla where k1 = const */
3058 : static sql_rel *
3059 2036155 : rel_project_select_exp(visitor *v, sql_rel *rel)
3060 : {
3061 2036155 : if (is_simple_project(rel->op) && rel->exps && rel->l) {
3062 711269 : sql_rel *l = rel->l;
3063 711269 : if (is_select(l->op) && l->exps) {
3064 696255 : for(node *n = rel->exps->h; n; n = n->next) {
3065 575713 : sql_exp *col = n->data;
3066 575713 : if (col->type == e_column) {
3067 1246179 : for(node *m = l->exps->h; m; m = m->next) {
3068 782063 : sql_exp *cmp = m->data;
3069 782063 : if (cmp->type == e_cmp && cmp->flag == cmp_equal && !is_anti(cmp) && !is_semantics(cmp) && exp_is_atom(cmp->r)) {
3070 455627 : sql_exp *l = cmp->l;
3071 455627 : if(l->type == e_column && col->nid == l->nid) {
3072 : /* replace column with the constant */
3073 13775 : sql_exp *e = n->data = exp_copy(v->sql, cmp->r);
3074 13775 : exp_setalias(e, col->alias.label, exp_relname(col), exp_name(col));
3075 13775 : exp_propagate(v->sql->sa, e, col);
3076 13775 : list_hash_clear(rel->exps);
3077 : }
3078 : }
3079 : }
3080 : }
3081 : }
3082 : }
3083 : }
3084 2036155 : return rel;
3085 : }
3086 :
3087 : static sql_rel *
3088 2036144 : rel_optimize_projections_(visitor *v, sql_rel *rel)
3089 : {
3090 2036144 : rel = rel_project_cse(v, rel);
3091 2036153 : rel = rel_project_select_exp(v, rel);
3092 :
3093 2036160 : if (!rel || !is_groupby(rel->op))
3094 : return rel;
3095 :
3096 55795 : rel = rel_remove_const_aggr(v, rel);
3097 :
3098 55794 : if (v->value_based_opt) {
3099 31095 : rel = rel_simplify_sum(v, rel);
3100 31095 : rel = rel_simplify_groupby_columns(v, rel);
3101 : }
3102 55794 : rel = rel_groupby_cse(v, rel);
3103 55794 : rel = rel_push_aggr_down(v, rel);
3104 55794 : rel = rel_push_groupby_down(v, rel);
3105 55796 : rel = rel_reduce_groupby_exps(v, rel);
3106 55796 : rel = rel_distinct_aggregate_on_unique_values(v, rel);
3107 55795 : rel = rel_groupby_distinct(v, rel);
3108 55794 : rel = rel_push_count_down(v, rel);
3109 :
3110 : /* only when value_based_opt is on, ie not for dependency resolution */
3111 55796 : if (v->value_based_opt) {
3112 31095 : rel = rel_simplify_count(v, rel);
3113 31095 : rel = rel_basecount(v, rel);
3114 :
3115 31095 : rel = rel_groupjoin(v, rel);
3116 : }
3117 : return rel;
3118 : }
3119 :
3120 : static sql_rel *
3121 192061 : rel_optimize_projections(visitor *v, global_props *gp, sql_rel *rel)
3122 : {
3123 192061 : (void) gp;
3124 192061 : return rel_visitor_topdown(v, rel, &rel_optimize_projections_);
3125 : }
3126 :
3127 : run_optimizer
3128 644954 : bind_optimize_projections(visitor *v, global_props *gp)
3129 : {
3130 644954 : int flag = v->sql->sql_optimizer;
3131 644753 : return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_project] || gp->cnt[op_union] || gp->cnt[op_munion]
3132 1289707 : || gp->cnt[op_inter] || gp->cnt[op_except]) && (flag & optimize_projections) ? rel_optimize_projections : NULL;
3133 : }
3134 :
3135 :
3136 : static bool
3137 58305 : exps_have_selfref(list *exps)
3138 : {
3139 58305 : bool selfref = false;
3140 58305 : if (list_empty(exps))
3141 : return false;
3142 472323 : for(node *n = exps->h; n && !selfref; n = n->next) {
3143 414018 : sql_exp *e = n->data;
3144 414018 : selfref |= is_selfref(e);
3145 : }
3146 : return selfref;
3147 : }
3148 :
3149 : static inline sql_rel *
3150 1010160 : rel_push_project_down_union(visitor *v, sql_rel *rel)
3151 : {
3152 : /* first remove distinct if already unique */
3153 1010160 : if (rel->op == op_project && need_distinct(rel) && rel->exps && exps_unique(v->sql, rel, rel->exps) && !have_nil(rel->exps)) {
3154 111 : set_nodistinct(rel);
3155 111 : if (exps_card(rel->exps) <= CARD_ATOM && rel->card > CARD_ATOM) { /* if the projection just contains constants, then no topN is needed */
3156 2 : sql_rel *nl = rel->l = rel_topn(v->sql->sa, rel->l, append(sa_list(v->sql->sa), exp_atom_lng(v->sql->sa, 1)));
3157 2 : set_processed(nl);
3158 : }
3159 111 : v->changes++;
3160 : }
3161 :
3162 1010160 : if (rel->op == op_project && rel->l && rel->exps && list_empty(rel->r)) {
3163 475012 : int need_distinct = need_distinct(rel);
3164 475012 : sql_rel *u = rel->l;
3165 475012 : sql_rel *p = rel;
3166 :
3167 475012 : if (!u || !(is_union(u->op) || is_munion(u->op)) || need_distinct(u) || is_recursive(u) || !u->exps || rel_is_ref(u) || project_unsafe(rel, false))
3168 445830 : return rel;
3169 :
3170 29182 : sql_rel *r;
3171 :
3172 29182 : assert(!is_union(u->op));
3173 29182 : if (is_recursive(u))
3174 : return rel;
3175 :
3176 : /* don't push project down union of single values */
3177 87757 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
3178 58590 : r = n->data;
3179 : // TODO: does this check make sense?
3180 58590 : if (is_project(r->op) && !r->l)
3181 : return rel;
3182 : }
3183 :
3184 87742 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
3185 58575 : r = rel_dup(n->data);
3186 :
3187 : /* introduce projection around each operand if needed */
3188 58575 : if (!is_project(r->op) || exps_have_selfref(r->exps))
3189 270 : r = rel_project(v->sql->sa, r,
3190 : rel_projections(v->sql, r, NULL, 1, 1));
3191 : /* check if we need distinct */
3192 117150 : need_distinct &=
3193 58575 : (!exps_unique(v->sql, r, r->exps) || have_nil(r->exps));
3194 58575 : rel_rename_exps(v->sql, u->exps, r->exps);
3195 :
3196 58575 : rel_destroy(n->data);
3197 58575 : n->data = r;
3198 : }
3199 :
3200 : /* once we have checked for need_distinct in every rel we can
3201 : * introduce the projects under the munion which are gonna be
3202 : * copies of the single project above munion */
3203 87742 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
3204 58575 : r = rel_dup(n->data);
3205 :
3206 58575 : r = rel_project(v->sql->sa, r, NULL);
3207 58575 : if (need_distinct)
3208 0 : set_distinct(r);
3209 58575 : r->exps = exps_copy(v->sql, p->exps);
3210 58575 : set_processed(r);
3211 :
3212 58575 : rel_destroy(n->data);
3213 58575 : n->data = r;
3214 : }
3215 :
3216 : /* turn the project-munion on top into munion. incr operand
3217 : * rels count to make sure that they are not deleted by the
3218 : * subsequent rel_inplace_setop_n_ary */
3219 87742 : for (node *n = ((list*)u->l)->h; n; n = n->next)
3220 58575 : rel_dup(n->data);
3221 29167 : rel = rel_inplace_setop_n_ary(v->sql, rel, u->l, op_munion,
3222 : rel_projections(v->sql, rel, NULL, 1, 1));
3223 29167 : if (need_distinct)
3224 0 : set_distinct(rel);
3225 29167 : if (is_single(u))
3226 2 : set_single(rel);
3227 :
3228 29167 : v->changes++;
3229 :
3230 : /* if any operand has two project above then squash them */
3231 87742 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
3232 58575 : r = rel_dup(n->data);
3233 58575 : r = rel_merge_projects_(v, r);
3234 58575 : rel_destroy(n->data);
3235 58575 : n->data = r;
3236 : }
3237 :
3238 : return rel;
3239 : }
3240 : return rel;
3241 : }
3242 :
3243 : static inline sql_rel *
3244 1010160 : rel_merge_unions(visitor *v, sql_rel *rel)
3245 : {
3246 : /* stacked munion flattening e.g.
3247 : * munion( munion(a, b, c), munion(d, e)) => munion(a,b,c,d,e)
3248 : */
3249 1010160 : if (rel && is_munion(rel->op) && !is_recursive(rel)) {
3250 56369 : list *l = rel->l;
3251 272639 : for(node *n = l->h; n; ) {
3252 159901 : node *next = n->next;
3253 159901 : sql_rel *oc = n->data;
3254 159901 : sql_rel *c = oc;
3255 :
3256 : /* account for any group-bys pushed down between stacked munions */
3257 159901 : if (oc->op == op_groupby)
3258 2593 : c = oc->l;
3259 :
3260 159901 : if (is_munion(c->op)) {
3261 10700 : c = rel_dup(c);
3262 10700 : list_remove_node(l, NULL, n);
3263 10700 : l = list_merge(l, c->l, (fdup)NULL);
3264 10700 : c->l = NULL;
3265 10700 : rel_destroy(oc);
3266 10700 : if (!next)
3267 3 : next = l->h;
3268 10700 : v->changes++;
3269 : }
3270 : n = next;
3271 : }
3272 56369 : rel->l = l;
3273 : }
3274 1010160 : return rel;
3275 : }
3276 :
3277 :
3278 : /*
3279 : * Push (semi)joins down unions, this is basically for merge tables, where
3280 : * we know that the fk-indices are split over two clustered merge tables.
3281 : */
3282 : static inline sql_rel *
3283 1010160 : rel_push_join_down_munion(visitor *v, sql_rel *rel)
3284 : {
3285 1010160 : if ((is_join(rel->op) && !is_outerjoin(rel->op) && !is_single(rel)) || is_semi(rel->op)) {
3286 120865 : sql_rel *l = rel->l, *r = rel->r, *ol = l, *or = r;
3287 120865 : list *exps = rel->exps, *attr = rel->attr;
3288 120865 : sql_exp *je = NULL;
3289 :
3290 120865 : if (is_recursive(l) || is_recursive(r))
3291 : return rel;
3292 : /* we would like to optimize in place reference rels which point
3293 : * to replica tables and let the replica optimizer handle those
3294 : * later. otherwise we miss the push join down optimization due
3295 : * to the rel_is_ref bailout
3296 : */
3297 120861 : if (rel_is_ref(l) && is_basetable(l->op) && l->l && isReplicaTable((sql_table*)l->l)) {
3298 1 : rel->l = rel_copy(v->sql, l, true);
3299 1 : rel_destroy(l);
3300 : }
3301 120861 : if (rel_is_ref(r) && is_basetable(r->op) && r->l && isReplicaTable((sql_table*)r->l)) {
3302 4 : rel->r = rel_copy(v->sql, r, true);
3303 4 : rel_destroy(r);
3304 : }
3305 :
3306 : // TODO: do we need to check if it's l/r are refs?
3307 120861 : if (!l || !r || need_distinct(l) || need_distinct(r) || rel_is_ref(l) || rel_is_ref(r))
3308 : return rel;
3309 75491 : if (l->op == op_project)
3310 4482 : l = l->l;
3311 75491 : if (r->op == op_project)
3312 7797 : r = r->l;
3313 :
3314 : /* both sides only if we have a join index ASSUMING pkey-fkey are aligned */
3315 : // TODO: we could also check if the join cols are (not) unique
3316 75491 : bool aligned_pk_fk = true;
3317 75491 : if (!l || !r || (is_munion(l->op) && is_munion(r->op) &&
3318 161 : !(je = rel_is_join_on_pkey(rel, aligned_pk_fk))))
3319 708 : return rel;
3320 :
3321 : // TODO: why? bailout for union semijoin without pkey joins expressions
3322 74783 : if (is_semi(rel->op) && is_munion(l->op) && !je)
3323 : return rel;
3324 :
3325 : /* if both sides are munions we assume that they will have the same number of children */
3326 73934 : if (is_munion(l->op) && is_munion(r->op) && list_length(l->l) != list_length(r->l))
3327 : return rel;
3328 :
3329 73934 : if (is_munion(l->op) && !need_distinct(l) && !is_single(l) && !is_recursive(l) &&
3330 4570 : !is_munion(r->op)){
3331 : /* join(munion(a,b,c), d) -> munion(join(a,d), join(b,d), join(c,d)) */
3332 4570 : list *js = sa_list(v->sql->sa);
3333 13716 : for (node *n = ((list*)l->l)->h; n; n = n->next) {
3334 9146 : sql_rel *pc = rel_dup(n->data);
3335 9146 : if (!is_project(pc->op))
3336 12 : pc = rel_project(v->sql->sa, pc, rel_projections(v->sql, pc, NULL, 1, 1));
3337 9146 : rel_rename_exps(v->sql, l->exps, pc->exps);
3338 9146 : if (l != ol) {
3339 2 : pc = rel_project(v->sql->sa, pc, NULL);
3340 2 : pc->exps = exps_copy(v->sql, ol->exps);
3341 2 : set_processed(pc);
3342 : }
3343 9146 : pc = rel_crossproduct(v->sql->sa, pc, rel_dup(or), rel->op);
3344 9146 : pc->exps = exps_copy(v->sql, exps);
3345 9146 : pc->attr = exps_copy(v->sql, attr);
3346 9146 : set_processed(pc);
3347 9146 : pc = rel_project(v->sql->sa, pc, rel_projections(v->sql, pc, NULL, 1, 1));
3348 9146 : js = append(js, pc);
3349 : }
3350 4570 : v->changes++;
3351 4570 : return rel_inplace_setop_n_ary(v->sql, rel, js, op_munion,
3352 : rel_projections(v->sql, rel, NULL, 1, 1));
3353 69364 : } else if (is_munion(l->op) && !need_distinct(l) && !is_single(l) && !is_recursive(l) &&
3354 0 : is_munion(r->op) && !need_distinct(r) && !is_single(r) && !is_recursive(r) &&
3355 : je) {
3356 : /* join(munion(a,b,c), munion(d,e,f)) -> munion(join(a,d), join(b,e), join(c,f)) */
3357 0 : list *cps = sa_list(v->sql->sa);
3358 : /* create pairwise joins between left and right parts. assume eq num of parts (see earlier bailout) */
3359 0 : for (node *n = ((list*)l->l)->h, *m=((list*)r->l)->h; n && m; n = n->next, m = m->next) {
3360 : /* left part */
3361 0 : sql_rel *lp = rel_dup(n->data);
3362 0 : if (!is_project(lp->op))
3363 0 : lp = rel_project(v->sql->sa, lp, rel_projections(v->sql, lp, NULL, 1, 1));
3364 0 : rel_rename_exps(v->sql, l->exps, lp->exps);
3365 0 : if (l != ol) {
3366 0 : lp = rel_project(v->sql->sa, lp, NULL);
3367 0 : lp->exps = exps_copy(v->sql, ol->exps);
3368 0 : set_processed(lp);
3369 : }
3370 : /* right part */
3371 0 : sql_rel *rp = rel_dup(m->data);
3372 0 : if (!is_project(rp->op))
3373 0 : rp = rel_project(v->sql->sa, rp, rel_projections(v->sql, rp, NULL, 1, 1));
3374 0 : rel_rename_exps(v->sql, r->exps, rp->exps);
3375 0 : if (r != or) {
3376 0 : rp = rel_project(v->sql->sa, rp, NULL);
3377 0 : rp->exps = exps_copy(v->sql, or->exps);
3378 0 : set_processed(rp);
3379 : }
3380 : /* combine them */
3381 0 : sql_rel *cp = rel_crossproduct(v->sql->sa, lp, rp, rel->op);
3382 0 : cp->exps = exps_copy(v->sql, exps);
3383 0 : cp->attr = exps_copy(v->sql, attr);
3384 0 : set_processed(cp);
3385 0 : cp = rel_project(v->sql->sa, cp, rel_projections(v->sql, cp, NULL, 1, 1));
3386 0 : cps = append(cps, cp);
3387 : }
3388 0 : v->changes++;
3389 0 : return rel_inplace_setop_n_ary(v->sql, rel, cps, op_munion,
3390 : rel_projections(v->sql, rel, NULL, 1, 1));
3391 69364 : } else if (!is_munion(l->op) &&
3392 69356 : is_munion(r->op) && !need_distinct(r) && !is_single(r) && !is_recursive(r) &&
3393 2995 : !is_semi(rel->op)) {
3394 : /* join(a, munion(b,c,d)) -> munion(join(a,b), join(a,c), join(a,d)) */
3395 1616 : list *js = sa_list(v->sql->sa);
3396 4850 : for (node *n = ((list*)r->l)->h; n; n = n->next) {
3397 3234 : sql_rel *pc = rel_dup(n->data);
3398 3234 : if (!is_project(pc->op))
3399 17 : pc = rel_project(v->sql->sa, pc, rel_projections(v->sql, pc, NULL, 1, 1));
3400 3234 : rel_rename_exps(v->sql, r->exps, pc->exps);
3401 3234 : if (r != or) {
3402 122 : pc = rel_project(v->sql->sa, pc, NULL);
3403 122 : pc->exps = exps_copy(v->sql, or->exps);
3404 122 : set_processed(pc);
3405 : }
3406 3234 : pc = rel_crossproduct(v->sql->sa, rel_dup(ol), pc, rel->op);
3407 3234 : pc->exps = exps_copy(v->sql, exps);
3408 3234 : pc->attr = exps_copy(v->sql, attr);
3409 3234 : set_processed(pc);
3410 3234 : pc = rel_project(v->sql->sa, pc, rel_projections(v->sql, pc, NULL, 1, 1));
3411 3234 : js = append(js, pc);
3412 : }
3413 1616 : v->changes++;
3414 1616 : return rel_inplace_setop_n_ary(v->sql, rel, js, op_munion,
3415 : rel_projections(v->sql, rel, NULL, 1, 1));
3416 67748 : } else if (!is_munion(l->op) &&
3417 67740 : is_munion(r->op) && !need_distinct(r) && !is_single(r) && !is_recursive(r) &&
3418 1379 : is_semi(rel->op) && je) {
3419 : /* {semi}join ( A1, munion (B, A2a, C, A2b)) [A1.partkey = A2.partkey] ->
3420 : * {semi}join ( A1, munion (A2a, A2b))
3421 : * (ie some parts of an n-th munion operand)
3422 : *
3423 : * How to detect that a relation isn't matching?
3424 : * partitioning is currently done only on pkey/fkey's
3425 : * ie only matching per part if join is on pkey/fkey (parts)
3426 : * and part numbers should match.
3427 : * */
3428 0 : int lpnr = rel_part_nr(l, je);
3429 0 : if (lpnr < 0)
3430 : return rel;
3431 :
3432 0 : list *ups = sa_list(v->sql->sa);
3433 0 : for (node *n = ((list*)r->l)->h; n; n = n->next) {
3434 0 : if (rel_uses_part_nr(n->data, je, lpnr)) {
3435 0 : sql_rel *pc = rel_dup(n->data);
3436 0 : ups = append(ups, pc);
3437 : }
3438 : }
3439 0 : v->changes++;
3440 0 : return rel_inplace_setop_n_ary(v->sql, r, ups, op_munion,
3441 : rel_projections(v->sql, rel, NULL, 1, 1));
3442 : }
3443 : }
3444 : return rel;
3445 : }
3446 :
3447 : static sql_rel *
3448 1010160 : rel_optimize_unions_topdown_(visitor *v, sql_rel *rel)
3449 : {
3450 1010160 : rel = rel_push_project_down_union(v, rel);
3451 1010160 : rel = rel_merge_unions(v, rel);
3452 1010160 : rel = rel_push_join_down_munion(v, rel);
3453 1010160 : return rel;
3454 : }
3455 :
3456 : static sql_rel *
3457 29040 : rel_optimize_unions_topdown(visitor *v, global_props *gp, sql_rel *rel)
3458 : {
3459 29040 : (void) gp;
3460 29040 : return rel_visitor_topdown(v, rel, &rel_optimize_unions_topdown_);
3461 : }
3462 :
3463 : run_optimizer
3464 645159 : bind_optimize_unions_topdown(visitor *v, global_props *gp)
3465 : {
3466 645159 : (void) v;
3467 645159 : return gp->opt_level == 1 && gp->cnt[op_munion] ? rel_optimize_unions_topdown : NULL;
3468 : }
3469 :
3470 :
3471 : static sql_column *
3472 0 : is_fk_column_of_pk(mvc *sql, sql_rel *rel, sql_column *pkc, sql_exp *e) /* test if e is a foreign key column for the pk on pkc */
3473 : {
3474 0 : sql_trans *tr = sql->session->tr;
3475 0 : sql_column *c = exp_find_column(rel, e, -2);
3476 :
3477 0 : if (c) {
3478 0 : sql_table *t = c->t;
3479 :
3480 0 : for (node *n = ol_first_node(t->idxs); n; n = n->next) {
3481 0 : sql_idx *li = n->data;
3482 :
3483 0 : if (li->type == join_idx) {
3484 0 : for (node *m = li->columns->h ; m ; m = m->next) {
3485 0 : sql_kc *fkc = m->data;
3486 :
3487 0 : if (strcmp(fkc->c->base.name, c->base.name) == 0) { /* same fkey column */
3488 0 : sql_key *fkey = (sql_key*)os_find_id(tr->cat->objects, tr, ((sql_fkey*)li->key)->rkey);
3489 :
3490 0 : if (strcmp(fkey->t->base.name, pkc->t->base.name) == 0) { /* to same pk table */
3491 0 : for (node *o = fkey->columns->h ; o ; o = n->next) {
3492 0 : sql_kc *kc = m->data;
3493 :
3494 0 : if (strcmp(kc->c->base.name, pkc->base.name) == 0) /* to same pk table column */
3495 : return c;
3496 : }
3497 : }
3498 : }
3499 : }
3500 : }
3501 : }
3502 : }
3503 : return NULL;
3504 : }
3505 :
3506 : static bool
3507 0 : has_no_selectivity(mvc *sql, sql_rel *rel)
3508 : {
3509 0 : if (!rel)
3510 : return true;
3511 :
3512 0 : switch(rel->op){
3513 : case op_basetable:
3514 : case op_truncate:
3515 : case op_table:
3516 : return true;
3517 0 : case op_topn:
3518 : case op_sample:
3519 : case op_project:
3520 : case op_groupby:
3521 0 : return has_no_selectivity(sql, rel->l);
3522 : case op_ddl:
3523 : case op_insert:
3524 : case op_update:
3525 : case op_delete:
3526 : case op_merge:
3527 : case op_join:
3528 : case op_left:
3529 : case op_right:
3530 : case op_full:
3531 : case op_semi:
3532 : case op_anti:
3533 : case op_union:
3534 : case op_inter:
3535 : case op_except:
3536 : case op_munion:
3537 : case op_select:
3538 : return false;
3539 : }
3540 : return true;
3541 : }
3542 :
3543 : static sql_rel *
3544 117599 : rel_distinct_project2groupby_(visitor *v, sql_rel *rel)
3545 : {
3546 117599 : sql_rel *l = rel->l;
3547 :
3548 : /* rewrite distinct project (table) [ constant ] -> project [ constant ] */
3549 125347 : if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
3550 7748 : exps_card(rel->exps) <= CARD_ATOM) {
3551 476 : set_nodistinct(rel);
3552 476 : if (rel->card > CARD_ATOM) { /* if the projection just contains constants, then no topN is needed */
3553 193 : sql_rel *nl = rel->l = rel_topn(v->sql->sa, rel->l, append(sa_list(v->sql->sa), exp_atom_lng(v->sql->sa, 1)));
3554 193 : set_processed(nl);
3555 : }
3556 476 : v->changes++;
3557 : }
3558 :
3559 : /* rewrite distinct project [ pk ] ( select ( table ) [ e op val ])
3560 : * into project [ pk ] ( select/semijoin ( table ) */
3561 117599 : if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
3562 7498 : (l->op == op_select || l->op == op_semi) && exps_unique(v->sql, rel, rel->exps) &&
3563 226 : (!have_semantics(l->exps) || !have_nil(rel->exps))) {
3564 226 : set_nodistinct(rel);
3565 226 : v->changes++;
3566 : }
3567 :
3568 : /* rewrite distinct project ( join(p,f) [ p.pk = f.fk ] ) [ p.pk ]
3569 : * into project( (semi)join(p,f) [ p.pk = f.fk ] ) [ p.pk ] */
3570 117599 : if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
3571 7046 : l && (is_select(l->op) || l->op == op_join) && rel_is_join_on_pkey(l, true) /* [ pk == fk ] */) {
3572 0 : sql_exp *found = NULL, *pk = NULL, *fk = NULL;
3573 0 : bool all_exps_atoms = true;
3574 0 : sql_column *pkc = NULL;
3575 :
3576 0 : for (node *m = l->exps->h ; m ; m = m->next) { /* find a primary key join */
3577 0 : sql_exp *je = (sql_exp *) m->data;
3578 0 : sql_exp *le = je->l, *re = je->r;
3579 :
3580 0 : if (!find_prop(je->p, PROP_JOINIDX)) /* must be a pk-fk join expression */
3581 0 : continue;
3582 :
3583 0 : if ((pkc = exp_is_pkey(l, le))) { /* le is the primary key */
3584 0 : all_exps_atoms = true;
3585 :
3586 0 : for (node *n = rel->exps->h; n && all_exps_atoms; n = n->next) {
3587 0 : sql_exp *e = (sql_exp *) n->data;
3588 :
3589 0 : if (exp_match(e, le) || exp_refers(e, le))
3590 : found = e;
3591 0 : else if (e->card > CARD_ATOM)
3592 0 : all_exps_atoms = false;
3593 : }
3594 : pk = le;
3595 : fk = re;
3596 : }
3597 0 : if (!found && (pkc = exp_is_pkey(l, re))) { /* re is the primary key */
3598 0 : all_exps_atoms = true;
3599 :
3600 0 : for (node *n = rel->exps->h; n && all_exps_atoms; n = n->next) {
3601 0 : sql_exp *e = (sql_exp *) n->data;
3602 :
3603 0 : if (exp_match(e, re) || exp_refers(e, re))
3604 : found = e;
3605 0 : else if (e->card > CARD_ATOM)
3606 0 : all_exps_atoms = false;
3607 : }
3608 : pk = re;
3609 : fk = le;
3610 : }
3611 : }
3612 :
3613 0 : if (all_exps_atoms && found) { /* rel must have the same primary key on the projection list */
3614 : /* if the foreign key has no selectivity, the join can be removed */
3615 0 : if (!(rel_is_ref(l)) && ((rel_find_exp(l->l, fk) && is_fk_column_of_pk(v->sql, l->l, pkc, fk) && has_no_selectivity(v->sql, l->l)) ||
3616 0 : (l->r && rel_find_exp(l->r, fk) && is_fk_column_of_pk(v->sql, l->r, pkc, fk) && has_no_selectivity(v->sql, l->r)))) {
3617 0 : sql_rel *side = (rel_find_exp(l->l, pk) != NULL)?l->l:l->r;
3618 :
3619 0 : rel->l = rel_dup(side);
3620 0 : rel_destroy(l);
3621 0 : v->changes++;
3622 0 : set_nodistinct(rel);
3623 0 : return rel;
3624 : }
3625 : /* if the join has no multiple references it can be re-written into a semijoin */
3626 0 : if (l->op == op_join && !(rel_is_ref(l)) && list_length(rel->exps) == 1) { /* other expressions may come from the other side */
3627 0 : if (l->r && rel_find_exp(l->r, pk)) {
3628 0 : sql_rel *temp = l->l;
3629 0 : l->l = l->r;
3630 0 : l->r = temp;
3631 :
3632 0 : l->op = op_semi;
3633 0 : } else if (rel_find_exp(l->l, pk)) {
3634 0 : l->op = op_semi;
3635 : }
3636 : }
3637 0 : v->changes++;
3638 0 : set_nodistinct(rel);
3639 0 : return rel;
3640 : }
3641 : }
3642 : /* rewrite distinct project [ gbe ] ( select ( groupby [ gbe ] [ gbe, e ] )[ e op val ])
3643 : * into project [ gbe ] ( select ( group etc ) */
3644 117599 : if (rel->op == op_project && rel->l && !rel->r /* no order by */ &&
3645 7046 : need_distinct(rel) && l->op == op_select){
3646 2580 : sql_rel *g = l->l;
3647 2580 : if (is_groupby(g->op)) {
3648 4 : list *used = sa_list(v->sql->sa);
3649 4 : list *gbe = g->r;
3650 4 : node *n;
3651 4 : int fnd = 1;
3652 :
3653 8 : for (n = rel->exps->h; n && fnd; n = n->next) {
3654 4 : sql_exp *e = n->data;
3655 :
3656 4 : if (e->card > CARD_ATOM) {
3657 : /* find e in gbe */
3658 4 : sql_exp *ne = list_find_exp(g->exps, e);
3659 :
3660 4 : if (ne)
3661 3 : ne = list_find_exp( gbe, ne);
3662 3 : if (ne && !list_find_exp(used, ne)) {
3663 2 : fnd++;
3664 2 : list_append(used, ne);
3665 : }
3666 3 : if (!ne)
3667 : fnd = 0;
3668 : }
3669 : }
3670 4 : if (fnd == (list_length(gbe)+1)) {
3671 0 : v->changes++;
3672 0 : set_nodistinct(rel);
3673 : }
3674 : }
3675 : }
3676 117599 : if (rel->op == op_project && rel->l &&
3677 7046 : need_distinct(rel) && exps_card(rel->exps) > CARD_ATOM) {
3678 7046 : node *n;
3679 7046 : list *exps = new_exp_list(v->sql->sa), *gbe = new_exp_list(v->sql->sa);
3680 7046 : list *obe = rel->r; /* we need to read the ordering later */
3681 :
3682 7046 : if (obe) {
3683 0 : int fnd = 0;
3684 :
3685 0 : for(n = obe->h; n && !fnd; n = n->next) {
3686 0 : sql_exp *e = n->data;
3687 :
3688 0 : if (e->type != e_column)
3689 : fnd = 1;
3690 0 : else if (exps_bind_nid(rel->exps, e->nid) == NULL)
3691 0 : fnd = 1;
3692 : }
3693 0 : if (fnd)
3694 : return rel;
3695 : }
3696 7046 : rel->l = rel_project(v->sql->sa, rel->l, rel->exps);
3697 :
3698 21701 : for (n = rel->exps->h; n; n = n->next) {
3699 14655 : sql_exp *e = n->data, *ne;
3700 :
3701 14655 : set_nodistinct(e);
3702 14655 : ne = exp_ref(v->sql, e);
3703 14655 : if (e->card > CARD_ATOM && !list_find_exp(gbe, ne)) { /* no need to group by on constants, or the same column multiple times */
3704 14279 : append(gbe, ne);
3705 14279 : ne = exp_ref(v->sql, ne);
3706 : }
3707 14655 : append(exps, ne);
3708 : }
3709 7046 : rel->op = op_groupby;
3710 7046 : rel->exps = exps;
3711 7046 : rel->r = gbe;
3712 7046 : set_nodistinct(rel);
3713 7046 : if (obe) {
3714 : /* add order again */
3715 0 : rel = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
3716 0 : rel->r = obe;
3717 : }
3718 7046 : v->changes++;
3719 : }
3720 : return rel;
3721 : }
3722 :
3723 : static sql_rel *
3724 5693 : rel_distinct_project2groupby(visitor *v, global_props *gp, sql_rel *rel)
3725 : {
3726 5693 : (void) gp;
3727 5693 : return rel_visitor_bottomup(v, rel, &rel_distinct_project2groupby_);
3728 : }
3729 :
3730 : run_optimizer
3731 645116 : bind_distinct_project2groupby(visitor *v, global_props *gp)
3732 : {
3733 645116 : int flag = v->sql->sql_optimizer;
3734 565590 : return gp->opt_cycle == 0 && gp->opt_level == 1 && gp->needs_distinct && gp->cnt[op_project] &&
3735 650809 : (flag & distinct_project2groupby)? rel_distinct_project2groupby : NULL;
3736 : }
|