Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024, 2025 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer_private.h"
15 : #include "rel_basetable.h"
16 : #include "rel_exp.h"
17 : #include "rel_select.h"
18 : #include "rel_rewriter.h"
19 :
20 : static int
21 2812986 : exp_is_rename(sql_exp *e)
22 : {
23 2812986 : return (e->type == e_column);
24 : }
25 :
26 : static int
27 1401869 : exp_is_useless_rename(sql_exp *e)
28 : {
29 1401869 : return (e->type == e_column && e->alias.label == e->nid);
30 : }
31 :
32 : static list *
33 20669 : rel_used_projections(mvc *sql, list *exps, list *users)
34 : {
35 20669 : list *nexps = sa_list(sql->sa);
36 20668 : bool *used = SA_ZNEW_ARRAY(sql->ta, bool, list_length(exps));
37 20669 : int i = 0;
38 :
39 132805 : for(node *n = users->h; n; n = n->next) {
40 112137 : sql_exp *e = n->data, *ne = NULL;
41 112137 : assert(e->nid && exps_bind_nid(exps, e->nid));
42 112137 : if (e->nid && (ne = exps_bind_nid(exps, e->nid))) {
43 112137 : used[list_position(exps, ne)] = 1;
44 : }
45 : }
46 143547 : for(node *n = exps->h; n; n = n->next, i++) {
47 122879 : sql_exp *e = n->data;
48 122879 : if (is_intern(e) || used[i])
49 122743 : append(nexps, e);
50 : }
51 20668 : return nexps;
52 : }
53 :
54 : /* move projects down with the goal op removing them completely (ie push renames/reduced lists into basetable)
55 : * for some cases we can directly remove iff renames rename into same alias
56 : * */
57 : static sql_rel *
58 2157154 : rel_push_project_down_(visitor *v, sql_rel *rel)
59 : {
60 : /* for now only push down renames */
61 2157154 : if (v->depth > 1 && is_simple_project(rel->op) && !need_distinct(rel) && !rel_is_ref(rel) && rel->l && !rel->r &&
62 662717 : v->parent &&
63 662709 : !is_modify(v->parent->op) && !is_topn(v->parent->op) && !is_sample(v->parent->op) &&
64 1081795 : !is_ddl(v->parent->op) && !is_set(v->parent->op) && !is_munion(v->parent->op) &&
65 456044 : list_check_prop_all(rel->exps, (prop_check_func)&exp_is_rename)) {
66 228580 : sql_rel *l = rel->l;
67 :
68 228580 : if (rel_is_ref(l))
69 : return rel;
70 219800 : if (is_basetable(l->op)) {
71 35843 : if (list_check_prop_all(rel->exps, (prop_check_func)&exp_is_useless_rename)) {
72 : /* TODO reduce list (those in the project + internal) */
73 20669 : rel->l = NULL;
74 20669 : l->exps = rel_used_projections(v->sql, l->exps, rel->exps);
75 20669 : rel_destroy(rel);
76 20668 : v->changes++;
77 20668 : return l;
78 : }
79 : return rel;
80 183957 : } else if (list_check_prop_all(rel->exps, (prop_check_func)&exp_is_useless_rename)) {
81 81072 : if ((is_project(l->op) && list_length(l->exps) == list_length(rel->exps)) ||
82 64234 : ((v->parent && is_project(v->parent->op)) &&
83 35795 : (is_mset(l->op) || is_set(l->op) || is_select(l->op) || is_join(l->op) || is_semi(l->op) || is_topn(l->op) || is_sample(l->op)))) {
84 51635 : rel->l = NULL;
85 51635 : rel_destroy(rel);
86 51635 : v->changes++;
87 51635 : return l;
88 : }
89 : }
90 : }
91 : /* ToDo handle useful renames, ie new relation name and unique set of attribute names (could reduce set of * attributes) */
92 : /* handle both useless and useful with project [ group by ] */
93 : return rel;
94 : }
95 :
96 : static sql_rel *
97 192308 : rel_push_project_down(visitor *v, global_props *gp, sql_rel *rel)
98 : {
99 192308 : (void) gp;
100 192308 : return rel_visitor_bottomup(v, rel, &rel_push_project_down_);
101 : }
102 :
103 : run_optimizer
104 643141 : bind_push_project_down(visitor *v, global_props *gp)
105 : {
106 643141 : int flag = v->sql->sql_optimizer;
107 642945 : return gp->opt_level == 1 && (flag & push_project_down) &&
108 1286086 : (gp->cnt[op_project] || gp->cnt[op_groupby]) ? rel_push_project_down : NULL;
109 : }
110 :
111 :
112 : static bool exp_shares_exps(sql_exp *e, list *shared, uint64_t *uses);
113 :
114 : static bool
115 111448 : exps_shares_exps(list *exps, list *shared, uint64_t *uses)
116 : {
117 111448 : if (!exps || !shared)
118 : return false;
119 387607 : for (node *n = exps->h; n; n = n->next) {
120 276525 : sql_exp *e = n->data;
121 :
122 276525 : if (exp_shares_exps(e, shared, uses))
123 : return true;
124 : }
125 : return false;
126 : }
127 :
128 : static bool
129 3058167 : exp_shares_exps(sql_exp *e, list *shared, uint64_t *uses)
130 : {
131 5599667 : switch(e->type) {
132 81 : case e_cmp:
133 81 : if (e->flag == cmp_or || e->flag == cmp_filter)
134 58 : return exps_shares_exps(e->l, shared, uses) || exps_shares_exps(e->r, shared, uses);
135 23 : else if (e->flag == cmp_in || e->flag == cmp_notin)
136 0 : return exp_shares_exps(e->l, shared, uses) || exps_shares_exps(e->r, shared, uses);
137 : else
138 46 : return exp_shares_exps(e->l, shared, uses) || exp_shares_exps(e->r, shared, uses) || (e->f && exp_shares_exps(e->f, shared, uses));
139 113364 : case e_atom:
140 113364 : if (e->f)
141 0 : return exps_shares_exps(e->f, shared, uses);
142 : return false;
143 5102354 : case e_column:
144 : {
145 5102354 : sql_exp *ne = NULL;
146 5102354 : assert(e->nid);
147 5102354 : if (e->nid)
148 5102354 : ne = exps_bind_nid(shared, e->nid);
149 5102354 : if (!ne)
150 : return false;
151 3920567 : if (ne->type != e_column) {
152 557103 : int i = list_position(shared, ne);
153 557103 : if (i < 0)
154 : return false;
155 557103 : uint64_t used = (uint64_t) 1 << i;
156 557103 : if (used & *uses)
157 : return true;
158 556725 : *uses |= used;
159 556725 : return false;
160 : }
161 3363464 : if (ne != e && (list_position(shared, e) < 0 || list_position(shared, e) > list_position(shared, ne)))
162 : /* maybe ne refers to a local complex exp */
163 2268964 : return exp_shares_exps(ne, shared, uses);
164 : return false;
165 : }
166 272536 : case e_convert:
167 272536 : return exp_shares_exps(e->l, shared, uses);
168 111332 : case e_aggr:
169 : case e_func:
170 111332 : return exps_shares_exps(e->l, shared, uses);
171 : case e_psm:
172 0 : assert(0); /* not in projection list */
173 : }
174 : return false;
175 : }
176 :
177 : static bool
178 369329 : exps_share_expensive_exp(list *exps, list *shared )
179 : {
180 369329 : uint64_t uses = 0;
181 :
182 369329 : if (list_empty(exps) || list_empty(shared))
183 0 : return false;
184 369329 : if (list_length(shared) > 64)
185 : return true;
186 3149812 : for (node *n = exps->h; n; n = n->next) {
187 2781580 : sql_exp *e = n->data;
188 :
189 2781580 : if (exp_shares_exps(e, shared, &uses))
190 : return true;
191 : }
192 : return false;
193 : }
194 :
195 : static bool ambigious_ref( list *exps, sql_exp *e);
196 : static bool
197 86833 : ambigious_refs( list *exps, list *refs)
198 : {
199 86833 : if (list_empty(refs))
200 : return false;
201 264424 : for(node *n=refs->h; n; n = n->next) {
202 198165 : if (ambigious_ref(exps, n->data))
203 : return true;
204 : }
205 : return false;
206 : }
207 :
208 : static bool
209 4186219 : ambigious_ref( list *exps, sql_exp *e)
210 : {
211 4186219 : sql_exp *ne = NULL;
212 :
213 4186219 : if (e->type == e_column) {
214 3613361 : assert(e->nid);
215 3613361 : if (e->nid)
216 3613361 : ne = exps_bind_nid(exps, e->nid);
217 3613361 : if (ne && e != ne)
218 : return true;
219 : }
220 4177592 : if (e->type == e_func)
221 86833 : return ambigious_refs(exps, e->l);
222 : return false;
223 : }
224 :
225 : /* merge 2 projects into the lower one */
226 : static sql_rel *
227 2417624 : rel_merge_projects_(visitor *v, sql_rel *rel)
228 : {
229 2485944 : list *exps = rel->exps;
230 2485944 : sql_rel *prj = rel->l;
231 2485944 : node *n;
232 :
233 2485944 : if (rel->op == op_project &&
234 1158809 : prj && prj->op == op_project && !(rel_is_ref(prj)) && list_empty(prj->r)) {
235 574752 : int all = 1;
236 :
237 574752 : if (project_unsafe(rel, false) || project_unsafe(prj, false) || exps_share_expensive_exp(rel->exps, prj->exps))
238 206520 : return rel;
239 :
240 : /* here we try to fix aliases */
241 368232 : list *nexps = NULL;
242 : /* for each exp check if we can rename it */
243 2503588 : for (n = exps->h; n && all; n = n->next) {
244 2143983 : sql_exp *e = n->data, *ne = NULL;
245 :
246 : /* We do not handle expressions pointing back in the list */
247 2143983 : if (ambigious_ref(exps, e)) {
248 : all = 0;
249 : break;
250 : }
251 2135356 : ne = exp_push_down_prj(v->sql, e, prj, prj->l);
252 : /* check if the referred alias name isn't used twice */
253 2135356 : if (ne && ambigious_ref(nexps, ne)) {
254 : all = 0;
255 : break;
256 : }
257 1844071 : if (ne) {
258 1844071 : if (exp_name(e))
259 1837668 : exp_prop_alias(v->sql->sa, ne, e);
260 1844071 : if (!nexps)
261 359483 : nexps = new_exp_list(v->sql->sa);
262 1844071 : list_append(nexps, ne);
263 : } else {
264 : all = 0;
265 : }
266 : }
267 368232 : if (all) {
268 68320 : rel->exps = nexps;
269 : /* we can now remove the intermediate project */
270 : /* push order by expressions */
271 68320 : if (!list_empty(rel->r)) {
272 0 : list *nr = new_exp_list(v->sql->sa), *res = rel->r;
273 0 : for (n = res->h; n; n = n->next) {
274 0 : sql_exp *e = n->data, *ne = NULL;
275 :
276 0 : ne = exp_push_down_prj(v->sql, e, prj, prj->l);
277 0 : if (ne) {
278 0 : if (exp_name(e))
279 0 : exp_prop_alias(v->sql->sa, ne, e);
280 0 : list_append(nr, ne);
281 : } else {
282 : all = 0;
283 : }
284 : }
285 0 : if (all) {
286 0 : rel->r = nr;
287 : } else {
288 : /* leave as is */
289 0 : rel->exps = exps;
290 0 : return rel;
291 : }
292 : }
293 68320 : rel->l = prj->l;
294 68320 : prj->l = NULL;
295 68320 : rel_destroy(prj);
296 68320 : v->changes++;
297 68320 : return rel_merge_projects_(v, rel);
298 : } else {
299 : /* leave as is */
300 299912 : rel->exps = exps;
301 : }
302 299912 : return rel;
303 : }
304 : return rel;
305 : }
306 :
307 : static sql_rel *
308 192308 : rel_merge_projects(visitor *v, global_props *gp, sql_rel *rel)
309 : {
310 192308 : (void) gp;
311 192308 : return rel_visitor_bottomup(v, rel, &rel_merge_projects_);
312 : }
313 :
314 : run_optimizer
315 643155 : bind_merge_projects(visitor *v, global_props *gp)
316 : {
317 643155 : int flag = v->sql->sql_optimizer;
318 642959 : return gp->opt_level == 1 && (flag & merge_projects) &&
319 1286106 : (gp->cnt[op_project] || gp->cnt[op_groupby]) ? rel_merge_projects : NULL;
320 : }
321 :
322 :
323 : static sql_exp *split_aggr_and_project(mvc *sql, list *aexps, sql_exp *e);
324 :
325 : static void
326 0 : list_split_aggr_and_project(mvc *sql, list *aexps, list *exps)
327 : {
328 0 : if (list_empty(exps))
329 : return ;
330 0 : for(node *n = exps->h; n; n = n->next)
331 0 : n->data = split_aggr_and_project(sql, aexps, n->data);
332 : }
333 :
334 : static sql_exp *
335 0 : split_aggr_and_project(mvc *sql, list *aexps, sql_exp *e)
336 : {
337 0 : switch(e->type) {
338 0 : case e_aggr:
339 : /* add to the aggrs */
340 0 : if (!exp_name(e))
341 0 : exp_label(sql->sa, e, ++sql->label);
342 0 : list_append(aexps, e);
343 0 : return exp_ref(sql, e);
344 : case e_cmp:
345 : /* e_cmp's shouldn't exist in an aggr expression list */
346 0 : assert(0);
347 : case e_convert:
348 0 : e->l = split_aggr_and_project(sql, aexps, e->l);
349 0 : return e;
350 0 : case e_func:
351 0 : list_split_aggr_and_project(sql, aexps, e->l);
352 0 : return e;
353 : case e_atom:
354 : case e_column: /* constants and columns shouldn't be rewritten */
355 : case e_psm:
356 : return e;
357 : }
358 : return NULL;
359 : }
360 :
361 : /* exp_rename */
362 : static sql_exp * exp_rename(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t);
363 :
364 : static list *
365 53247 : exps_rename(mvc *sql, list *l, sql_rel *f, sql_rel *t)
366 : {
367 53247 : if (list_empty(l))
368 : return l;
369 47171 : for (node *n=l->h; n; n=n->next)
370 30307 : n->data = exp_rename(sql, n->data, f, t);
371 : return l;
372 : }
373 :
374 : /* exp_rename */
375 : static sql_exp *
376 181726 : exp_rename(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t)
377 : {
378 181726 : sql_exp *ne = NULL;
379 :
380 181726 : assert(is_project(f->op));
381 :
382 181726 : switch(e->type) {
383 81424 : case e_column:
384 81424 : assert(e->nid);
385 81424 : ne = exps_bind_nid(f->exps, e->nid);
386 81424 : if (!ne)
387 : return e;
388 53462 : sql_exp *oe = e;
389 53462 : e = NULL;
390 53462 : if (ne && ne->nid)
391 53303 : e = rel_find_exp(t, ne);
392 53303 : if (!e) {
393 159 : sql->session->status = 0;
394 159 : sql->errstr[0] = 0;
395 159 : if (exp_is_atom(ne))
396 : return ne;
397 : return oe;
398 : }
399 53303 : ne = exp_ref(sql, e);
400 53303 : if (oe)
401 53303 : exp_propagate(sql->sa, ne, oe);
402 53303 : return ne;
403 56031 : case e_cmp:
404 56031 : if (e->flag == cmp_or || e->flag == cmp_filter) {
405 3781 : e->l = exps_rename(sql, e->l, f, t);
406 3781 : e->r = exps_rename(sql, e->r, f, t);
407 52250 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
408 6447 : e->l = exp_rename(sql, e->l, f, t);
409 6447 : e->r = exps_rename(sql, e->r, f, t);
410 : } else {
411 45803 : e->l = exp_rename(sql, e->l, f, t);
412 45803 : e->r = exp_rename(sql, e->r, f, t);
413 45803 : if (e->f)
414 225 : e->f = exp_rename(sql, e->f, f, t);
415 : }
416 : break;
417 5033 : case e_convert:
418 5033 : e->l = exp_rename(sql, e->l, f, t);
419 5033 : break;
420 2870 : case e_aggr:
421 : case e_func:
422 2870 : e->l = exps_rename(sql, e->l, f, t);
423 2870 : break;
424 36368 : case e_atom:
425 36368 : e->f = exps_rename(sql, e->f, f, t);
426 36368 : break;
427 : case e_psm:
428 : break;
429 : }
430 : return e;
431 : }
432 :
433 : static int
434 5942907 : exp_match_exp_cmp( sql_exp *e1, sql_exp *e2)
435 : {
436 5942907 : if (exp_match_exp(e1,e2))
437 61413 : return 0;
438 : return -1;
439 : }
440 :
441 : /* Pushing projects up the tree. Done very early in the optimizer.
442 : * Makes later steps easier.
443 : */
444 : extern void _rel_print(mvc *sql, sql_rel *rel);
445 : static sql_rel *
446 2067760 : rel_push_project_up_(visitor *v, sql_rel *rel)
447 : {
448 2067760 : if (is_simple_project(rel->op) && rel->l && !rel_is_ref(rel)) {
449 738252 : sql_rel *l = rel->l;
450 738252 : if (is_simple_project(l->op))
451 274819 : return rel_merge_projects_(v, rel);
452 : /* find equal column references, later cases are rewritten into references back to the first
453 : * project () [ i.i L1, i.i L2 ] -> project() [ i.i L1, L1 L2 ] */
454 463433 : if (list_length(rel->exps) > 1) {
455 357668 : list *exps = rel->exps;
456 357668 : node *n = exps->h;
457 357668 : bool needed = false;
458 2467548 : for(n = n->next; n && !needed; n = n->next) {
459 2109883 : sql_exp *e = n->data;
460 2109883 : if (e->type == e_column && !is_selfref(e)) {
461 16326941 : for(node *m = exps->h; m && m != n && !needed; m = m->next) {
462 14644545 : sql_exp *h = m->data;
463 14644545 : if (exp_match_exp(h,e))
464 9624 : needed = true;
465 : }
466 : }
467 : }
468 357665 : if (needed) {
469 9624 : rel->exps = sa_list(v->sql->sa);
470 9624 : node *n = exps->h;
471 9624 : list_append(rel->exps, n->data);
472 120854 : for(n = n->next; n; n = n->next) {
473 111230 : sql_exp *e = n->data;
474 111230 : if (e->type == e_column && !is_selfref(e)) {
475 69699 : node *m = list_find(rel->exps, e, (fcmp)&exp_match_exp_cmp);
476 69699 : if (m) {
477 12064 : sql_exp *me = m->data;
478 12064 : if (me->alias.label != me->nid) {
479 788 : sql_exp *ne = exp_ref(v->sql, m->data);
480 788 : exp_setalias(ne, e->alias.label, exp_relname(e), exp_name(e));
481 788 : exp_propagate(v->sql->sa, ne, e);
482 788 : set_selfref(ne);
483 788 : e = ne;
484 : }
485 : }
486 : }
487 111230 : list_append(rel->exps, e);
488 : }
489 : }
490 : }
491 : }
492 :
493 : /* project/project cleanup is done later */
494 1792939 : if (is_join(rel->op) || is_select(rel->op)) {
495 515440 : node *n;
496 515440 : list *exps = NULL, *l_exps, *r_exps;
497 515440 : sql_rel *l = rel->l;
498 515440 : sql_rel *r = rel->r;
499 515440 : sql_rel *t;
500 515440 : int nlexps = 0, i = 0;
501 :
502 : /* Don't rewrite refs, non projections or constant or
503 : order by projections */
504 515440 : if (!l || rel_is_ref(l) || is_topn(l->op) || is_sample(l->op) ||
505 486048 : (is_join(rel->op) && !list_empty(rel->attr)) ||
506 476785 : (is_join(rel->op) && (!r || rel_is_ref(r))) ||
507 454898 : (is_left(rel->op) && (rel->flag&MERGE_LEFT) /* can't push projections above merge statements left joins */) ||
508 454896 : (is_select(rel->op) && l->op != op_project) ||
509 248114 : (is_join(rel->op) && ((l->op != op_project && r->op != op_project) || is_topn(r->op) || is_sample(r->op))) ||
510 92620 : ((l->op == op_project && (!l->l || l->r || project_unsafe(l, is_select(rel->op)))) ||
511 76614 : (is_join(rel->op) && (r->op == op_project && (!r->l || r->r || project_unsafe(r, false))))))
512 443261 : return rel;
513 :
514 72179 : if (l->op == op_project && l->l) {
515 980525 : for (n = l->exps->h; n; n = n->next) {
516 939618 : sql_exp *e = n->data;
517 939618 : if (!(is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) &&
518 938646 : !(e->type == e_column && !has_label(e)))
519 : return rel;
520 : }
521 : }
522 61355 : if (is_join(rel->op) && r->op == op_project && r->l) {
523 63930 : for (n = r->exps->h; n; n = n->next) {
524 58698 : sql_exp *e = n->data;
525 58698 : if (!(is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) &&
526 58303 : !(e->type == e_column && !has_label(e)))
527 : return rel;
528 : }
529 : }
530 :
531 43957 : if (l->op == op_project && l->l) {
532 : /* Go through the list of project expressions.
533 : Check if they can be pushed up, ie are they not
534 : changing or introducing any columns used
535 : by the upper operator. */
536 :
537 40092 : exps = new_exp_list(v->sql->sa);
538 907893 : for (n = l->exps->h; n; n = n->next) {
539 867801 : sql_exp *e = n->data;
540 :
541 : /* we cannot rewrite projection with atomic values from outer joins */
542 867801 : if (is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) {
543 661 : list_append(exps, e);
544 867140 : } else if (e->type == e_column) {
545 867140 : if (has_label(e))
546 : return rel;
547 867140 : list_append(exps, e);
548 : } else {
549 : return rel;
550 : }
551 : }
552 : } else {
553 3865 : exps = rel_projections(v->sql, l, NULL, 1, 1);
554 : }
555 43957 : nlexps = list_length(exps);
556 : /* also handle right hand of join */
557 43957 : if (is_join(rel->op) && r->op == op_project && r->l && list_empty(rel->attr)) {
558 : /* Here we also check all expressions of r like above
559 : but also we need to check for ambiguous names. */
560 :
561 43872 : for (n = r->exps->h; n; n = n->next) {
562 38878 : sql_exp *e = n->data;
563 :
564 : /* we cannot rewrite projection with atomic values from outer joins */
565 38878 : if (is_column(e->type) && exp_is_atom(e) && !(is_left(rel->op) || is_full(rel->op))) {
566 148 : list_append(exps, e);
567 38730 : } else if (e->type == e_column) {
568 38492 : if (has_label(e))
569 : return rel;
570 38492 : list_append(exps, e);
571 : } else {
572 : return rel;
573 : }
574 : }
575 38725 : } else if (is_join(rel->op) && list_empty(rel->attr)) {
576 21264 : list *r_exps = rel_projections(v->sql, r, NULL, 1, 1);
577 21264 : list_merge(exps, r_exps, (fdup)NULL);
578 : }
579 43719 : if (!list_empty(rel->attr))
580 0 : append(exps, exp_ref(v->sql, rel->attr->h->data));
581 : /* Here we should check for ambiguous names ? */
582 43719 : if (is_join(rel->op) && r && list_empty(rel->attr)) {
583 26258 : t = (l->op == op_project && l->l)?l->l:l;
584 26258 : l_exps = rel_projections(v->sql, t, NULL, 1, 1);
585 : /* conflict with old right expressions */
586 26258 : r_exps = rel_projections(v->sql, r, NULL, 1, 1);
587 26258 : if (rel->attr)
588 0 : append(r_exps, exp_ref(v->sql, rel->attr->h->data));
589 26258 : t = (r->op == op_project && r->l)?r->l:r;
590 26258 : r_exps = rel_projections(v->sql, t, NULL, 1, 1);
591 : /* conflict with new right expressions */
592 695746 : for(n = l_exps->h; n; n = n->next) {
593 669853 : sql_exp *e = n->data;
594 :
595 669853 : if (exp_is_atom(e))
596 0 : continue;
597 669853 : if (e->nid && exps_bind_nid(r_exps, e->nid))
598 : return rel;
599 669488 : if (e->alias.label && exps_bind_nid(r_exps, e->alias.label))
600 : return rel;
601 : }
602 : /* conflict with new left expressions */
603 177101 : for(n = r_exps->h; n; n = n->next) {
604 151208 : sql_exp *e = n->data;
605 :
606 151208 : if (exp_is_atom(e))
607 0 : continue;
608 151208 : if (e->nid && exps_bind_nid(l_exps, e->nid))
609 : return rel;
610 151208 : if (e->alias.label && exps_bind_nid(l_exps, e->alias.label))
611 : return rel;
612 : }
613 : }
614 :
615 : /* rename operator expressions */
616 43354 : if (l->op == op_project) {
617 : /* rewrite rel from rel->l into rel->l->l */
618 39696 : if (rel->exps) {
619 81839 : for (n = rel->exps->h; n; n = n->next) {
620 43540 : sql_exp *e = n->data, *ne;
621 :
622 43540 : ne = exp_rename(v->sql, e, l, l->l);
623 43540 : assert(ne);
624 43540 : if (ne != e && exp_name(e))
625 0 : exp_propagate(v->sql->sa, ne, e);
626 43540 : n->data = ne;
627 : }
628 : }
629 39696 : rel->l = l->l;
630 39696 : l->l = NULL;
631 39696 : rel_destroy(l);
632 : }
633 43354 : if (is_join(rel->op) && r->op == op_project && list_empty(rel->attr)) {
634 : /* rewrite rel from rel->r into rel->r->l */
635 4639 : if (rel->exps) {
636 8547 : for (n = rel->exps->h; n; n = n->next) {
637 4568 : sql_exp *e = n->data, *ne;
638 :
639 4568 : ne = exp_rename(v->sql, e, r, r->l);
640 4568 : assert(ne);
641 4568 : if (ne != e && exp_name(e))
642 0 : exp_propagate(v->sql->sa, ne, e);
643 4568 : n->data = ne;
644 : }
645 : }
646 4639 : rel->r = r->l;
647 4639 : r->l = NULL;
648 4639 : rel_destroy(r);
649 : }
650 : /* Done, ie introduce new project */
651 43354 : exps_fix_card(exps, rel->card);
652 : /* Fix nil flag */
653 43354 : if (!list_empty(exps)) {
654 930563 : for (n = exps->h ; n && i < nlexps ; n = n->next, i++) {
655 887209 : sql_exp *e = n->data;
656 :
657 887209 : if (is_right(rel->op) || is_full(rel->op))
658 449 : set_has_nil(e);
659 887209 : set_not_unique(e);
660 : }
661 187851 : for (; n ; n = n->next) {
662 144497 : sql_exp *e = n->data;
663 :
664 144497 : if (is_left(rel->op) || is_full(rel->op))
665 44819 : set_has_nil(e);
666 144497 : set_not_unique(e);
667 : }
668 : }
669 43354 : v->changes++;
670 43354 : return rel_inplace_project(v->sql->sa, rel, NULL, exps);
671 : }
672 1277499 : if (is_groupby(rel->op) && !rel_is_ref(rel) && rel->exps && list_length(rel->exps) > 1) {
673 24158 : node *n;
674 24158 : int fnd = 0;
675 24158 : list *aexps, *pexps;
676 :
677 : /* check if some are expressions aren't e_aggr */
678 103321 : for (n = rel->exps->h; n && !fnd; n = n->next) {
679 79163 : sql_exp *e = n->data;
680 :
681 79163 : if (e->type != e_aggr && e->type != e_column && e->type != e_atom && e->card > CARD_ATOM) {
682 79163 : fnd = 1;
683 : }
684 : }
685 : /* only aggr, no rewrite needed */
686 24158 : if (!fnd)
687 : return rel;
688 :
689 0 : aexps = sa_list(v->sql->sa);
690 0 : pexps = sa_list(v->sql->sa);
691 0 : for (n = rel->exps->h; n; n = n->next) {
692 0 : sql_exp *e = n->data, *ne = NULL;
693 :
694 0 : switch (e->type) {
695 0 : case e_atom: /* move over to the projection */
696 0 : list_append(pexps, e);
697 0 : break;
698 0 : case e_func:
699 0 : list_append(pexps, e);
700 0 : list_split_aggr_and_project(v->sql, aexps, e->l);
701 0 : break;
702 0 : case e_convert:
703 0 : list_append(pexps, e);
704 0 : e->l = split_aggr_and_project(v->sql, aexps, e->l);
705 0 : break;
706 0 : default: /* simple alias */
707 0 : list_append(aexps, e);
708 0 : ne = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
709 0 : assert(0);
710 : list_append(pexps, ne);
711 : break;
712 : }
713 : }
714 0 : v->changes++;
715 0 : rel->exps = aexps;
716 0 : return rel_inplace_project( v->sql->sa, rel, NULL, pexps);
717 : }
718 : return rel;
719 : }
720 :
721 : static sql_rel *
722 191597 : rel_push_project_up(visitor *v, global_props *gp, sql_rel *rel)
723 : {
724 191597 : (void) gp;
725 191597 : return rel_visitor_bottomup(v, rel, &rel_push_project_up_);
726 : }
727 :
728 : run_optimizer
729 643099 : bind_push_project_up(visitor *v, global_props *gp)
730 : {
731 643099 : int flag = v->sql->sql_optimizer;
732 642903 : return gp->opt_level == 1 && (flag & push_project_up) &&
733 1286002 : gp->cnt[op_project] ? rel_push_project_up : NULL;
734 : }
735 :
736 :
737 : static void split_exps(mvc *sql, list *exps, sql_rel *rel);
738 :
739 : static int
740 3193548 : exp_refers_cmp( sql_exp *e1, sql_exp *e2)
741 : {
742 3193548 : if (exp_refers(e1,e2))
743 0 : return 0;
744 : return -1;
745 : }
746 :
747 : sql_exp *
748 134852 : add_exp_too_project(mvc *sql, sql_exp *e, sql_rel *rel)
749 : {
750 134852 : node *n = list_find(rel->exps, e, (fcmp)&exp_match_exp_cmp);
751 :
752 : /* if not matching we may refer to an older expression */
753 134852 : if (!n)
754 85503 : n = list_find(rel->exps, e, (fcmp)&exp_refers_cmp);
755 85503 : if (!n) {
756 85503 : exp_label(sql->sa, e, ++sql->label);
757 85503 : append(rel->exps, e);
758 : } else {
759 49349 : sql_exp *ne = n->data;
760 :
761 49349 : if (rel && rel->l) {
762 49349 : if (ne && ne->alias.label && rel_find_nid(rel->l, ne->alias.label)) {
763 0 : exp_label(sql->sa, e, ++sql->label);
764 0 : append(rel->exps, e);
765 0 : ne = e;
766 : }
767 : }
768 : e = ne;
769 : }
770 134852 : e = exp_ref(sql, e);
771 134852 : return e;
772 : }
773 :
774 : static void
775 156262 : add_exps_too_project(mvc *sql, list *exps, sql_rel *rel)
776 : {
777 156262 : node *n;
778 :
779 156262 : if (!exps)
780 : return;
781 483915 : for(n=exps->h; n; n = n->next) {
782 327653 : sql_exp *e = n->data;
783 :
784 327653 : if (e->type != e_column && !exp_is_atom(e))
785 134569 : n->data = add_exp_too_project(sql, e, rel);
786 : }
787 : }
788 :
789 : static sql_exp *
790 515006 : split_exp(mvc *sql, sql_exp *e, sql_rel *rel)
791 : {
792 515006 : if (exp_is_atom(e))
793 : return e;
794 382570 : switch(e->type) {
795 : case e_column:
796 : return e;
797 17081 : case e_convert:
798 17081 : e->l = split_exp(sql, e->l, rel);
799 17081 : return e;
800 166143 : case e_aggr:
801 : case e_func:
802 166143 : if (!is_analytic(e) && !exp_has_sideeffect(e)) {
803 164196 : sql_subfunc *f = e->f;
804 164196 : if (e->type == e_func && !f->func->s && is_caselike_func(f) /*is_ifthenelse_func(f)*/) {
805 : return e;
806 : } else {
807 156262 : split_exps(sql, e->l, rel);
808 156262 : add_exps_too_project(sql, e->l, rel);
809 : }
810 : }
811 : return e;
812 492 : case e_cmp:
813 492 : if (e->flag == cmp_or || e->flag == cmp_filter) {
814 469 : split_exps(sql, e->l, rel);
815 469 : split_exps(sql, e->r, rel);
816 23 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
817 0 : e->l = split_exp(sql, e->l, rel);
818 0 : split_exps(sql, e->r, rel);
819 : } else {
820 23 : e->l = split_exp(sql, e->l, rel);
821 23 : e->r = split_exp(sql, e->r, rel);
822 23 : if (e->f)
823 21 : e->f = split_exp(sql, e->f, rel);
824 : }
825 : return e;
826 : case e_atom:
827 : case e_psm:
828 : return e;
829 : }
830 : return e;
831 : }
832 :
833 : static void
834 157207 : split_exps(mvc *sql, list *exps, sql_rel *rel)
835 : {
836 157207 : if (list_empty(exps))
837 : return;
838 485819 : for(node *n=exps->h; n; n = n->next){
839 328612 : sql_exp *e = n->data;
840 :
841 328612 : e = split_exp(sql, e, rel);
842 328612 : n->data = e;
843 : }
844 : }
845 :
846 : sql_rel *
847 649966 : rel_split_project_(visitor *v, sql_rel *rel, int top)
848 : {
849 1299937 : if (mvc_highwater(v->sql))
850 : return rel;
851 :
852 649983 : if (!rel)
853 : return NULL;
854 649983 : if (is_project(rel->op) && list_length(rel->exps) && (is_groupby(rel->op) || rel->l) && !need_distinct(rel) && !is_single(rel)) {
855 182632 : list *exps = rel->exps;
856 182632 : node *n;
857 182632 : int funcs = 0;
858 182632 : sql_rel *nrel;
859 :
860 : /* are there functions */
861 1411256 : for (n=exps->h; n && !funcs; n = n->next) {
862 1228631 : sql_exp *e = n->data;
863 :
864 1228631 : funcs = exp_has_func(e);
865 : }
866 : /* introduce extra project */
867 182625 : if (funcs && rel->op != op_project) {
868 7 : nrel = rel_project(v->sql->sa, rel->l,
869 7 : rel_projections(v->sql, rel->l, NULL, 1, 1));
870 7 : rel->l = nrel;
871 : /* recursively split all functions and add those to the projection list */
872 7 : split_exps(v->sql, rel->exps, nrel);
873 14 : if (nrel->l && !(nrel->l = rel_split_project_(v, nrel->l, (is_topn(rel->op)||is_sample(rel->op))?top:0)))
874 : return NULL;
875 7 : return rel;
876 182618 : } else if (funcs && !top && list_empty(rel->r)) {
877 : /* projects can have columns point back into the expression list, ie
878 : * create a new list including the split expressions */
879 17328 : node *n;
880 17328 : list *exps = rel->exps;
881 :
882 17328 : rel->exps = sa_list(v->sql->sa);
883 186574 : for (n=exps->h; n; n = n->next)
884 169246 : append(rel->exps, split_exp(v->sql, n->data, rel));
885 165290 : } else if (funcs && top && rel_is_ref(rel) && list_empty(rel->r)) {
886 : /* inplace */
887 9 : list *exps = rel_projections(v->sql, rel, NULL, 1, 1);
888 9 : sql_rel *l = rel_project(v->sql->sa, rel->l, NULL);
889 9 : rel->l = l;
890 9 : l->exps = rel->exps;
891 9 : set_processed(l);
892 9 : rel->exps = exps;
893 : }
894 : }
895 649974 : if (is_mset(rel->op) || is_set(rel->op) || is_basetable(rel->op))
896 : return rel;
897 425806 : if (rel->l && (rel->op != op_table || rel->flag != TRIGGER_WRAPPER)) {
898 446034 : rel->l = rel_split_project_(v, rel->l, (is_topn(rel->op)||is_sample(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0);
899 415205 : if (!rel->l)
900 : return NULL;
901 : }
902 425810 : if ((is_join(rel->op) || is_semi(rel->op)) && rel->r) {
903 122003 : rel->r = rel_split_project_(v, rel->r, (is_topn(rel->op)||is_sample(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0);
904 122003 : if (!rel->r)
905 : return NULL;
906 : }
907 : return rel;
908 : }
909 :
910 : static sql_rel *
911 112760 : rel_split_project(visitor *v, global_props *gp, sql_rel *rel)
912 : {
913 112760 : (void) gp;
914 112760 : return rel_split_project_(v, rel, 1);
915 : }
916 :
917 : run_optimizer
918 643081 : bind_split_project(visitor *v, global_props *gp)
919 : {
920 643081 : int flag = v->sql->sql_optimizer;
921 563322 : return gp->opt_cycle == 0 && gp->opt_level == 1 && (flag & split_project) &&
922 1206395 : (gp->cnt[op_project] || gp->cnt[op_groupby]) ? rel_split_project : NULL;
923 : }
924 :
925 :
926 : static sql_rel *
927 191786 : rel_project_reduce_casts(visitor *v, global_props *gp, sql_rel *rel)
928 : {
929 191786 : if (!rel)
930 : return NULL;
931 191786 : (void) gp;
932 191786 : if (gp->opt_level == 1 && v->value_based_opt && is_simple_project(rel->op) && list_length(rel->exps)) {
933 104185 : list *exps = rel->exps;
934 104185 : node *n;
935 :
936 525234 : for (n=exps->h; n; n = n->next) {
937 421049 : sql_exp *e = n->data;
938 :
939 421049 : if (e && e->type == e_func) {
940 30750 : sql_subfunc *f = e->f;
941 30750 : sql_subtype *res = f->res->h->data;
942 :
943 30750 : if (!f->func->s && !strcmp(f->func->base.name, "sql_mul") && res->scale > 0) {
944 6 : list *args = e->l;
945 6 : sql_exp *h = args->h->data;
946 6 : sql_exp *t = args->t->data;
947 6 : atom *ha = exp_value(v->sql, h), *ta = exp_value(v->sql, t);
948 :
949 6 : if (ha || ta) {
950 5 : atom *a = ha ? ha : ta;
951 5 : atom *na = reduce_scale(v->sql, a);
952 :
953 5 : if (na && na != a) {
954 2 : int rs = a->tpe.scale - na->tpe.scale;
955 2 : res->scale -= rs;
956 2 : if (ha) {
957 2 : h->r = NULL;
958 2 : h->l = na;
959 : } else {
960 0 : t->r = NULL;
961 0 : t->l = na;
962 : }
963 2 : v->changes++;
964 : }
965 : }
966 : }
967 : }
968 : }
969 : }
970 : return rel;
971 : }
972 :
973 : run_optimizer
974 643119 : bind_project_reduce_casts(visitor *v, global_props *gp)
975 : {
976 643119 : int flag = v->sql->sql_optimizer;
977 643119 : return gp->cnt[op_project] && (flag & project_reduce_casts) ? rel_project_reduce_casts : NULL;
978 : }
979 :
980 : static sql_column *
981 74159 : exp_find_column_( sql_rel *rel, sql_exp *exp, int pnr, sql_rel **bt )
982 : {
983 74159 : if (exp->type == e_column)
984 74136 : return name_find_column(rel, exp->l, exp->r, pnr, bt);
985 : return NULL;
986 : }
987 :
988 : static int
989 0 : rel_part_nr( sql_rel *rel, sql_exp *e )
990 : {
991 0 : sql_column *c = NULL;
992 0 : sql_rel *bt = NULL;
993 0 : assert(e->type == e_cmp);
994 :
995 0 : c = exp_find_column_(rel, e->l, -1, &bt);
996 0 : if (!c)
997 0 : c = exp_find_column_(rel, e->r, -1, &bt);
998 0 : if (!c && e->f)
999 0 : c = exp_find_column_(rel, e->f, -1, &bt);
1000 0 : if (!c || !bt || !rel_base_get_mergetable(bt))
1001 0 : return -1;
1002 0 : sql_table *pp = c->t;
1003 0 : sql_table *mt = rel_base_get_mergetable(bt);
1004 0 : return find_member_pos(mt->members, pp);
1005 : }
1006 :
1007 : static int
1008 0 : rel_uses_part_nr( sql_rel *rel, sql_exp *e, int pnr )
1009 : {
1010 0 : sql_column *c = NULL;
1011 0 : sql_rel *bt = NULL;
1012 0 : assert(e->type == e_cmp);
1013 :
1014 : /*
1015 : * following case fails.
1016 : *
1017 : * semijoin( A1, union [A1, A2] )
1018 : * The union will never return proper column (from A2).
1019 : * ie need different solution (probably pass pnr).
1020 : */
1021 0 : c = exp_find_column_(rel, e->l, pnr, &bt);
1022 0 : if (!c)
1023 0 : c = exp_find_column_(rel, e->r, pnr, &bt);
1024 0 : if (c && bt && rel_base_get_mergetable(bt)) {
1025 0 : sql_table *pp = c->t;
1026 0 : sql_table *mt = rel_base_get_mergetable(bt);
1027 0 : if (find_member_pos(mt->members, pp) == pnr)
1028 : return 1;
1029 : }
1030 : /* for projects we may need to do a rename! */
1031 0 : if (is_project(rel->op) || is_topn(rel->op) || is_sample(rel->op))
1032 0 : return rel_uses_part_nr( rel->l, e, pnr);
1033 :
1034 0 : if (is_munion(rel->op)) {
1035 : list *l = rel->l;
1036 : if (rel_uses_part_nr( l->h->data, e, pnr))
1037 : return 1;
1038 0 : } else if (is_union(rel->op) || is_join(rel->op) || is_semi(rel->op)) {
1039 0 : if (rel_uses_part_nr( rel->l, e, pnr))
1040 : return 1;
1041 0 : if (!is_semi(rel->op) && rel_uses_part_nr( rel->r, e, pnr))
1042 : return 1;
1043 : }
1044 : return 0;
1045 : }
1046 :
1047 : static sql_column *
1048 3002 : exp_is_pkey(sql_rel *rel, sql_exp *e)
1049 : {
1050 3002 : if (find_prop(e->p, PROP_HASHCOL)) { /* aligned PKEY JOIN */
1051 250 : fcmp cmp = (fcmp)&kc_column_cmp;
1052 250 : sql_column *c = exp_find_column(rel, e, -2);
1053 :
1054 250 : if (c && c->t->pkey && list_find(c->t->pkey->k.columns, c, cmp) != NULL)
1055 : return c;
1056 : }
1057 : return NULL;
1058 : }
1059 :
1060 : static sql_exp *
1061 3530 : rel_is_join_on_pkey(sql_rel *rel, bool pk_fk) /* pk_fk is used to verify is a join on pk-fk */
1062 : {
1063 3530 : if (!rel || !rel->exps)
1064 : return NULL;
1065 4851 : for (node *n = rel->exps->h; n; n = n->next) {
1066 2575 : sql_exp *je = n->data;
1067 :
1068 3892 : if (je->type == e_cmp && je->flag == cmp_equal &&
1069 2634 : (exp_is_pkey(rel, je->l) || exp_is_pkey(rel, je->r)) &&
1070 249 : (!pk_fk || find_prop(je->p, PROP_JOINIDX)))
1071 0 : return je;
1072 : }
1073 : return NULL;
1074 : }
1075 :
1076 : static int
1077 0 : exps_has_predicate( list *l )
1078 : {
1079 0 : node *n;
1080 :
1081 0 : for( n = l->h; n; n = n->next){
1082 0 : sql_exp *e = n->data;
1083 :
1084 0 : if (e->card <= CARD_ATOM)
1085 : return 1;
1086 : }
1087 : return 0;
1088 : }
1089 :
1090 : static sql_rel *
1091 369 : rel_find_select( sql_rel *r)
1092 : {
1093 1308 : while (!is_select(r->op) && r->l && is_project(r->op))
1094 : r = r->l;
1095 369 : if (is_select(r->op))
1096 15 : return r;
1097 : return NULL;
1098 : }
1099 :
1100 : static bool
1101 73792 : rels_share_rel(list *l)
1102 : {
1103 73792 : sql_rel *ref = NULL;
1104 98001 : for (node *n = l->h; n; n = n->next) {
1105 97632 : sql_rel *r = n->data;
1106 97632 : if(!ref) {
1107 73792 : if ((ref = rel_find_ref(r)) == NULL)
1108 : return false;
1109 23840 : } else if (ref != rel_find_ref(r)) {
1110 : return false;
1111 : }
1112 : }
1113 : return true;
1114 : }
1115 :
1116 : static inline sql_rel *
1117 999516 : rel_merge_munion(visitor *v, sql_rel *rel)
1118 : {
1119 999516 : if (is_munion(rel->op) && rels_share_rel(rel->l)) {
1120 369 : list *rels = rel->l, *nrels = NULL;
1121 369 : sql_rel *cur = NULL, *curs = NULL;
1122 :
1123 : /* Find selects and try to merge */
1124 369 : for(node *n = rels->h; n; n = n->next) {
1125 369 : sql_rel *r = n->data;
1126 369 : sql_rel *s = rel_find_select(r);
1127 :
1128 369 : if (!s)
1129 : return rel;
1130 15 : if (cur) {
1131 0 : if (s != r->l || curs->l != s->l || !rel_is_ref(s->l) ||
1132 : /* for now only union(project*(select(R),project*(select(R))) */
1133 0 : !s->exps || !curs->exps || exps_has_predicate(s->exps) || exps_has_predicate(curs->exps)) {
1134 0 : if (nrels)
1135 0 : append(nrels, r);
1136 : else
1137 : return rel;
1138 : }
1139 :
1140 : /* merge, ie. add 'or exp' */
1141 0 : curs->exps = append(new_exp_list(v->sql->sa), exp_or(v->sql->sa, cur->exps, s->exps, 0));
1142 0 : if (!nrels) {
1143 0 : nrels = sa_list(v->sql->sa);
1144 0 : append(nrels, cur);
1145 : }
1146 : } else {
1147 15 : if (s != r->l || !rel_is_ref(s->l)) {
1148 15 : if (nrels) {
1149 0 : append(nrels, r);
1150 : } else {
1151 : return rel;
1152 : }
1153 : }
1154 : cur = r;
1155 : curs = s;
1156 : }
1157 : }
1158 0 : if (nrels) {
1159 0 : v->changes++;
1160 0 : rel->l = nrels;
1161 : }
1162 0 : return rel;
1163 : }
1164 : return rel;
1165 : }
1166 :
1167 : static sql_rel *
1168 999516 : rel_optimize_munions_bottomup_(visitor *v, sql_rel *rel)
1169 : {
1170 999516 : rel = rel_merge_munion(v, rel);
1171 999516 : return rel;
1172 : }
1173 :
1174 : static sql_rel *
1175 29207 : rel_optimize_munions_bottomup(visitor *v, global_props *gp, sql_rel *rel)
1176 : {
1177 29207 : (void) gp;
1178 29207 : return rel_visitor_bottomup(v, rel, &rel_optimize_munions_bottomup_);
1179 : }
1180 :
1181 : run_optimizer
1182 643078 : bind_optimize_unions_bottomup(visitor *v, global_props *gp)
1183 : {
1184 643078 : int flag = v->sql->sql_optimizer;
1185 642882 : return gp->opt_level == 1 && gp->cnt[op_munion] && (flag & optimize_unions_bottomup)
1186 643078 : ? rel_optimize_munions_bottomup : NULL;
1187 : }
1188 :
1189 :
1190 : static inline sql_rel *
1191 2044385 : rel_project_cse(visitor *v, sql_rel *rel)
1192 : {
1193 2044385 : if (is_project(rel->op) && rel->exps) {
1194 888720 : node *n, *m;
1195 888720 : list *nexps;
1196 888720 : int needed = 0;
1197 :
1198 6448216 : for (n=rel->exps->h; n && !needed; n = n->next) {
1199 5559494 : sql_exp *e1 = n->data;
1200 :
1201 5559494 : if (e1->type != e_column && !exp_is_atom(e1) && exp_name(e1)) {
1202 16243173 : for (m=n->next; m && !needed; m = m->next){
1203 15639941 : sql_exp *e2 = m->data;
1204 :
1205 15639941 : if (exp_name(e2) && exp_match_exp(e1, e2))
1206 15639941 : needed = 1;
1207 : }
1208 : }
1209 : }
1210 :
1211 888722 : if (!needed)
1212 : return rel;
1213 :
1214 742 : nexps = new_exp_list(v->sql->sa);
1215 8651 : for (n=rel->exps->h; n; n = n->next) {
1216 7909 : sql_exp *e1 = n->data;
1217 :
1218 7909 : if (e1->type != e_column && !exp_is_atom(e1) && exp_name(e1)) {
1219 33856 : for (m=nexps->h; m; m = m->next){
1220 29244 : sql_exp *e2 = m->data;
1221 :
1222 29244 : if (exp_name(e2) && exp_match_exp(e1, e2)) {
1223 782 : assert(e2->alias.label);
1224 782 : sql_exp *ne = exp_ref(v->sql, e2);
1225 :
1226 782 : ne = exp_propagate(v->sql->sa, ne, e1);
1227 782 : assert(!e1->ref);
1228 782 : exp_setalias(ne, e1->alias.label, exp_relname(e1), exp_name(e1));
1229 782 : set_selfref(ne);
1230 782 : assert(ne->nid);
1231 : e1 = ne;
1232 : break;
1233 : }
1234 : }
1235 : }
1236 7909 : append(nexps, e1);
1237 : }
1238 742 : rel->exps = nexps;
1239 : }
1240 : return rel;
1241 : }
1242 :
1243 : static int exp_is_const_op(sql_exp *exp, sql_exp *tope, sql_rel *expr);
1244 :
1245 : static int
1246 37 : exps_are_const_op(list *exps, sql_exp *tope, sql_rel *expr)
1247 : {
1248 37 : int ok = 1;
1249 :
1250 37 : if (list_empty(exps))
1251 : return 1;
1252 74 : for (node *n = exps->h; n && ok; n = n->next)
1253 37 : ok &= exp_is_const_op(n->data, tope, expr);
1254 : return ok;
1255 : }
1256 :
1257 : static int
1258 341 : exp_is_const_op(sql_exp *exp, sql_exp *tope, sql_rel *expr)
1259 : {
1260 452 : switch (exp->type) {
1261 30 : case e_atom:
1262 30 : return exp->f ? 0 : 1;
1263 2 : case e_convert:
1264 2 : return exp_is_const_op(exp->l, tope, expr);
1265 37 : case e_func:
1266 : case e_aggr: {
1267 37 : sql_subfunc *f = exp->f;
1268 37 : if (f->func->side_effect || IS_ANALYTIC(f->func))
1269 : return 0;
1270 37 : return exps_are_const_op(exp->l, tope, expr);
1271 : }
1272 0 : case e_cmp:
1273 0 : if (exp->flag == cmp_or || exp->flag == cmp_filter)
1274 0 : return exps_are_const_op(exp->l, tope, expr) && exps_are_const_op(exp->r, tope, expr);
1275 0 : if (exp->flag == cmp_in || exp->flag == cmp_notin)
1276 0 : return exp_is_const_op(exp->l, tope, expr) && exps_are_const_op(exp->r, tope, expr);
1277 0 : return exp_is_const_op(exp->l, tope, expr) && exp_is_const_op(exp->r, tope, expr) && (!exp->f || exp_is_const_op(exp->f, tope, expr));
1278 383 : case e_column: {
1279 383 : if (is_simple_project(expr->op) || is_groupby(expr->op)) {
1280 : /* in a simple projection, self-references may occur */
1281 383 : sql_exp *nexp = exps_bind_nid(expr->exps, exp->nid);
1282 383 : if (nexp && list_position(expr->exps, nexp) < list_position(expr->exps, tope))
1283 : return exp_is_const_op(nexp, exp, expr);
1284 : }
1285 : return 0;
1286 : }
1287 : default:
1288 : return 0;
1289 : }
1290 : }
1291 :
1292 : static sql_exp *
1293 25 : rel_groupby_add_count_star(mvc *sql, sql_rel *rel, sql_exp *count_star_exp, bool *count_added)
1294 : {
1295 25 : if (count_star_exp)
1296 : return count_star_exp;
1297 18 : if (!list_empty(rel->exps)) {
1298 49 : for (node *n=rel->exps->h; n ; n = n->next) {
1299 35 : sql_exp *e = n->data;
1300 :
1301 35 : if (exp_aggr_is_count(e) && !need_distinct(e) && list_length(e->l) == 0)
1302 4 : return e;
1303 : }
1304 : }
1305 14 : sql_subfunc *cf = sql_bind_func(sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
1306 14 : *count_added = true;
1307 14 : return rel_groupby_add_aggr(sql, rel, exp_aggr(sql->sa, NULL, cf, 0, 0, rel->card, 0));
1308 : }
1309 :
1310 : /* optimize sum(x + 12) into sum(x) + 12*count(*) */
1311 : static inline sql_rel *
1312 31084 : rel_simplify_sum(visitor *v, sql_rel *rel)
1313 : {
1314 31084 : if (is_groupby(rel->op) && !list_empty(rel->exps)) {
1315 31010 : sql_rel *upper = NULL, *groupby = rel, *l = groupby->l;
1316 31010 : sql_exp *count_star_exp = NULL;
1317 31010 : bool count_added = false;
1318 :
1319 93817 : for (node *n=groupby->exps->h; n ; n = n->next) {
1320 62807 : sql_exp *e = n->data;
1321 62807 : list *el = e->l;
1322 62807 : sql_subfunc *sf = e->f;
1323 :
1324 62807 : if (e->type == e_aggr && !need_distinct(e) && sf->func->type == F_AGGR && !sf->func->s && !strcmp(sf->func->base.name, "sum")) {
1325 5543 : sql_rel *expr = groupby;
1326 5543 : sql_exp *exp = (sql_exp*) el->h->data, *oexp = exp;
1327 :
1328 5543 : while (is_numeric_upcast(exp))
1329 0 : exp = exp->l;
1330 : /* we want to find a +/- call, so expect them to appear only on simple projections */
1331 12215 : while (exp && exp->type == e_column && (is_simple_project(expr->op) || is_groupby(expr->op)) && expr->l) {
1332 7550 : sql_rel *nexpr = NULL;
1333 7550 : sql_exp *nexp = rel_find_exp_and_corresponding_rel(l, exp, false, &nexpr, NULL);
1334 :
1335 : /* break when it loops on the same relation */
1336 7550 : if (nexpr == expr && list_position(expr->exps, nexp) >= list_position(expr->exps, exp))
1337 : break;
1338 6672 : expr = nexpr;
1339 6672 : exp = oexp = nexp;
1340 6674 : while (exp && is_numeric_upcast(exp))
1341 2 : exp = exp->l;
1342 : }
1343 :
1344 5543 : list *expl = exp ? exp->l : NULL;
1345 62807 : sql_subfunc *expf = exp ? exp->f : NULL;
1346 : /* found a candidate function */
1347 4346 : if (exp && exp->type == e_func && expf->func->type == F_FUNC && !expf->func->s &&
1348 897 : (!strcmp(expf->func->base.name, "sql_sub") || !strcmp(expf->func->base.name, "sql_add"))) {
1349 152 : sql_exp *e1 = (sql_exp*) expl->h->data, *e2 = (sql_exp*) expl->h->next->data;
1350 152 : int e1ok = exp_is_const_op(e1, oexp, expr), e2ok = exp_is_const_op(e2, oexp, expr);
1351 :
1352 152 : if ((!e1ok && e2ok) || (e1ok && !e2ok)) {
1353 26 : sql_exp *ocol = e1ok ? e2 : e1, *constant = e1ok ? e1 : e2, *mul, *colref, *naggr, *newop, *col = ocol, *match;
1354 26 : bool add_col = true, prepend = false;
1355 :
1356 : /* if 'col' is a projection from the under relation, then use it */
1357 26 : while (is_numeric_upcast(col))
1358 0 : col = col->l;
1359 26 : if (col->type == e_column) {
1360 24 : sql_exp *colf = exps_find_exp(l->exps, col);
1361 :
1362 : /* col is already found in the inner relation. Also look for a new reference for col, eg sql_add(col, 1), 1 as col */
1363 24 : if (colf && list_position(l->exps, colf) < list_position(l->exps, oexp)) {
1364 : add_col = false;
1365 11 : } else if (!colf && is_simple_project(l->op) && list_empty(l->r) && !rel_is_ref(l) && !need_distinct(l)) {
1366 : prepend = true;
1367 : add_col = false;
1368 1 : } else if (!colf && (is_simple_project(l->op) || is_groupby(l->op))) {
1369 : /* on these scenarios the new column expression will be ordered/(grouped for distinct) or create potential ambiguity (multiple ref), so skip */
1370 1 : continue;
1371 : }
1372 2 : } else if ((is_simple_project(l->op) && (!list_empty(l->r) || rel_is_ref(l) || need_distinct(l))) || is_groupby(l->op)) {
1373 0 : continue;
1374 : }
1375 :
1376 : /* add count star */
1377 25 : count_star_exp = rel_groupby_add_count_star(v->sql, groupby, count_star_exp, &count_added);
1378 : /* multiply constant by count star */
1379 25 : if (!(mul = rel_binop_(v->sql, NULL, constant, exp_ref(v->sql, count_star_exp), "sys", "sql_mul", card_value, true))) {
1380 0 : v->sql->session->status = 0;
1381 0 : v->sql->errstr[0] = '\0';
1382 0 : continue;
1383 : }
1384 25 : if (!has_label(mul))
1385 25 : exp_label(v->sql->sa, mul, ++v->sql->label);
1386 :
1387 25 : colref = exp_ref(v->sql, ocol);
1388 25 : if (add_col) /* if 'col' will be added, then make sure it has an unique label */
1389 3 : exp_label(v->sql->sa, colref, ++v->sql->label);
1390 :
1391 : /* 'oexp' contains the type for the input for the 'sum' aggregate */
1392 25 : if (!(colref = exp_check_type(v->sql, exp_subtype(oexp), groupby, colref, type_equal))) {
1393 0 : v->sql->session->status = 0;
1394 0 : v->sql->errstr[0] = '\0';
1395 0 : continue;
1396 : }
1397 : /* update sum to use the column side only */
1398 25 : sql_subfunc *a = sql_bind_func(v->sql, "sys", "sum", exp_subtype(colref), NULL, F_AGGR, true, true);
1399 25 : if (!a)
1400 0 : continue;
1401 25 : naggr = exp_aggr(v->sql->sa, list_append(sa_list(v->sql->sa), colref), a, need_distinct(e), need_no_nil(e), groupby->card, has_nil(e));
1402 25 : if ((match = exps_any_match(groupby->exps, naggr)) && list_position(groupby->exps, match) < list_position(groupby->exps, e)) { /* found a matching aggregate, use it */
1403 9 : naggr = exp_ref(v->sql, match);
1404 9 : exp_label(v->sql->sa, naggr, ++v->sql->label);
1405 16 : } else if (!has_label(naggr)) { /* otherwise use the new one */
1406 16 : exp_label(v->sql->sa, naggr, ++v->sql->label);
1407 : }
1408 :
1409 : /* generate addition/subtraction. subtraction is not commutative, so keep original order! */
1410 25 : if (!(newop = rel_binop_(v->sql, NULL, e1 == constant ? mul : exp_ref(v->sql, naggr), e1 == constant ? exp_ref(v->sql, naggr) : mul, "sys", expf->func->base.name, card_value, true))) {
1411 0 : v->sql->session->status = 0;
1412 0 : v->sql->errstr[0] = '\0';
1413 0 : continue;
1414 : }
1415 25 : if (!(newop = exp_check_type(v->sql, exp_subtype(e), groupby, newop, type_equal))) {
1416 0 : v->sql->session->status = 0;
1417 0 : v->sql->errstr[0] = '\0';
1418 0 : continue;
1419 : }
1420 :
1421 : /* a column reference can be prepended to the inner relation, add it after all the check type calls succeed */
1422 25 : if (prepend)
1423 10 : list_prepend(l->exps, col);
1424 :
1425 : /* the new generate function calls are valid, update relations */
1426 : /* we need a new relation for the multiplication and addition/subtraction */
1427 25 : if (!upper) {
1428 : /* be careful with relations with more than 1 reference, so do in-place replacement */
1429 18 : list *projs = rel_projections(v->sql, rel, NULL, 1, 1);
1430 18 : sql_rel *nrel = rel_groupby(v->sql, rel->l, NULL);
1431 18 : nrel->exps = rel->exps;
1432 18 : nrel->r = rel->r;
1433 18 : nrel->card = rel->card;
1434 18 : nrel->nrcols = list_length(rel->exps);
1435 18 : set_processed(nrel);
1436 18 : rel_dup(rel->l);
1437 18 : upper = rel = rel_inplace_project(v->sql->sa, rel, nrel, projs);
1438 18 : rel->card = exps_card(projs);
1439 18 : groupby = nrel; /* update pointers :) */
1440 18 : l = groupby->l;
1441 : }
1442 98 : for (node *n = upper->exps->h ; n ; ) {
1443 73 : node *next = n->next;
1444 73 : sql_exp *re = n->data;
1445 :
1446 : /* remove the old reference to the aggregate because we will use the addition/subtraction instead,
1447 : as well as the count star if count_added */
1448 73 : if (exp_refers(e, re) || (count_added && exp_refers(count_star_exp, re)))
1449 39 : list_remove_node(upper->exps, NULL, n);
1450 : n = next;
1451 : }
1452 :
1453 : /* update sum aggregate with new aggregate or reference to an existing one */
1454 25 : n->data = naggr;
1455 25 : list_hash_clear(groupby->exps);
1456 :
1457 : /* add column reference with new label, if 'col' was not found */
1458 25 : if (add_col) {
1459 3 : if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l))
1460 0 : groupby->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
1461 3 : list_append(l->exps, ocol);
1462 : }
1463 :
1464 : /* propagate alias and add new expression */
1465 25 : exp_prop_alias(v->sql->sa, newop, e);
1466 25 : list_append(upper->exps, newop);
1467 25 : v->changes++;
1468 : }
1469 : }
1470 : }
1471 : }
1472 : }
1473 31084 : return rel;
1474 : }
1475 :
1476 : /* optimize group by x+1,(y-2)*3,2-z into group by x,y,z */
1477 : static inline sql_rel *
1478 31084 : rel_simplify_groupby_columns(visitor *v, sql_rel *rel)
1479 : {
1480 31084 : if (is_groupby(rel->op) && !list_empty(rel->r)) {
1481 22225 : sql_rel *l = rel->l;
1482 :
1483 57011 : for (node *n=((list*)rel->r)->h; n ; n = n->next) {
1484 34786 : sql_exp *e = n->data;
1485 34786 : e->used = 0; /* we need to use this flag, clean it first */
1486 : }
1487 57011 : for (node *n=((list*)rel->r)->h; n ; n = n->next) {
1488 34786 : sql_exp *e = n->data;
1489 :
1490 34786 : if (e->type == e_column) {
1491 34752 : bool searching = true;
1492 34752 : sql_rel *efrel = NULL;
1493 34752 : sql_exp *exp = rel_find_exp_and_corresponding_rel(l, e, false, &efrel, NULL), *col = NULL, *tope = exp;
1494 :
1495 69522 : while (searching && !col) {
1496 34770 : sql_exp *exp_col = exp;
1497 :
1498 34770 : if (exp && is_numeric_upcast(exp))
1499 9 : exp = exp->l;
1500 34770 : if (exp && exp->type == e_func) {
1501 639 : list *el = exp->l;
1502 639 : sql_subfunc *sf = exp->f;
1503 : /* At the moment look only at injective math functions */
1504 639 : if (sf->func->type == F_FUNC && !sf->func->s &&
1505 627 : (!strcmp(sf->func->base.name, "sql_sub") || !strcmp(sf->func->base.name, "sql_add") || !strcmp(sf->func->base.name, "sql_mul"))) {
1506 107 : sql_exp *e1 = (sql_exp*) el->h->data, *e2 = (sql_exp*) el->h->next->data;
1507 : /* the optimization cannot be done if side-effect calls (e.g. rand()) are present */
1508 194 : int e1ok = exp_is_atom(e1) && !exp_unsafe(e1, true, false) && !exp_has_sideeffect(e1), e2ok = exp_is_atom(e2) && !exp_unsafe(e2, true, false) && !exp_has_sideeffect(e2);
1509 :
1510 107 : if ((!e1ok && e2ok) || (e1ok && !e2ok)) {
1511 59 : sql_exp *c = e1ok ? e2 : e1;
1512 59 : bool done = false;
1513 59 : exp_col = exps_find_exp(efrel->exps, c);
1514 59 : if (exp_col)
1515 28 : c = exp_col;
1516 :
1517 120 : while (!done) {
1518 61 : if (is_numeric_upcast(c))
1519 0 : c = c->l;
1520 61 : if (c->type == e_column) {
1521 43 : if (is_simple_project(efrel->op) || is_groupby(efrel->op)) {
1522 : /* in a simple projection, self-references may occur */
1523 43 : sql_exp *nc = exps_find_exp(efrel->exps, c);
1524 43 : if (nc && list_position(efrel->exps, nc) < list_position(efrel->exps, exp_col)) {
1525 2 : exp_col = c;
1526 2 : c = nc;
1527 2 : continue;
1528 : }
1529 : }
1530 : col = c; /* 'c' is a column reference from the left relation */
1531 : done = true;
1532 : } else {
1533 : exp = c; /* maybe a nested function call, let's continue searching */
1534 : done = true;
1535 : }
1536 : }
1537 : } else {
1538 : searching = false;
1539 : }
1540 : } else {
1541 : searching = false;
1542 : }
1543 : } else {
1544 : searching = false;
1545 : }
1546 : }
1547 34752 : if (col) { /* a column reference was found */
1548 41 : const char *rname = exp_relname(e), *name = exp_name(e);
1549 :
1550 : /* the grouping column has an alias, we have to keep it */
1551 41 : if ((rname && name && (strcmp(rname, e->l) != 0 || strcmp(name, e->r) != 0)) || (!rname && name && strcmp(name, e->r) != 0)) {
1552 2 : if (!has_label(e)) /* dangerous to merge, skip it */
1553 0 : continue;
1554 2 : if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l))
1555 0 : rel->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
1556 2 : list_append(l->exps, e);
1557 2 : n->data = e = exp_ref(v->sql, e);
1558 2 : list_hash_clear(rel->r);
1559 : }
1560 :
1561 41 : sql_exp *f = exps_find_exp(rel->r, col);
1562 :
1563 41 : if (f && list_position(rel->r, f) < list_position(rel->r, e)) { /* if already present, remove it */
1564 10 : e->used = 1;
1565 : } else {
1566 : /* Use an unique reference to the column found. If there's another grouping column label pointing into it,
1567 : rel_groupby_cse will hopefully remove it */
1568 31 : sql_exp *colf = exps_find_exp(l->exps, col);
1569 :
1570 : /* col is already found in the inner relation. Also look for a new reference for col, eg sql_add(col, 1), 1 as col */
1571 31 : if (colf && list_position(l->exps, colf) < list_position(l->exps, tope)) {
1572 2 : n->data = exp_ref(v->sql, col);
1573 25 : } else if (!colf && is_simple_project(l->op) && list_empty(l->r) && !rel_is_ref(l) && !need_distinct(l)) { /* trivial case, it can be added */
1574 25 : sql_exp *ne = col;
1575 25 : list_prepend(l->exps, ne);
1576 25 : n->data = exp_ref(v->sql, ne);
1577 0 : } else if (!colf && (is_simple_project(l->op) || is_groupby(l->op))) {
1578 : /* on these scenarios the new column expression will be ordered/(grouped for distinct) or create potential ambiguity (multiple ref), so skip */
1579 0 : continue;
1580 : } else {
1581 4 : sql_exp *ne = exp_ref(v->sql, col);
1582 :
1583 4 : if (colf) /* a col reference is already there, add a new label */
1584 4 : exp_label(v->sql->sa, ne, ++v->sql->label);
1585 4 : if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l))
1586 0 : rel->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
1587 4 : list_append(l->exps, ne);
1588 4 : n->data = exp_ref(v->sql, ne);
1589 : }
1590 31 : list_hash_clear(rel->r);
1591 : }
1592 41 : v->changes++;
1593 : }
1594 : }
1595 : }
1596 57011 : for (node *n=((list*)rel->r)->h; n ; ) {
1597 34786 : node *next = n->next;
1598 34786 : sql_exp *e = n->data;
1599 :
1600 34786 : if (e->used) /* remove unnecessary grouping columns */
1601 10 : list_remove_node(rel->r, NULL, n);
1602 : n = next;
1603 : }
1604 : }
1605 31084 : return rel;
1606 : }
1607 :
1608 : /* remove identical grouping columns */
1609 : static inline sql_rel *
1610 56144 : rel_groupby_cse(visitor *v, sql_rel *rel)
1611 : {
1612 56144 : if (is_groupby(rel->op) && !list_empty(rel->r)) {
1613 32200 : sql_rel *l = rel->l;
1614 :
1615 : /* for every group expression e1 */
1616 94461 : for (node *n=((list*)rel->r)->h; n ; n = n->next) {
1617 62262 : sql_exp *e1 = n->data;
1618 : /* it's good to examine the same expression in the subrelation e.g. in case it's an alias */
1619 : /* TODO maybe cover more cases? Here I only look at the left relation */
1620 124525 : sql_exp *e1_sub = e1->type == e_column ?
1621 62262 : e1->nid ? exps_bind_nid(l->exps, e1->nid) : exps_find_exp(l->exps, e1) : NULL;
1622 :
1623 : /* for every other group expression */
1624 121709 : for (node *m=n->next; m; m = m->next) {
1625 59448 : sql_exp *e2 = m->data;
1626 118896 : sql_exp *e2_sub = e2->type == e_column ?
1627 59448 : e2->nid ? exps_bind_nid(l->exps, e2->nid) : exps_find_exp(l->exps, e2) : NULL;
1628 :
1629 : /* check if the expression are the same */
1630 59448 : if (exp_match_exp(e1, e2) || exp_refers(e1, e2) || (e1_sub && e2_sub && (exp_match_exp(e1_sub, e2_sub) || exp_refers(e1_sub, e2_sub)))) {
1631 :
1632 : /* use e2 from rel->exps instead of e2 from the rel->r as it can have an alias from the higher rel */
1633 41 : sql_exp *e2_in_exps = exps_uses_nid(rel->exps, e2->alias.label);
1634 41 : assert(e2_in_exps);
1635 :
1636 : /* same as e2 */
1637 41 : sql_exp *e1_in_exps = exps_uses_nid(rel->exps, e1->alias.label);
1638 41 : if (!e1_in_exps)
1639 6 : continue;
1640 :
1641 : /* write e2 as an e1 alias since the expressions are the same */
1642 35 : sql_exp* e2_as_e1_alias = exp_copy(v->sql, e1_in_exps);
1643 : /* NOTE: it is important to get the rname (exp->l) and name (exp->r) from e2 IN the exps
1644 : * (e2_in_exps), and not from e2, since it could carry an alias from the higher rel */
1645 35 : exp_setalias(e2_as_e1_alias, e2_as_e1_alias->alias.label, e2_in_exps->l, e2_in_exps->r);
1646 35 : e2_as_e1_alias->alias.label = e2->alias.label;
1647 :
1648 : /* replace e2 with e2_as_e1_alias in expressions list */
1649 35 : node *e2_exps_node = list_find(rel->exps, e2_in_exps, NULL);
1650 35 : list_append_before(rel->exps, e2_exps_node, e2_as_e1_alias);
1651 35 : list_remove_node(rel->exps, NULL, e2_exps_node);
1652 :
1653 : /* finally remove e2 from the groups' list (->r) since it's redundant */
1654 35 : node *e2_r_node = list_find(rel->r, e2, NULL);
1655 35 : list_remove_node(rel->r, NULL, e2_r_node);
1656 :
1657 35 : v->changes++;
1658 : }
1659 : }
1660 : }
1661 : }
1662 56144 : return rel;
1663 : }
1664 :
1665 : sql_exp *list_exps_uses_exp(list *exps, const char *rname, const char *name);
1666 :
1667 : static sql_exp*
1668 17513 : exp_uses_exp(sql_exp *e, const char *rname, const char *name)
1669 : {
1670 17810 : sql_exp *res = NULL;
1671 :
1672 17810 : switch (e->type) {
1673 : case e_psm:
1674 : break;
1675 1943 : case e_atom: {
1676 1943 : if (e->f)
1677 0 : return list_exps_uses_exp(e->f, rname, name);
1678 : } break;
1679 296 : case e_convert:
1680 296 : return exp_uses_exp(e->l, rname, name);
1681 9147 : case e_column: {
1682 9147 : if (e->l && rname && strcmp(e->l, rname) == 0 &&
1683 2257 : e->r && name && strcmp(e->r, name) == 0)
1684 : return e;
1685 7054 : if (!e->l && !rname &&
1686 242 : e->r && name && strcmp(e->r, name) == 0)
1687 : return e;
1688 : } break;
1689 6006 : case e_func:
1690 : case e_aggr: {
1691 6006 : if (e->l)
1692 4874 : return list_exps_uses_exp(e->l, rname, name);
1693 : } break;
1694 418 : case e_cmp: {
1695 418 : if (e->flag == cmp_in || e->flag == cmp_notin) {
1696 0 : if ((res = exp_uses_exp(e->l, rname, name)))
1697 : return res;
1698 0 : return list_exps_uses_exp(e->r, rname, name);
1699 418 : } else if (e->flag == cmp_or || e->flag == cmp_filter) {
1700 2 : if ((res = list_exps_uses_exp(e->l, rname, name)))
1701 : return res;
1702 2 : return list_exps_uses_exp(e->r, rname, name);
1703 : } else {
1704 416 : if ((res = exp_uses_exp(e->l, rname, name)))
1705 : return res;
1706 414 : if ((res = exp_uses_exp(e->r, rname, name)))
1707 : return res;
1708 408 : if (e->f)
1709 : return exp_uses_exp(e->f, rname, name);
1710 : }
1711 : } break;
1712 : }
1713 : return NULL;
1714 : }
1715 :
1716 : sql_exp *
1717 9718 : list_exps_uses_exp(list *exps, const char *rname, const char *name)
1718 : {
1719 9718 : sql_exp *res = NULL;
1720 :
1721 9718 : if (!exps)
1722 : return NULL;
1723 26401 : for (node *n = exps->h; n && !res; n = n->next) {
1724 16683 : sql_exp *e = n->data;
1725 16683 : res = exp_uses_exp(e, rname, name);
1726 : }
1727 : return res;
1728 : }
1729 :
1730 : /* find in the list of expression an expression which uses e */
1731 : sql_exp *
1732 868 : exps_uses_exp(list *exps, sql_exp *e)
1733 : {
1734 868 : return list_exps_uses_exp(exps, exp_relname(e), exp_name(e));
1735 : }
1736 : /*
1737 : * Rewrite aggregations over munion all.
1738 : * groupby ([ union all (a, b, c) ], [gbe], [ count, sum ] )
1739 : *
1740 : * into
1741 : * groupby ([ union all( groupby( a, [gbe], [ count, sum] ),
1742 : * groupby( b, [gbe], [ count, sum] ),
1743 : * groupby( c, [gbe], [ count, sum] ) ],
1744 : * [gbe], [sum, sum] )
1745 : */
1746 : static inline bool
1747 1715 : rel_has_groupby(sql_rel *r)
1748 : {
1749 3034 : if (r) {
1750 2865 : if (is_groupby(r->op))
1751 : return true;
1752 2308 : if (is_simple_project(r->op))
1753 1319 : return rel_has_groupby(r->l);
1754 : }
1755 : return false;
1756 : }
1757 :
1758 : static inline sql_rel *
1759 1120 : rel_push_aggr_down_n_arry(visitor *v, sql_rel *rel)
1760 : {
1761 1120 : sql_rel *g = rel;
1762 1120 : sql_rel *u = rel->l, *ou = u;
1763 1120 : sql_rel *r = NULL;
1764 1120 : list *rgbe = NULL, *gbe = NULL, *exps = NULL;
1765 1120 : node *n, *m;
1766 :
1767 : // TODO why?
1768 1120 : if (u->op == op_project && !need_distinct(u))
1769 314 : u = u->l;
1770 :
1771 1120 : if (is_recursive(u))
1772 : return rel;
1773 :
1774 : /* make sure we don't create group by on group by's */
1775 2271 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
1776 1715 : r = n->data;
1777 3430 : if (rel_has_groupby(r))
1778 : return rel;
1779 : }
1780 :
1781 : /* distinct should be done over the full result */
1782 1620 : for (n = g->exps->h; n; n = n->next) {
1783 1095 : sql_exp *e = n->data;
1784 1095 : sql_subfunc *af = e->f;
1785 :
1786 1095 : if (e->type == e_atom ||
1787 1095 : e->type == e_func ||
1788 476 : (e->type == e_aggr &&
1789 476 : ((strcmp(af->func->base.name, "sum") &&
1790 403 : strcmp(af->func->base.name, "count") &&
1791 90 : strcmp(af->func->base.name, "min") &&
1792 476 : strcmp(af->func->base.name, "max")) ||
1793 : need_distinct(e))))
1794 : return rel;
1795 : }
1796 :
1797 525 : list *nl = sa_list(v->sql->sa);
1798 1586 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
1799 1061 : r = rel_dup(n->data);
1800 : //n->data = NULL; /* clean list as we steal the relation r, stealing is needed else (with multiple references) double project cleanup fails */
1801 1061 : if (!is_project(r->op))
1802 47 : r = rel_project(v->sql->sa, r,
1803 : rel_projections(v->sql, r, NULL, 1, 1));
1804 1061 : rel_rename_exps(v->sql, u->exps, r->exps);
1805 1061 : if (u != ou) {
1806 598 : bool isproject = is_project(r->op);
1807 598 : r = rel_project(v->sql->sa, r, NULL);
1808 598 : r->exps = exps_copy(v->sql, ou->exps);
1809 598 : rel_rename_exps(v->sql, ou->exps, r->exps);
1810 598 : set_processed(r);
1811 598 : if (isproject)
1812 598 : r = rel_push_project_down_(v, r); /* cleanup any double projects */
1813 : }
1814 1061 : if (g->r && list_length(g->r) > 0) {
1815 843 : list *gbe = g->r;
1816 843 : rgbe = exps_copy(v->sql, gbe);
1817 : }
1818 1061 : r = rel_groupby(v->sql, r, NULL);
1819 1061 : r->r = rgbe;
1820 1061 : r->nrcols = g->nrcols;
1821 1061 : r->card = g->card;
1822 1061 : r->exps = exps_copy(v->sql, g->exps);
1823 1061 : r->nrcols = list_length(r->exps);
1824 1061 : set_processed(r);
1825 :
1826 1061 : assert(r);
1827 1061 : append(nl, r);
1828 : }
1829 :
1830 : /* group by on primary keys which define the partitioning scheme
1831 : * don't need a finalizing group by */
1832 : /* how to check if a partition is based on some primary key ?
1833 : * */
1834 525 : if (!list_empty(rel->r)) {
1835 1031 : for (node *n = ((list*)rel->r)->h; n; n = n->next) {
1836 611 : sql_exp *e = n->data;
1837 611 : sql_column *c = NULL;
1838 :
1839 611 : if ((c = exp_is_pkey(rel, e)) && partition_find_part(v->sql->session->tr, c->t, NULL)) {
1840 : /* check if key is partition key */
1841 0 : v->changes++;
1842 0 : return rel_inplace_setop_n_ary(v->sql, rel, nl, op_munion,
1843 : rel_projections(v->sql, rel, NULL, 1, 1));
1844 : }
1845 : }
1846 : }
1847 :
1848 525 : if (!list_empty(rel->r)) {
1849 420 : list *ogbe = rel->r;
1850 :
1851 420 : gbe = new_exp_list(v->sql->sa);
1852 1031 : for (n = ogbe->h; n; n = n->next) {
1853 611 : sql_exp *e = n->data, *ne;
1854 :
1855 : /* group by in aggregation list */
1856 611 : ne = exps_uses_exp( rel->exps, e);
1857 611 : if (ne) {
1858 605 : sql_rel *first_munion_rel = nl->h->data;
1859 605 : ne = list_find_exp(first_munion_rel->exps, ne);
1860 : }
1861 605 : if (!ne) {
1862 : /* e only in the u1,u2,...un->r (group by list) */
1863 24 : for (node *n = nl->h; n; n = n->next) {
1864 16 : ne = exp_ref(v->sql, e);
1865 16 : list_append(((sql_rel*)n->data)->exps, ne);
1866 : }
1867 : }
1868 8 : assert(ne);
1869 611 : ne = exp_ref(v->sql, ne);
1870 611 : append(gbe, ne);
1871 : }
1872 : }
1873 :
1874 525 : u = rel_setop_n_ary(v->sql->sa, nl, op_munion);
1875 525 : rel_setop_n_ary_set_exps(v->sql, u,
1876 525 : rel_projections(v->sql, nl->h->data, NULL, 1, 1), false);
1877 525 : set_processed(u);
1878 :
1879 525 : exps = new_exp_list(v->sql->sa);
1880 1586 : for (n = u->exps->h, m = rel->exps->h; n && m; n = n->next, m = m->next) {
1881 1061 : sql_exp *ne, *e = n->data, *oa = m->data;
1882 :
1883 1061 : if (oa->type == e_aggr) {
1884 445 : sql_subfunc *f = oa->f;
1885 445 : int cnt = exp_aggr_is_count(oa);
1886 445 : sql_subfunc *a = sql_bind_func(v->sql, "sys", (cnt)?"sum":f->func->base.name, exp_subtype(e), NULL, F_AGGR, true, true);
1887 :
1888 445 : assert(a);
1889 : /* munion of aggr result may have nils
1890 : * because sum/count of empty set */
1891 445 : set_has_nil(e);
1892 445 : e = exp_ref(v->sql, e);
1893 445 : ne = exp_aggr1(v->sql->sa, e, a, need_distinct(e), 1, e->card, 1);
1894 : } else {
1895 616 : ne = exp_copy(v->sql, oa);
1896 : }
1897 1061 : exp_setalias(ne, oa->alias.label, exp_find_rel_name(oa), exp_name(oa));
1898 1061 : append(exps, ne);
1899 : }
1900 525 : v->changes++;
1901 525 : return rel_inplace_groupby(rel, u, gbe, exps);
1902 : }
1903 :
1904 : /*
1905 : * Rewrite aggregations over union all.
1906 : * groupby ([ union all (a, b) ], [gbe], [ count, sum ] )
1907 : *
1908 : * into
1909 : * groupby ( [ union all( groupby( a, [gbe], [ count, sum] ), [ groupby( b, [gbe], [ count, sum] )) , [gbe], [sum, sum] )
1910 : */
1911 : static inline sql_rel *
1912 56144 : rel_push_aggr_down(visitor *v, sql_rel *rel)
1913 : {
1914 56144 : if (rel->op == op_groupby && rel->l) {
1915 56054 : sql_rel *u = rel->l, *ou = u;
1916 56054 : sql_rel *g = rel;
1917 56054 : sql_rel *ul = u->l;
1918 56054 : sql_rel *ur = u->r;
1919 56054 : node *n, *m;
1920 56054 : list *lgbe = NULL, *rgbe = NULL, *gbe = NULL, *exps = NULL;
1921 :
1922 56054 : if (u->op == op_project && !need_distinct(u))
1923 18828 : u = u->l;
1924 :
1925 56054 : if (!u || !(is_union(u->op) || is_munion(u->op)) || need_distinct(u) || is_single(u) || !u->exps || rel_is_ref(u))
1926 : return rel;
1927 :
1928 1120 : if (is_munion(u->op))
1929 1120 : return rel_push_aggr_down_n_arry(v, rel);
1930 :
1931 0 : ul = u->l;
1932 0 : ur = u->r;
1933 :
1934 : /* make sure we don't create group by on group by's */
1935 0 : if (ul->op == op_groupby || ur->op == op_groupby)
1936 : return rel;
1937 :
1938 : /* distinct should be done over the full result */
1939 0 : for (n = g->exps->h; n; n = n->next) {
1940 0 : sql_exp *e = n->data;
1941 0 : sql_subfunc *af = e->f;
1942 :
1943 0 : if (e->type == e_atom ||
1944 0 : e->type == e_func ||
1945 0 : (e->type == e_aggr &&
1946 0 : ((strcmp(af->func->base.name, "sum") &&
1947 0 : strcmp(af->func->base.name, "count") &&
1948 0 : strcmp(af->func->base.name, "min") &&
1949 0 : strcmp(af->func->base.name, "max")) ||
1950 : need_distinct(e))))
1951 : return rel;
1952 : }
1953 :
1954 0 : ul = rel_dup(ul);
1955 0 : ur = rel_dup(ur);
1956 0 : if (!is_project(ul->op))
1957 0 : ul = rel_project(v->sql->sa, ul,
1958 : rel_projections(v->sql, ul, NULL, 1, 1));
1959 0 : if (!is_project(ur->op))
1960 0 : ur = rel_project(v->sql->sa, ur,
1961 : rel_projections(v->sql, ur, NULL, 1, 1));
1962 0 : rel_rename_exps(v->sql, u->exps, ul->exps);
1963 0 : rel_rename_exps(v->sql, u->exps, ur->exps);
1964 0 : if (u != ou) {
1965 0 : ul = rel_project(v->sql->sa, ul, NULL);
1966 0 : ul->exps = exps_copy(v->sql, ou->exps);
1967 0 : rel_rename_exps(v->sql, ou->exps, ul->exps);
1968 0 : set_processed(ul);
1969 0 : ur = rel_project(v->sql->sa, ur, NULL);
1970 0 : ur->exps = exps_copy(v->sql, ou->exps);
1971 0 : rel_rename_exps(v->sql, ou->exps, ur->exps);
1972 0 : set_processed(ur);
1973 : }
1974 :
1975 0 : if (g->r && list_length(g->r) > 0) {
1976 0 : list *gbe = g->r;
1977 :
1978 0 : lgbe = exps_copy(v->sql, gbe);
1979 0 : rgbe = exps_copy(v->sql, gbe);
1980 : }
1981 0 : ul = rel_groupby(v->sql, ul, NULL);
1982 0 : ul->r = lgbe;
1983 0 : ul->nrcols = g->nrcols;
1984 0 : ul->card = g->card;
1985 0 : ul->exps = exps_copy(v->sql, g->exps);
1986 0 : ul->nrcols = list_length(ul->exps);
1987 0 : set_processed(ul);
1988 :
1989 0 : ur = rel_groupby(v->sql, ur, NULL);
1990 0 : ur->r = rgbe;
1991 0 : ur->nrcols = g->nrcols;
1992 0 : ur->card = g->card;
1993 0 : ur->exps = exps_copy(v->sql, g->exps);
1994 0 : ur->nrcols = list_length(ur->exps);
1995 0 : set_processed(ur);
1996 :
1997 : /* group by on primary keys which define the partitioning scheme
1998 : * don't need a finalizing group by */
1999 : /* how to check if a partition is based on some primary key ?
2000 : * */
2001 0 : if (!list_empty(rel->r)) {
2002 0 : for (node *n = ((list*)rel->r)->h; n; n = n->next) {
2003 0 : sql_exp *e = n->data;
2004 0 : sql_column *c = NULL;
2005 :
2006 0 : if ((c = exp_is_pkey(rel, e)) && partition_find_part(v->sql->session->tr, c->t, NULL)) {
2007 : /* check if key is partition key */
2008 0 : v->changes++;
2009 0 : return rel_inplace_setop(v->sql, rel, ul, ur, op_union,
2010 : rel_projections(v->sql, rel, NULL, 1, 1));
2011 : }
2012 : }
2013 : }
2014 :
2015 0 : if (!list_empty(rel->r)) {
2016 0 : list *ogbe = rel->r;
2017 :
2018 0 : gbe = new_exp_list(v->sql->sa);
2019 0 : for (n = ogbe->h; n; n = n->next) {
2020 0 : sql_exp *e = n->data, *ne;
2021 :
2022 : /* group by in aggregation list */
2023 0 : ne = exps_uses_exp( rel->exps, e);
2024 0 : if (ne)
2025 0 : ne = list_find_exp( ul->exps, ne);
2026 0 : if (!ne) {
2027 : /* e only in the ul/ur->r (group by list) */
2028 0 : ne = exp_ref(v->sql, e);
2029 0 : list_append(ul->exps, ne);
2030 0 : ne = exp_ref(v->sql, e);
2031 0 : list_append(ur->exps, ne);
2032 : }
2033 0 : assert(ne);
2034 0 : ne = exp_ref(v->sql, ne);
2035 0 : append(gbe, ne);
2036 : }
2037 : }
2038 :
2039 0 : u = rel_setop(v->sql->sa, ul, ur, op_union);
2040 0 : rel_setop_set_exps(v->sql, u, rel_projections(v->sql, ul, NULL, 1, 1), false);
2041 0 : set_processed(u);
2042 :
2043 0 : exps = new_exp_list(v->sql->sa);
2044 0 : for (n = u->exps->h, m = rel->exps->h; n && m; n = n->next, m = m->next) {
2045 0 : sql_exp *ne, *e = n->data, *oa = m->data;
2046 :
2047 0 : if (oa->type == e_aggr) {
2048 0 : sql_subfunc *f = oa->f;
2049 0 : int cnt = exp_aggr_is_count(oa);
2050 0 : sql_subfunc *a = sql_bind_func(v->sql, "sys", (cnt)?"sum":f->func->base.name, exp_subtype(e), NULL, F_AGGR, true, true);
2051 :
2052 0 : assert(a);
2053 : /* union of aggr result may have nils
2054 : * because sum/count of empty set */
2055 0 : set_has_nil(e);
2056 0 : e = exp_ref(v->sql, e);
2057 0 : ne = exp_aggr1(v->sql->sa, e, a, need_distinct(e), 1, e->card, 1);
2058 : } else {
2059 0 : ne = exp_copy(v->sql, oa);
2060 : }
2061 0 : exp_setalias(ne, oa->alias.label, exp_find_rel_name(oa), exp_name(oa));
2062 0 : append(exps, ne);
2063 : }
2064 0 : v->changes++;
2065 0 : return rel_inplace_groupby( rel, u, gbe, exps);
2066 : }
2067 : return rel;
2068 : }
2069 :
2070 : /*
2071 : * More general
2072 : * groupby(
2073 : * [ outer ] join(
2074 : * project(
2075 : * table(A) [ c1, c2, .. ]
2076 : * ) [ c1, c2, identity(c2) as I, .. ],
2077 : * table(B) [ c1, c2, .. ]
2078 : * ) [ A.c1 = B.c1 ]
2079 : * ) [ I ] [ a1, a2, .. ]
2080 : *
2081 : * ->
2082 : *
2083 : * [ outer ] join(
2084 : * project(
2085 : * table(A) [ c1, c2, .. ]
2086 : * ) [ c1, c2, .. ],
2087 : * groupby (
2088 : * table(B) [ c1, c2, .. ]
2089 : * ) [ B.c1 ] [ a1, a2, .. ]
2090 : * ) [ A.c1 = B.c1 ]
2091 : */
2092 : static sql_rel *
2093 23184 : gen_push_groupby_down(mvc *sql, sql_rel *rel, int *changes)
2094 : {
2095 23184 : sql_rel *j = rel->l;
2096 23184 : list *gbe = rel->r;
2097 :
2098 23184 : if (rel->op == op_groupby && list_length(gbe) == 1 && j->op == op_join){
2099 5327 : sql_rel *jl = j->l, *jr = j->r, *cr, *cl;
2100 5327 : sql_exp *gb = gbe->h->data, *e;
2101 5327 : node *n;
2102 5327 : int left = 1;
2103 5327 : list *aggrs, *aliases, *gbe;
2104 :
2105 5327 : if (!is_identity(gb, jl) && !is_identity(gb, jr))
2106 : return rel;
2107 6 : if (jl->op == op_project &&
2108 4 : (e = list_find_exp( jl->exps, gb)) != NULL &&
2109 2 : find_prop(e->p, PROP_HASHCOL) != NULL) {
2110 : left = 0;
2111 : cr = jr;
2112 : cl = jl;
2113 4 : } else if (jr->op == op_project &&
2114 8 : (e = list_find_exp( jr->exps, gb)) != NULL &&
2115 4 : find_prop(e->p, PROP_HASHCOL) != NULL) {
2116 : left = 1;
2117 : cr = jl;
2118 : cl = jr;
2119 : } else {
2120 0 : return rel;
2121 : }
2122 :
2123 6 : if ((left && is_base(jl->op)) || (!left && is_base(jr->op))||
2124 0 : (left && is_select(jl->op)) || (!left && is_select(jr->op))
2125 0 : || rel_is_join_on_pkey(j, false))
2126 6 : return rel;
2127 :
2128 : /* only add aggr (based on left/right), and repeat the group by column */
2129 0 : aggrs = sa_list(sql->sa);
2130 0 : aliases = sa_list(sql->sa);
2131 0 : if (rel->exps) for (n = rel->exps->h; n; n = n->next) {
2132 0 : sql_exp *ce = n->data;
2133 :
2134 0 : if (exp_is_atom(ce))
2135 0 : list_append(aliases, ce);
2136 0 : else if (ce->type == e_column) {
2137 0 : if (rel_has_exp(cl, ce, false) == 0) /* collect aliases outside groupby */
2138 0 : list_append(aliases, ce);
2139 : else
2140 0 : list_append(aggrs, ce);
2141 0 : } else if (ce->type == e_aggr) {
2142 0 : list *args = ce->l;
2143 :
2144 : /* check args are part of left/right */
2145 0 : if (!list_empty(args) && rel_has_exps(cl, args, false) == 0)
2146 : return rel;
2147 0 : if (rel->op != op_join && exp_aggr_is_count(ce))
2148 0 : ce->p = prop_create(sql->sa, PROP_COUNT, ce->p);
2149 0 : list_append(aggrs, ce);
2150 : }
2151 : }
2152 : /* TODO move any column expressions (aliases) into the project list */
2153 :
2154 : /* find gb in left or right and should be unique */
2155 0 : gbe = sa_list(sql->sa);
2156 : /* push groupby to right, group on join exps */
2157 0 : if (j->exps) for (n = j->exps->h; n; n = n->next) {
2158 0 : sql_exp *ce = n->data, *l = ce->l, *r = ce->r, *e;
2159 :
2160 : /* get left/right hand of e_cmp */
2161 0 : assert(ce->type == e_cmp);
2162 0 : if (ce->flag == cmp_equal && is_alias(l->type) && is_alias(r->type) &&
2163 0 : (((e = rel_find_exp(cr, l)) && rel_find_exp(cl, r)) ||
2164 0 : ((e = rel_find_exp(cr, r)) && rel_find_exp(cl, l)))) {
2165 0 : e = exp_ref(sql, e);
2166 0 : list_append(gbe, e);
2167 : } else {
2168 0 : return rel;
2169 : }
2170 : }
2171 0 : if (!left)
2172 0 : cr = j->r = rel_groupby(sql, cr, gbe);
2173 : else
2174 0 : cr = j->l = rel_groupby(sql, cr, gbe);
2175 0 : cr->exps = list_merge(cr->exps, aggrs, (fdup)NULL);
2176 0 : set_processed(cr);
2177 0 : if (!is_project(cl->op))
2178 0 : cl = rel_project(sql->sa, cl,
2179 : rel_projections(sql, cl, NULL, 1, 1));
2180 0 : cl->exps = list_merge(cl->exps, aliases, (fdup)NULL);
2181 0 : set_processed(cl);
2182 0 : if (!left)
2183 0 : j->l = cl;
2184 : else
2185 0 : j->r = cl;
2186 0 : rel -> l = NULL;
2187 0 : rel_destroy(rel);
2188 :
2189 0 : if (list_empty(cr->exps) && list_empty(j->exps)) { /* remove crossproduct */
2190 0 : sql_rel *r = cl;
2191 0 : if (!left)
2192 0 : j->l = NULL;
2193 : else
2194 0 : j->r = NULL;
2195 0 : rel_destroy(j);
2196 0 : j = r;
2197 : }
2198 0 : (*changes)++;
2199 0 : return j;
2200 : }
2201 : return rel;
2202 : }
2203 :
2204 : /*
2205 : * Rewrite group(project(join(A,Dict)[a.i==dict.i])[...dict.n])[dict.n][ ... dict.n ]
2206 : * into
2207 : * project(join(groupby (A)[a.i],[a.i]), Dict)[a.i==dict.i])[dict.n]
2208 : *
2209 : */
2210 : static inline sql_rel *
2211 56144 : rel_push_groupby_down(visitor *v, sql_rel *rel)
2212 : {
2213 56144 : sql_rel *p = rel->l;
2214 56144 : list *gbe = rel->r;
2215 :
2216 56144 : if (rel->op == op_groupby && gbe && p && is_join(p->op))
2217 8712 : return gen_push_groupby_down(v->sql, rel, &v->changes);
2218 47432 : if (rel->op == op_groupby && gbe && p && p->op == op_project) {
2219 14472 : sql_rel *j = p->l;
2220 14472 : sql_rel *jl, *jr;
2221 14472 : node *n;
2222 :
2223 14472 : if (!j || j->op != op_join || list_length(j->exps) != 1)
2224 12682 : return gen_push_groupby_down(v->sql, rel, &v->changes);
2225 1790 : jl = j->l;
2226 1790 : jr = j->r;
2227 :
2228 : /* check if jr is a dict with index and var still used */
2229 1790 : if (jr->op != op_basetable || jr->l || !jr->r || list_length(jr->exps) != 2)
2230 1790 : return gen_push_groupby_down(v->sql, rel, &v->changes);
2231 :
2232 : /* check if group by is done on dict column */
2233 0 : for(n = gbe->h; n; n = n->next) {
2234 0 : sql_exp *ge = n->data, *pe = NULL, *e = NULL;
2235 :
2236 : /* find group by exp in project, then in dict */
2237 0 : pe = rel_find_exp(p, ge);
2238 0 : if (pe) /* find project exp in right hand of join, ie dict */
2239 0 : e = rel_find_exp(jr, pe);
2240 0 : if (pe && e) { /* Rewrite: join with dict after the group by */
2241 0 : list *pexps = rel_projections(v->sql, rel, NULL, 1, 1), *npexps;
2242 0 : node *m;
2243 0 : sql_exp *ne = j->exps->h->data; /* join exp */
2244 0 : p->l = jl; /* Project now only on the left side of the join */
2245 :
2246 0 : ne = ne->l; /* The left side of the compare is the index of the left */
2247 :
2248 : /* find ge reference in new projection list */
2249 0 : npexps = sa_list(v->sql->sa);
2250 0 : for (m = pexps->h; m; m = m->next) {
2251 0 : sql_exp *a = m->data;
2252 :
2253 0 : if (exp_refers(ge, a)) {
2254 0 : sql_exp *sc = jr->exps->t->data;
2255 0 : sql_exp *e = exp_ref(v->sql, sc);
2256 0 : if (exp_name(a))
2257 0 : exp_prop_alias(v->sql->sa, e, a);
2258 : a = e;
2259 : }
2260 0 : append(npexps, a);
2261 : }
2262 :
2263 : /* find ge in aggr list */
2264 0 : for (m = rel->exps->h; m; m = m->next) {
2265 0 : sql_exp *a = m->data;
2266 :
2267 0 : if (exp_match_exp(a, ge) || exp_refers(ge, a)) {
2268 0 : a = exp_ref(v->sql, ne);
2269 0 : if (exp_name(ne))
2270 0 : exp_prop_alias(v->sql->sa, a, ne);
2271 0 : m->data = a;
2272 : }
2273 : }
2274 :
2275 : /* change alias pe, ie project out the index */
2276 0 : pe->l = (void*)exp_relname(ne);
2277 0 : pe->r = (void*)exp_name(ne);
2278 0 : if (exp_name(ne))
2279 0 : exp_prop_alias(v->sql->sa, pe, ne);
2280 :
2281 : /* change alias ge */
2282 0 : ge->l = (void*)exp_relname(pe);
2283 0 : ge->r = (void*)exp_name(pe);
2284 0 : if (exp_name(pe))
2285 0 : exp_prop_alias(v->sql->sa, ge, pe);
2286 :
2287 : /* zap both project and groupby name hash tables (as we changed names above) */
2288 0 : list_hash_clear(rel->exps);
2289 0 : list_hash_clear((list*)rel->r);
2290 0 : list_hash_clear(p->exps);
2291 :
2292 : /* add join */
2293 0 : j->l = rel;
2294 0 : rel = rel_project(v->sql->sa, j, npexps);
2295 0 : v->changes++;
2296 : }
2297 : }
2298 : }
2299 : return rel;
2300 : }
2301 :
2302 : /* reduce group by expressions based on pkey info
2303 : *
2304 : * The reduced group by and (derived) aggr expressions are restored via
2305 : * extra (new) aggregate columns.
2306 : */
2307 : static inline sql_rel *
2308 56145 : rel_reduce_groupby_exps(visitor *v, sql_rel *rel)
2309 : {
2310 56145 : list *gbe = rel->r;
2311 :
2312 56145 : if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel) && list_length(gbe)) {
2313 30409 : node *n, *m;
2314 30409 : int k, j, i, ngbe = list_length(gbe);
2315 30409 : int8_t *scores = SA_NEW_ARRAY(v->sql->ta, int8_t, ngbe);
2316 30409 : sql_column *c;
2317 30409 : sql_table **tbls = SA_NEW_ARRAY(v->sql->ta, sql_table*, ngbe);
2318 30409 : sql_rel **bts = SA_NEW_ARRAY(v->sql->ta, sql_rel*, ngbe), *bt = NULL;
2319 :
2320 30409 : gbe = rel->r;
2321 88029 : for (k = 0, i = 0, n = gbe->h; n; n = n->next, k++) {
2322 57620 : sql_exp *e = n->data;
2323 :
2324 57620 : c = exp_find_column_(rel, e, -2, &bt);
2325 57620 : if (c) {
2326 67342 : for(j = 0; j < i; j++)
2327 30554 : if (c->t == tbls[j] && bts[j] == bt)
2328 : break;
2329 50818 : tbls[j] = c->t;
2330 50818 : bts[j] = bt;
2331 50818 : i += (j == i);
2332 : }
2333 : }
2334 30409 : if (i) { /* forall tables find pkey and
2335 : remove useless other columns */
2336 : /* TODO also remove group by columns which are related to
2337 : * the other columns using a foreign-key join (n->1), ie 1
2338 : * on the to be removed side.
2339 : */
2340 64255 : for(j = 0; j < i; j++) {
2341 36775 : int l, nr = 0, cnr = 0;
2342 :
2343 36775 : k = list_length(gbe);
2344 36776 : memset(scores, 0, list_length(gbe));
2345 36776 : if (tbls[j]->pkey) {
2346 22055 : for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
2347 16539 : fcmp cmp = (fcmp)&kc_column_cmp;
2348 16539 : sql_exp *e = n->data;
2349 :
2350 16539 : c = exp_find_column_(rel, e, -2, &bt);
2351 23369 : if (c && c->t == tbls[j] && bts[j] == bt &&
2352 6830 : list_find(tbls[j]->pkey->k.columns, c, cmp) != NULL) {
2353 648 : scores[l] = 1;
2354 648 : nr ++;
2355 14460 : } else if (c && c->t == tbls[j] && bts[j] == bt) {
2356 : /* Okay we can cleanup a group by column */
2357 6182 : scores[l] = -1;
2358 6182 : cnr ++;
2359 : }
2360 : }
2361 : }
2362 5516 : if (nr) {
2363 639 : int all = (list_length(tbls[j]->pkey->k.columns) == nr);
2364 639 : sql_kc *kc = tbls[j]->pkey->k.columns->h->data;
2365 :
2366 639 : c = kc->c;
2367 1683 : for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
2368 1044 : sql_exp *e = n->data;
2369 :
2370 : /* pkey based group by */
2371 1160 : if (scores[l] == 1 && ((all ||
2372 : /* first of key */
2373 740 : (c == exp_find_column(rel, e, -2))) && !find_prop(e->p, PROP_HASHCOL)))
2374 13 : e->p = prop_create(v->sql->sa, PROP_HASHCOL, e->p);
2375 : }
2376 2233 : for (m = rel->exps->h; m; m = m->next ){
2377 1594 : sql_exp *e = m->data;
2378 :
2379 5417 : for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
2380 3836 : sql_exp *gb = n->data;
2381 :
2382 : /* pkey based group by */
2383 3836 : if (scores[l] == 1 && exp_match_exp(e,gb) && find_prop(gb->p, PROP_HASHCOL) && !find_prop(e->p, PROP_HASHCOL)) {
2384 13 : e->p = prop_create(v->sql->sa, PROP_HASHCOL, e->p);
2385 13 : break;
2386 : }
2387 :
2388 : }
2389 : }
2390 : }
2391 36776 : if (cnr && nr && list_length(tbls[j]->pkey->k.columns) == nr) {
2392 108 : list *ngbe = new_exp_list(v->sql->sa);
2393 :
2394 392 : for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
2395 284 : sql_exp *e = n->data;
2396 :
2397 : /* keep the group by columns which form a primary key
2398 : * of this table. And those unrelated to this table. */
2399 284 : if (scores[l] != -1)
2400 139 : append(ngbe, e);
2401 : }
2402 108 : rel->r = ngbe;
2403 : /* rewrite gbe and aggr, in the aggr list */
2404 108 : if (0)
2405 : for (m = rel->exps->h; m; m = m->next ){
2406 : sql_exp *e = m->data;
2407 : int fnd = 0;
2408 :
2409 : for (l = 0, n = gbe->h; l < k && n && !fnd; l++, n = n->next) {
2410 : sql_exp *gb = n->data;
2411 :
2412 : if (scores[l] == -1 && exp_refers(gb, e)) {
2413 : /*
2414 : sql_exp *rs = exp_column(v->sql->sa, gb->l?gb->l:exp_relname(gb), gb->r?gb->r:exp_name(gb), exp_subtype(gb), rel->card, has_nil(gb), is_unique(gb), is_intern(gb));
2415 : exp_setalias(rs, e->alias.label, exp_find_rel_name(e), exp_name(e));
2416 : e = rs;
2417 : */
2418 : assert(e->alias.label == e->nid);
2419 : fnd = 1;
2420 : }
2421 : }
2422 : m->data = e;
2423 : }
2424 : /* new reduced aggr expression list */
2425 108 : assert(list_length(rel->exps)>0);
2426 : /* only one reduction at a time */
2427 108 : list_hash_clear(rel->exps);
2428 108 : v->changes++;
2429 108 : return rel;
2430 : }
2431 36668 : gbe = rel->r;
2432 : }
2433 : }
2434 : }
2435 : /* remove constants from group by list */
2436 56037 : if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel)) {
2437 30510 : int i;
2438 30510 : node *n;
2439 :
2440 87847 : for (i = 0, n = gbe->h; n; n = n->next) {
2441 57337 : sql_exp *e = n->data;
2442 :
2443 57337 : if (exp_is_atom(e))
2444 23 : i++;
2445 : }
2446 30510 : if (i) {
2447 23 : list *ngbe = new_exp_list(v->sql->sa);
2448 23 : list *dgbe = new_exp_list(v->sql->sa);
2449 :
2450 47 : for (n = gbe->h; n; n = n->next) {
2451 24 : sql_exp *e = n->data;
2452 :
2453 24 : if (!exp_is_atom(e))
2454 1 : append(ngbe, e);
2455 : /* we need at least one gbe */
2456 23 : else if (!n->next && list_empty(ngbe))
2457 22 : append(ngbe, e);
2458 : else
2459 1 : append(dgbe, e);
2460 : }
2461 23 : rel->r = ngbe;
2462 23 : if (!list_empty(dgbe)) {
2463 : /* use atom's directly in the aggr expr list */
2464 :
2465 3 : for (n = rel->exps->h; n; n = n->next) {
2466 2 : sql_exp *e = n->data, *ne = NULL;
2467 :
2468 2 : if (e->type == e_column) {
2469 2 : if (e->nid)
2470 2 : ne = exps_bind_nid(dgbe, e->nid);
2471 2 : if (ne) {
2472 1 : ne = exp_copy(v->sql, ne);
2473 1 : exp_prop_alias(v->sql->sa, ne, e);
2474 1 : e = ne;
2475 : }
2476 : }
2477 2 : n->data = e;
2478 : }
2479 1 : list_hash_clear(rel->exps);
2480 1 : v->changes++;
2481 : }
2482 : }
2483 : }
2484 : return rel;
2485 : }
2486 :
2487 : /* if all arguments to a distinct aggregate are unique, remove 'distinct' property */
2488 : static inline sql_rel *
2489 56144 : rel_distinct_aggregate_on_unique_values(visitor *v, sql_rel *rel)
2490 : {
2491 56144 : if (is_groupby(rel->op) && !list_empty(rel->exps)) {
2492 171411 : for (node *n = rel->exps->h; n; n = n->next) {
2493 115363 : sql_exp *exp = (sql_exp*) n->data;
2494 :
2495 115363 : if (exp->type == e_aggr && need_distinct(exp)) {
2496 496 : bool all_unique = true;
2497 496 : list *l = exp->l;
2498 :
2499 992 : for (node *m = l->h; m && all_unique; m = m->next) {
2500 496 : sql_exp *arg = (sql_exp*) m->data;
2501 :
2502 630 : all_unique &= arg->type == e_column && is_unique(arg) && (!is_semantics(exp) || !has_nil(arg));
2503 : }
2504 496 : if (!all_unique && exps_card(l) > CARD_ATOM)
2505 200 : all_unique = exps_unique(v->sql, rel, l) && (!is_semantics(exp) || !have_nil(l));
2506 : if (all_unique) {
2507 134 : set_nodistinct(exp);
2508 134 : v->changes++;
2509 : }
2510 : }
2511 : }
2512 : }
2513 56144 : return rel;
2514 : }
2515 :
2516 : static inline sql_rel *
2517 56144 : rel_remove_const_aggr(visitor *v, sql_rel *rel)
2518 : {
2519 56144 : if(!rel) {
2520 : return rel;
2521 : }
2522 :
2523 56144 : list *exps = rel->exps;
2524 :
2525 56144 : if(rel->op != op_groupby || list_empty(exps)) {
2526 6 : return rel;
2527 : }
2528 :
2529 56139 : if(!list_empty(rel->r)) {
2530 : /* in the general case in an expression of an aggregate over
2531 : * a constant can be rewritten as just the const e.g.
2532 : * aggr(const) -> const
2533 : */
2534 :
2535 122674 : for(node *n = exps->h; n; n = n->next) {
2536 90450 : sql_exp *e = n->data;
2537 :
2538 90450 : if(e->type != e_aggr) {
2539 62266 : continue;
2540 : }
2541 :
2542 28184 : sql_func *j = ((sql_subfunc *)e->f)->func;
2543 :
2544 : /* some aggregates with const values can only be eliminated
2545 : * under certain circumstances e.g.
2546 : * sum(NULL) -> NULL, sum(0) -> 0
2547 : * prod(NULL) -> NULL, prod(1) -> 1
2548 : * count(NULL) -> 0
2549 : */
2550 28184 : int sum = strcmp(j->base.name, "sum") == 0,
2551 28184 : prd = strcmp(j->base.name, "prod") == 0,
2552 28184 : cnt = strcmp(j->base.name, "count") == 0;
2553 :
2554 28184 : if(!j->s && j->system == 1) {
2555 23539 : list *se = e->l;
2556 :
2557 23539 : if(se == NULL) {
2558 8600 : continue;
2559 : }
2560 :
2561 29876 : for(node *m = se->h; m; m = m->next) {
2562 14937 : sql_exp *w = m->data;
2563 :
2564 14937 : if(w->type == e_atom && w->card == CARD_ATOM) {
2565 26 : atom *wa = w->l;
2566 :
2567 26 : if(sum && !(wa->isnull || atom_is_zero(wa))) {
2568 3 : continue;
2569 : }
2570 :
2571 23 : if(prd && !(wa->isnull || atom_is_one(wa))) {
2572 0 : continue;
2573 : }
2574 :
2575 23 : if(cnt) {
2576 5 : if(wa->isnull) {
2577 3 : list_remove_node(se, NULL, m);
2578 :
2579 3 : w=exp_atom_lng(v->sql->sa, 0);
2580 3 : list_append(se, w);
2581 : }
2582 : else {
2583 2 : continue;
2584 : }
2585 : }
2586 :
2587 21 : exp_setalias(w,e->alias.label,e->alias.rname,e->alias.name);
2588 :
2589 21 : n->data = w;
2590 21 : v->changes++;
2591 : }
2592 : }
2593 : }
2594 : }
2595 : }
2596 :
2597 : /*
2598 : * Below code replaces GROUP BY with PROJECT in some cases;
2599 : * Triggers on...
2600 : * select 1 having true; select 42 from foo group by x; select n from foo group by rollup(n);
2601 : */
2602 :
2603 56139 : if (!rel_is_ref(rel)) {
2604 54297 : int needed = 0;
2605 164952 : for (node *n = rel->exps->h; n; n = n->next) {
2606 110655 : sql_exp *exp = (sql_exp*) n->data;
2607 :
2608 110655 : if (exp_is_atom(exp) && exp->type != e_aggr)
2609 78 : needed++;
2610 : }
2611 54297 : if (needed) {
2612 72 : if (!list_empty(rel->r)) {
2613 21 : int atoms = 0;
2614 : /* corner case, all grouping columns are atoms */
2615 43 : for (node *n = ((list*)rel->r)->h; n; n = n->next) {
2616 22 : sql_exp *exp = (sql_exp*) n->data;
2617 :
2618 22 : if (exp_is_atom(exp))
2619 1 : atoms++;
2620 : }
2621 : /* possible edge case, never triggers in coverage tests */
2622 21 : if (atoms == list_length(rel->r)) {
2623 1 : list *nexps = sa_list(v->sql->sa);
2624 3 : for (node *n = rel->exps->h; n; ) {
2625 2 : node *next = n->next;
2626 2 : sql_exp *e = (sql_exp*) n->data;
2627 :
2628 : /* remove references to constant group by columns */
2629 2 : if (e->type == e_column) {
2630 1 : sql_exp *found = NULL;
2631 1 : found = exps_bind_nid(rel->r, e->nid);
2632 1 : if (found) {
2633 1 : list_append(nexps, found);
2634 1 : list_remove_node(rel->exps, NULL, n);
2635 : }
2636 : }
2637 : n = next;
2638 : }
2639 1 : rel->r = NULL; /* transform it into a global aggregate */
2640 1 : rel->exps = list_merge(nexps, rel->exps, (fdup) NULL); /* add grouping columns back as projections */
2641 : /* global aggregates may return 1 row, so filter it based on the count */
2642 1 : sql_subfunc *cf = sql_bind_func(v->sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
2643 1 : sql_exp *count = exp_aggr(v->sql->sa, NULL, cf, 0, 1, CARD_ATOM, 0);
2644 1 : count = rel_groupby_add_aggr(v->sql, rel, count);
2645 1 : sql_exp *cp = exp_compare(v->sql->sa, exp_ref(v->sql, count), exp_atom(v->sql->sa, atom_int(v->sql->sa, exp_subtype(count), 0)), cmp_notequal);
2646 1 : rel = rel_select(v->sql->sa, rel, cp);
2647 1 : set_processed(rel);
2648 1 : return rel;
2649 : }
2650 51 : } else if (list_length(rel->exps) == needed) { /* all are const */
2651 48 : sql_rel *ll = rel->l;
2652 48 : rel->op = op_project;
2653 : /* TODO check if l->l == const, else change that */
2654 48 : if (ll && ll->l) {
2655 44 : rel_destroy(ll);
2656 44 : rel->l = rel_project_exp(v->sql, exp_atom_bool(v->sql->sa, 1));
2657 : }
2658 48 : return rel;
2659 : }
2660 23 : sql_rel *nrel = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
2661 78 : for (node *n = nrel->exps->h; n; n = n->next) {
2662 55 : sql_exp *exp = (sql_exp*) n->data;
2663 55 : if (exp->type == e_column) {
2664 55 : sql_exp *e = rel_find_exp(rel, exp);
2665 :
2666 55 : if (e && exp_is_atom(e) && e->type == e_atom) {
2667 29 : sql_exp *ne = exp_copy(v->sql, e);
2668 29 : assert(ne->alias.label);
2669 29 : exp_setalias(ne, ne->alias.label, exp_find_rel_name(exp), exp_name(exp));
2670 29 : n->data = ne;
2671 29 : v->changes++;
2672 : }
2673 : }
2674 : }
2675 23 : list *nl = sa_list(v->sql->sa);
2676 78 : for (node *n = rel->exps->h; n; n = n->next) {
2677 55 : sql_exp *exp = (sql_exp*) n->data;
2678 :
2679 55 : if (!exp_is_atom(exp) || exp->type != e_atom)
2680 26 : append(nl, exp);
2681 : }
2682 23 : rel->exps = nl;
2683 23 : return nrel;
2684 : }
2685 : }
2686 :
2687 : return rel;
2688 : }
2689 :
2690 : #if 0
2691 : static sql_rel *
2692 : rel_groupby_distinct2(visitor *v, sql_rel *rel)
2693 : {
2694 : list *ngbes = sa_list(v->sql->sa), *gbes, *naggrs = sa_list(v->sql->sa), *aggrs = sa_list(v->sql->sa);
2695 : sql_rel *l;
2696 : node *n;
2697 :
2698 : gbes = rel->r;
2699 : if (!gbes)
2700 : return rel;
2701 :
2702 : /* check if each aggr is, rewritable (max,min,sum,count)
2703 : * and only has one argument */
2704 : for (n = rel->exps->h; n; n = n->next) {
2705 : sql_exp *e = n->data;
2706 : sql_subfunc *af = e->f;
2707 :
2708 : if (e->type == e_aggr &&
2709 : (strcmp(af->func->base.name, "sum") &&
2710 : strcmp(af->func->base.name, "count") &&
2711 : strcmp(af->func->base.name, "min") &&
2712 : strcmp(af->func->base.name, "max")))
2713 : return rel;
2714 : }
2715 :
2716 : for (n = gbes->h; n; n = n->next) {
2717 : sql_exp *e = n->data;
2718 :
2719 : e = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
2720 : append(ngbes, e);
2721 : }
2722 :
2723 : /* 1 for each aggr(distinct v) add the attribute expression v to gbes and aggrs list
2724 : * 2 for each aggr(z) add aggr_phase2('z') to the naggrs list
2725 : * 3 for each group by col, add also to the naggrs list
2726 : * */
2727 : for (n = rel->exps->h; n; n = n->next) {
2728 : sql_exp *e = n->data;
2729 :
2730 : if (e->type == e_aggr && need_distinct(e)) { /* 1 */
2731 : /* need column expression */
2732 : list *args = e->l;
2733 : sql_exp *v = args->h->data;
2734 : append(gbes, v);
2735 : if (!exp_name(v))
2736 : exp_label(v->sql->sa, v, ++v->sql->label);
2737 : v = exp_column(v->sql->sa, exp_find_rel_name(v), exp_name(v), exp_subtype(v), v->card, has_nil(v), is_unique(v), is_intern(v));
2738 : append(aggrs, v);
2739 : v = exp_aggr1(v->sql->sa, v, e->f, need_distinct(e), 1, e->card, 1);
2740 : exp_setalias(v, e->alias.label, exp_find_rel_name(e), exp_name(e));
2741 : append(naggrs, v);
2742 : } else if (e->type == e_aggr && !need_distinct(e)) {
2743 : sql_exp *v;
2744 : sql_subfunc *f = e->f;
2745 : int cnt = exp_aggr_is_count(e);
2746 : sql_subfunc *a = sql_bind_func(v->sql, "sys", (cnt)?"sum":f->func->base.name, exp_subtype(e), NULL, F_AGGR, true, true);
2747 :
2748 : append(aggrs, e);
2749 : if (!exp_name(e))
2750 : exp_label(v->sql->sa, e, ++v->sql->label);
2751 : set_has_nil(e);
2752 : v = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
2753 : v = exp_aggr1(v->sql->sa, v, a, 0, 1, e->card, 1);
2754 : if (cnt)
2755 : set_zero_if_empty(v);
2756 : exp_setalias(v, e->alias.label, exp_find_rel_name(e), exp_name(e));
2757 : append(naggrs, v);
2758 : } else { /* group by col */
2759 : if (list_find_exp(gbes, e) || !list_find_exp(naggrs, e)) {
2760 : append(aggrs, e);
2761 :
2762 : e = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
2763 : }
2764 : append(naggrs, e);
2765 : }
2766 : }
2767 :
2768 : l = rel->l = rel_groupby(v->sql, rel->l, gbes);
2769 : l->exps = aggrs;
2770 : rel->r = ngbes;
2771 : rel->exps = naggrs;
2772 : v->changes++;
2773 : return rel;
2774 : }
2775 : #endif
2776 :
2777 : /* Rewrite group by expressions with distinct
2778 : *
2779 : * ie select a, count(distinct b) from c where ... groupby a;
2780 : * No other aggregations should be present
2781 : *
2782 : * Rewrite the more general case, good for parallel execution
2783 : *
2784 : * groupby(R) [e,f] [ aggr1 a distinct, aggr2 b distinct, aggr3 c, aggr4 d]
2785 : *
2786 : * into
2787 : *
2788 : * groupby(
2789 : * groupby(R) [e,f,a,b] [ a, b, aggr3 c, aggr4 d]
2790 : * ) [e,f]( aggr1 a distinct, aggr2 b distinct, aggr3_phase2 c, aggr4_phase2 d)
2791 : */
2792 : static inline sql_rel *
2793 56145 : rel_groupby_distinct(visitor *v, sql_rel *rel)
2794 : {
2795 56145 : node *n;
2796 :
2797 56145 : if (is_groupby(rel->op)) {
2798 56055 : sql_rel *l = rel->l;
2799 56055 : if (!l || is_groupby(l->op))
2800 : return rel;
2801 : }
2802 56007 : if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel)) {
2803 30486 : int nr = 0, anr = 0;
2804 30486 : list *gbe, *ngbe, *arg, *exps, *nexps;
2805 30486 : sql_exp *distinct = NULL, *darg, *found;
2806 30486 : sql_rel *l = NULL;
2807 :
2808 116199 : for (n=rel->exps->h; n && nr <= 2; n = n->next) {
2809 85713 : sql_exp *e = n->data;
2810 85713 : if (need_distinct(e)) {
2811 197 : distinct = n->data;
2812 197 : nr++;
2813 : }
2814 85713 : anr += is_aggr(e->type);
2815 : }
2816 30486 : if (nr < 1 || distinct->type != e_aggr)
2817 : return rel;
2818 190 : if (nr > 1 || anr > nr)
2819 : return rel;//rel_groupby_distinct2(v, rel);
2820 117 : arg = distinct->l;
2821 117 : if (list_length(arg) != 1 || list_length(rel->r) + nr != list_length(rel->exps))
2822 2 : return rel;
2823 :
2824 115 : gbe = rel->r;
2825 115 : ngbe = sa_list(v->sql->sa);
2826 115 : exps = sa_list(v->sql->sa);
2827 115 : nexps = sa_list(v->sql->sa);
2828 362 : for (n=rel->exps->h; n; n = n->next) {
2829 247 : sql_exp *e = n->data;
2830 247 : if (e != distinct) {
2831 132 : if (e->type == e_aggr) { /* copy the arguments to the aggregate */
2832 0 : list *args = e->l;
2833 0 : if (args) {
2834 0 : for (node *n = args->h ; n ; n = n->next) {
2835 0 : sql_exp *e = n->data;
2836 0 : list_append(ngbe, exp_copy(v->sql, e));
2837 0 : list_append(exps, exp_copy(v->sql, e));
2838 : }
2839 : }
2840 : } else {
2841 132 : e = exp_ref(v->sql, e);
2842 132 : append(ngbe, e);
2843 132 : append(exps, e);
2844 : }
2845 132 : if (e->type == e_aggr) /* aggregates must be copied */
2846 0 : e = exp_copy(v->sql, e);
2847 : else
2848 132 : e = exp_ref(v->sql, e);
2849 132 : append(nexps, e);
2850 : }
2851 : }
2852 :
2853 115 : darg = arg->h->data;
2854 115 : if ((found = exps_find_exp(exps, darg)) == NULL) { /* not already in the groups projection list */
2855 112 : if ((found = exps_find_exp(gbe, darg))) { /* first find if the aggregate argument already exists in the grouping list */
2856 0 : darg = exp_ref(v->sql, found);
2857 : } else {
2858 112 : list_append(gbe, darg = exp_copy(v->sql, darg));
2859 112 : exp_label(v->sql->sa, darg, ++v->sql->label);
2860 112 : darg = exp_ref(v->sql, darg);
2861 : }
2862 112 : list_append(exps, darg);
2863 112 : darg = exp_ref(v->sql, darg);
2864 : } else {
2865 3 : darg = exp_ref(v->sql, found);
2866 : }
2867 115 : arg->h->data = darg;
2868 115 : l = rel->l = rel_groupby(v->sql, rel->l, gbe);
2869 115 : l->exps = exps;
2870 115 : set_processed(l);
2871 115 : rel->r = ngbe;
2872 115 : rel->exps = nexps;
2873 115 : set_nodistinct(distinct);
2874 115 : append(nexps, distinct);
2875 115 : v->changes++;
2876 : }
2877 : return rel;
2878 : }
2879 :
2880 : /*
2881 : * Push Count inside crossjoin down, and multiply the results
2882 : *
2883 : * project ( project(
2884 : * group by ( crossproduct (
2885 : * crossproduct( project (
2886 : * L, => group by (
2887 : * R L
2888 : * ) [ ] [ count NOT NULL ] ) [ ] [ count NOT NULL ]
2889 : * ) ),
2890 : * ) [ NOT NULL ] project (
2891 : * group by (
2892 : * R
2893 : * ) [ ] [ count NOT NULL ]
2894 : * )
2895 : * ) [ sql_mul(.., .. NOT NULL) ]
2896 : * )
2897 : */
2898 : static inline sql_rel *
2899 56144 : rel_push_count_down(visitor *v, sql_rel *rel)
2900 : {
2901 56144 : sql_rel *r = rel->l;
2902 :
2903 56144 : if (is_groupby(rel->op) && !rel_is_ref(rel) && list_empty(rel->r) &&
2904 23835 : r && !r->exps && r->op == op_join && !(rel_is_ref(r)) &&
2905 : /* currently only single count aggregation is handled, no other projects or aggregation */
2906 56 : list_length(rel->exps) == 1 && exp_aggr_is_count(rel->exps->h->data)) {
2907 12 : sql_exp *nce, *oce, *cnt1 = NULL, *cnt2 = NULL;
2908 12 : sql_rel *gbl = NULL, *gbr = NULL; /* Group By */
2909 12 : sql_rel *cp = NULL; /* Cross Product */
2910 12 : sql_rel *srel;
2911 :
2912 12 : oce = rel->exps->h->data;
2913 12 : if (oce->l) /* we only handle COUNT(*) */
2914 : return rel;
2915 :
2916 10 : srel = r->l;
2917 : {
2918 10 : sql_subfunc *cf = sql_bind_func(v->sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
2919 10 : sql_exp *e = exp_aggr(v->sql->sa, NULL, cf, need_distinct(oce), need_no_nil(oce), oce->card, 0);
2920 :
2921 10 : exp_label(v->sql->sa, e, ++v->sql->label);
2922 10 : cnt1 = exp_ref(v->sql, e);
2923 10 : gbl = rel_groupby(v->sql, rel_dup(srel), NULL);
2924 10 : set_processed(gbl);
2925 10 : rel_groupby_add_aggr(v->sql, gbl, e);
2926 : }
2927 :
2928 10 : srel = r->r;
2929 : {
2930 10 : sql_subfunc *cf = sql_bind_func(v->sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
2931 10 : sql_exp *e = exp_aggr(v->sql->sa, NULL, cf, need_distinct(oce), need_no_nil(oce), oce->card, 0);
2932 :
2933 10 : exp_label(v->sql->sa, e, ++v->sql->label);
2934 10 : cnt2 = exp_ref(v->sql, e);
2935 10 : gbr = rel_groupby(v->sql, rel_dup(srel), NULL);
2936 10 : set_processed(gbr);
2937 10 : rel_groupby_add_aggr(v->sql, gbr, e);
2938 : }
2939 :
2940 10 : cp = rel_crossproduct(v->sql->sa, gbl, gbr, op_join);
2941 10 : set_processed(cp);
2942 :
2943 10 : if (!(nce = rel_binop_(v->sql, NULL, cnt1, cnt2, "sys", "sql_mul", card_value, true))) {
2944 0 : v->sql->session->status = 0;
2945 0 : v->sql->errstr[0] = '\0';
2946 0 : return rel; /* error, fallback to original expression */
2947 : }
2948 : /* because of remote plans, make sure "sql_mul" returns bigint. The cardinality is atomic, so no major performance penalty */
2949 10 : if (subtype_cmp(exp_subtype(oce), exp_subtype(nce)) != 0)
2950 10 : nce = exp_convert(v->sql, nce, exp_subtype(nce), exp_subtype(oce));
2951 10 : if (exp_name(oce))
2952 10 : exp_prop_alias(v->sql->sa, nce, oce);
2953 :
2954 10 : rel_destroy(rel);
2955 10 : rel = rel_project(v->sql->sa, cp, append(new_exp_list(v->sql->sa), nce));
2956 10 : set_processed(rel);
2957 :
2958 10 : v->changes++;
2959 : }
2960 :
2961 : return rel;
2962 : }
2963 :
2964 : static inline sql_rel *
2965 31084 : rel_basecount(visitor *v, sql_rel *rel)
2966 : {
2967 39761 : if (is_groupby(rel->op) && !rel_is_ref(rel) && rel->l && list_empty(rel->r) &&
2968 16686 : list_length(rel->exps) == 1 && exp_aggr_is_count(rel->exps->h->data)) {
2969 4442 : sql_rel *bt = rel->l;
2970 4442 : sql_exp *e = rel->exps->h->data;
2971 4442 : if (is_basetable(bt->op) && list_empty(e->l)) { /* count(*) */
2972 : /* change into select cnt('schema','table') */
2973 659 : sql_table *t = bt->l;
2974 : /* I need to get the declared table's frame number to make this work correctly for those */
2975 659 : if (!isTable(t) || isDeclaredTable(t))
2976 : return rel;
2977 609 : sql_subfunc *cf = sql_bind_func(v->sql, "sys", "cnt", sql_bind_localtype("str"), sql_bind_localtype("str"), F_FUNC, true, true);
2978 609 : list *exps = sa_list(v->sql->sa);
2979 609 : append(exps, exp_atom_str(v->sql->sa, t->s->base.name, sql_bind_localtype("str")));
2980 609 : append(exps, exp_atom_str(v->sql->sa, t->base.name, sql_bind_localtype("str")));
2981 609 : sql_exp *ne = exp_op(v->sql->sa, exps, cf);
2982 :
2983 609 : ne = exp_propagate(v->sql->sa, ne, e);
2984 609 : exp_setalias(ne, e->alias.label, exp_find_rel_name(e), exp_name(e));
2985 609 : rel_destroy(rel);
2986 609 : rel = rel_project(v->sql->sa, NULL, append(sa_list(v->sql->sa), ne));
2987 609 : v->changes++;
2988 : }
2989 : }
2990 : return rel;
2991 : }
2992 :
2993 : static inline sql_rel *
2994 31084 : rel_simplify_count(visitor *v, sql_rel *rel)
2995 : {
2996 31084 : if (is_groupby(rel->op) && !list_empty(rel->exps)) {
2997 30982 : mvc *sql = v->sql;
2998 30982 : int ncountstar = 0;
2999 :
3000 : /* Convert count(no null) into count(*) */
3001 93730 : for (node *n = rel->exps->h; n ; n = n->next) {
3002 62748 : sql_exp *e = n->data;
3003 :
3004 62748 : if (exp_aggr_is_count(e) && !need_distinct(e)) {
3005 14892 : if (list_length(e->l) == 0) {
3006 12126 : ncountstar++;
3007 2766 : } else if (list_length(e->l) == 1 && !has_nil((sql_exp*)((list*)e->l)->h->data)) {
3008 2604 : sql_subfunc *cf = sql_bind_func(sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
3009 2604 : sql_exp *ne = exp_aggr(sql->sa, NULL, cf, 0, 0, e->card, 0);
3010 2604 : if (exp_name(e))
3011 2604 : exp_prop_alias(sql->sa, ne, e);
3012 2604 : n->data = ne;
3013 2604 : ncountstar++;
3014 2604 : v->changes++;
3015 : }
3016 : }
3017 : }
3018 : /* With multiple count(*), use exp_ref to reduce the number of calls to this aggregate */
3019 30982 : if (ncountstar > 1) {
3020 56 : sql_exp *count_star = NULL;
3021 56 : sql_rel *nrel = rel_project(v->sql->sa, rel, NULL);
3022 56 : list *aexps = sa_list(v->sql->sa), *nexps = sa_list(v->sql->sa);
3023 56 : nrel->exps = nexps;
3024 247 : for (node *n = rel->exps->h; n ; n = n->next) {
3025 191 : sql_exp *e = n->data;
3026 :
3027 191 : if (exp_aggr_is_count(e) && !need_distinct(e) && list_length(e->l) == 0) {
3028 126 : if (!count_star) {
3029 56 : count_star = e;
3030 56 : append(aexps, e);
3031 56 : append(nexps, exp_ref(sql, e));
3032 : } else {
3033 70 : sql_exp *ne = exp_ref(sql, count_star);
3034 :
3035 70 : if (exp_name(e))
3036 70 : exp_prop_alias(sql->sa, ne, e);
3037 70 : v->changes++;
3038 70 : append(nexps, ne);
3039 : }
3040 : } else {
3041 65 : append(aexps, e);
3042 65 : append(nexps, exp_ref(sql, e));
3043 : }
3044 : }
3045 56 : rel->exps = aexps;
3046 56 : return nrel;
3047 : }
3048 : }
3049 : return rel;
3050 : }
3051 :
3052 : static sql_rel *
3053 31084 : rel_groupjoin(visitor *v, sql_rel *rel)
3054 : {
3055 31084 : if (!rel || rel_is_ref(rel) || !is_groupby(rel->op) || list_empty(rel->r))
3056 9293 : return rel;
3057 :
3058 21791 : sql_rel *j = rel->l;
3059 21791 : if (!j || rel_is_ref(j) /*|| !is_left(j->op)*/ || j->op != op_join || list_length(rel->exps) > 1 /* only join because left joins aren't optimized yet (TODO), only length 1 as implementation of groupjoins is missing */ || !list_empty(rel->attr))
3060 21586 : return rel;
3061 : /* check group by exps == equi join exps */
3062 205 : list *gbes = rel->r;
3063 205 : if (list_length(gbes) != list_length(j->exps))
3064 : return rel;
3065 123 : int nr = 0;
3066 243 : for(node *n = gbes->h; n; n = n->next) {
3067 123 : sql_exp *gbe = n->data;
3068 243 : for(node *m = j->exps->h; m; m = m->next) {
3069 123 : sql_exp *je = m->data;
3070 123 : if (je->type != e_cmp || je->flag != cmp_equal)
3071 : return rel;
3072 : /* check if its a join exp (ie not a selection) */
3073 125 : if (!( (!rel_has_exp(j->l, je->l, false) && !rel_has_exp(j->r, je->r, false)) ||
3074 10 : (!rel_has_exp(j->l, je->r, false) && !rel_has_exp(j->r, je->l, false))))
3075 0 : return rel;
3076 120 : if (exp_match(je->l, gbe)) {
3077 76 : nr++;
3078 44 : } else if (exp_match(je->r, gbe)) {
3079 11 : nr++;
3080 : }
3081 : }
3082 : }
3083 120 : if (nr == list_length(gbes)) {
3084 : // printf("#group by converted\n");
3085 87 : j = rel_dup(j);
3086 87 : j->attr = rel->exps;
3087 87 : v->changes++;
3088 87 : rel_destroy(rel);
3089 87 : return j;
3090 : }
3091 : return rel;
3092 : }
3093 :
3094 : /* select k1 from bla where k1 = const -> select const from bla where k1 = const */
3095 : static sql_rel *
3096 2044393 : rel_project_select_exp(visitor *v, sql_rel *rel)
3097 : {
3098 2044393 : if (is_simple_project(rel->op) && rel->exps && rel->l) {
3099 718099 : sql_rel *l = rel->l;
3100 718099 : if (is_select(l->op) && l->exps) {
3101 695260 : for(node *n = rel->exps->h; n; n = n->next) {
3102 574688 : sql_exp *col = n->data;
3103 574688 : if (col->type == e_column) {
3104 1244703 : for(node *m = l->exps->h; m; m = m->next) {
3105 781567 : sql_exp *cmp = m->data;
3106 781567 : if (cmp->type == e_cmp && cmp->flag == cmp_equal && !is_anti(cmp) && !is_semantics(cmp) && exp_is_atom(cmp->r)) {
3107 454925 : sql_exp *l = cmp->l;
3108 454925 : if(l->type == e_column && col->nid == l->nid) {
3109 : /* replace column with the constant */
3110 13731 : sql_exp *e = n->data = exp_copy(v->sql, cmp->r);
3111 13731 : exp_setalias(e, col->alias.label, exp_relname(col), exp_name(col));
3112 13731 : exp_propagate(v->sql->sa, e, col);
3113 13731 : list_hash_clear(rel->exps);
3114 : }
3115 : }
3116 : }
3117 : }
3118 : }
3119 : }
3120 : }
3121 2044393 : return rel;
3122 : }
3123 :
3124 : static sql_rel *
3125 2044382 : rel_optimize_projections_(visitor *v, sql_rel *rel)
3126 : {
3127 2044382 : rel = rel_project_cse(v, rel);
3128 2044395 : rel = rel_project_select_exp(v, rel);
3129 :
3130 2044398 : if (!rel || !is_groupby(rel->op))
3131 : return rel;
3132 :
3133 56144 : rel = rel_remove_const_aggr(v, rel);
3134 :
3135 56144 : if (v->value_based_opt) {
3136 31084 : rel = rel_simplify_sum(v, rel);
3137 31084 : rel = rel_simplify_groupby_columns(v, rel);
3138 : }
3139 56144 : rel = rel_groupby_cse(v, rel);
3140 56144 : rel = rel_push_aggr_down(v, rel);
3141 56144 : rel = rel_push_groupby_down(v, rel);
3142 56145 : rel = rel_reduce_groupby_exps(v, rel);
3143 56144 : rel = rel_distinct_aggregate_on_unique_values(v, rel);
3144 56145 : rel = rel_groupby_distinct(v, rel);
3145 56144 : rel = rel_push_count_down(v, rel);
3146 :
3147 : /* only when value_based_opt is on, ie not for dependency resolution */
3148 56145 : if (v->value_based_opt) {
3149 31084 : rel = rel_simplify_count(v, rel);
3150 31084 : rel = rel_basecount(v, rel);
3151 :
3152 31084 : rel = rel_groupjoin(v, rel);
3153 : }
3154 : return rel;
3155 : }
3156 :
3157 : static sql_rel *
3158 192310 : rel_optimize_projections(visitor *v, global_props *gp, sql_rel *rel)
3159 : {
3160 192310 : (void) gp;
3161 192310 : return rel_visitor_topdown(v, rel, &rel_optimize_projections_);
3162 : }
3163 :
3164 : run_optimizer
3165 642971 : bind_optimize_projections(visitor *v, global_props *gp)
3166 : {
3167 642971 : int flag = v->sql->sql_optimizer;
3168 642775 : return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_project] || gp->cnt[op_union] || gp->cnt[op_munion]
3169 1285746 : || gp->cnt[op_inter] || gp->cnt[op_except]) && (flag & optimize_projections) ? rel_optimize_projections : NULL;
3170 : }
3171 :
3172 :
3173 : static bool
3174 58287 : exps_have_selfref(list *exps)
3175 : {
3176 58287 : bool selfref = false;
3177 58287 : if (list_empty(exps))
3178 : return false;
3179 469933 : for(node *n = exps->h; n && !selfref; n = n->next) {
3180 411646 : sql_exp *e = n->data;
3181 411646 : selfref |= is_selfref(e);
3182 : }
3183 : return selfref;
3184 : }
3185 :
3186 : static inline sql_rel *
3187 1018580 : rel_push_project_down_union(visitor *v, sql_rel *rel)
3188 : {
3189 : /* first remove distinct if already unique */
3190 1018580 : if (rel->op == op_project && need_distinct(rel) && rel->exps && exps_unique(v->sql, rel, rel->exps) && !have_nil(rel->exps)) {
3191 109 : set_nodistinct(rel);
3192 109 : if (exps_card(rel->exps) <= CARD_ATOM && rel->card > CARD_ATOM) { /* if the projection just contains constants, then no topN is needed */
3193 2 : sql_rel *nl = rel->l = rel_topn(v->sql->sa, rel->l, append(sa_list(v->sql->sa), exp_atom_lng(v->sql->sa, 1)));
3194 2 : set_processed(nl);
3195 : }
3196 109 : v->changes++;
3197 : }
3198 :
3199 1018580 : if (rel->op == op_project && rel->l && rel->exps && list_empty(rel->r)) {
3200 480386 : int need_distinct = need_distinct(rel);
3201 480386 : sql_rel *u = rel->l;
3202 480386 : sql_rel *p = rel;
3203 :
3204 480386 : if (!u || !(is_union(u->op) || is_munion(u->op)) || need_distinct(u) || is_recursive(u) || !u->exps || rel_is_ref(u) || project_unsafe(rel, false))
3205 451213 : return rel;
3206 :
3207 29173 : sql_rel *r;
3208 :
3209 29173 : assert(!is_union(u->op));
3210 29173 : if (is_recursive(u))
3211 : return rel;
3212 :
3213 : /* don't push project down union of single values */
3214 87730 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
3215 58572 : r = n->data;
3216 : // TODO: does this check make sense?
3217 58572 : if (is_project(r->op) && !r->l)
3218 : return rel;
3219 : }
3220 :
3221 87715 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
3222 58557 : r = rel_dup(n->data);
3223 :
3224 : /* introduce projection around each operand if needed */
3225 58557 : if (!is_project(r->op) || exps_have_selfref(r->exps))
3226 270 : r = rel_project(v->sql->sa, r,
3227 : rel_projections(v->sql, r, NULL, 1, 1));
3228 : /* check if we need distinct */
3229 117114 : need_distinct &=
3230 58557 : (!exps_unique(v->sql, r, r->exps) || have_nil(r->exps));
3231 58557 : rel_rename_exps(v->sql, u->exps, r->exps);
3232 :
3233 58557 : rel_destroy(n->data);
3234 58557 : n->data = r;
3235 : }
3236 :
3237 : /* once we have checked for need_distinct in every rel we can
3238 : * introduce the projects under the munion which are gonna be
3239 : * copies of the single project above munion */
3240 87715 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
3241 58557 : r = rel_dup(n->data);
3242 :
3243 58557 : r = rel_project(v->sql->sa, r, NULL);
3244 58557 : if (need_distinct)
3245 0 : set_distinct(r);
3246 58557 : r->exps = exps_copy(v->sql, p->exps);
3247 58557 : set_processed(r);
3248 :
3249 58557 : rel_destroy(n->data);
3250 58557 : n->data = r;
3251 : }
3252 :
3253 : /* turn the project-munion on top into munion. incr operand
3254 : * rels count to make sure that they are not deleted by the
3255 : * subsequent rel_inplace_setop_n_ary */
3256 87715 : for (node *n = ((list*)u->l)->h; n; n = n->next)
3257 58557 : rel_dup(n->data);
3258 29158 : rel = rel_inplace_setop_n_ary(v->sql, rel, u->l, op_munion,
3259 : rel_projections(v->sql, rel, NULL, 1, 1));
3260 29158 : if (need_distinct)
3261 0 : set_distinct(rel);
3262 29158 : if (is_single(u))
3263 2 : set_single(rel);
3264 :
3265 29158 : v->changes++;
3266 :
3267 : /* if any operand has two project above then squash them */
3268 87715 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
3269 58557 : r = rel_dup(n->data);
3270 58557 : r = rel_merge_projects_(v, r);
3271 58557 : rel_destroy(n->data);
3272 58557 : n->data = r;
3273 : }
3274 :
3275 : return rel;
3276 : }
3277 : return rel;
3278 : }
3279 :
3280 : static inline sql_rel *
3281 1018580 : rel_merge_unions(visitor *v, sql_rel *rel)
3282 : {
3283 : /* stacked munion flattening e.g.
3284 : * munion( munion(a, b, c), munion(d, e)) => munion(a,b,c,d,e)
3285 : */
3286 1018580 : if (rel && is_munion(rel->op) && !is_recursive(rel)) {
3287 56809 : list *l = rel->l;
3288 274377 : for(node *n = l->h; n; ) {
3289 160759 : node *next = n->next;
3290 160759 : sql_rel *oc = n->data;
3291 160759 : sql_rel *c = oc;
3292 :
3293 : /* account for any group-bys pushed down between stacked munions */
3294 160759 : if (oc->op == op_groupby)
3295 2593 : c = oc->l;
3296 :
3297 160759 : if (is_munion(c->op)) {
3298 10675 : c = rel_dup(c);
3299 10675 : list_remove_node(l, NULL, n);
3300 10675 : l = list_merge(l, c->l, (fdup)NULL);
3301 10675 : c->l = NULL;
3302 10675 : rel_destroy(oc);
3303 10675 : if (!next)
3304 3 : next = l->h;
3305 10675 : v->changes++;
3306 : }
3307 : n = next;
3308 : }
3309 56809 : rel->l = l;
3310 : }
3311 1018580 : return rel;
3312 : }
3313 :
3314 :
3315 : /*
3316 : * Push (semi)joins down unions, this is basically for merge tables, where
3317 : * we know that the fk-indices are split over two clustered merge tables.
3318 : */
3319 : static inline sql_rel *
3320 1018580 : rel_push_join_down_munion(visitor *v, sql_rel *rel)
3321 : {
3322 1018580 : if ((is_join(rel->op) && !is_outerjoin(rel->op) && !is_single(rel)) || is_semi(rel->op)) {
3323 121179 : sql_rel *l = rel->l, *r = rel->r, *ol = l, *or = r;
3324 121179 : list *exps = rel->exps, *attr = rel->attr;
3325 121179 : sql_exp *je = NULL;
3326 :
3327 121179 : if (is_recursive(l) || is_recursive(r))
3328 : return rel;
3329 : /* we would like to optimize in place reference rels which point
3330 : * to replica tables and let the replica optimizer handle those
3331 : * later. otherwise we miss the push join down optimization due
3332 : * to the rel_is_ref bailout
3333 : */
3334 121175 : if (rel_is_ref(l) && is_basetable(l->op) && l->l && isReplicaTable((sql_table*)l->l)) {
3335 1 : rel->l = rel_copy(v->sql, l, true);
3336 1 : rel_destroy(l);
3337 : }
3338 121175 : if (rel_is_ref(r) && is_basetable(r->op) && r->l && isReplicaTable((sql_table*)r->l)) {
3339 4 : rel->r = rel_copy(v->sql, r, true);
3340 4 : rel_destroy(r);
3341 : }
3342 :
3343 : // TODO: do we need to check if it's l/r are refs?
3344 121175 : if (!l || !r || need_distinct(l) || need_distinct(r) || rel_is_ref(l) || rel_is_ref(r))
3345 : return rel;
3346 75893 : if (l->op == op_project)
3347 4704 : l = l->l;
3348 75893 : if (r->op == op_project)
3349 8023 : r = r->l;
3350 :
3351 : /* both sides only if we have a join index ASSUMING pkey-fkey are aligned */
3352 : // TODO: we could also check if the join cols are (not) unique
3353 75893 : bool aligned_pk_fk = true;
3354 75893 : if (!l || !r || (is_munion(l->op) && is_munion(r->op) &&
3355 161 : !(je = rel_is_join_on_pkey(rel, aligned_pk_fk))))
3356 709 : return rel;
3357 :
3358 : // TODO: why? bailout for union semijoin without pkey joins expressions
3359 75184 : if (is_semi(rel->op) && is_munion(l->op) && !je)
3360 : return rel;
3361 :
3362 : /* if both sides are munions we assume that they will have the same number of children */
3363 74335 : if (is_munion(l->op) && is_munion(r->op) && list_length(l->l) != list_length(r->l))
3364 : return rel;
3365 :
3366 74335 : if (is_munion(l->op) && !need_distinct(l) && !is_single(l) && !is_recursive(l) &&
3367 4553 : !is_munion(r->op)){
3368 : /* join(munion(a,b,c), d) -> munion(join(a,d), join(b,d), join(c,d)) */
3369 4553 : list *js = sa_list(v->sql->sa);
3370 13665 : for (node *n = ((list*)l->l)->h; n; n = n->next) {
3371 9112 : sql_rel *pc = rel_dup(n->data);
3372 9112 : if (!is_project(pc->op))
3373 12 : pc = rel_project(v->sql->sa, pc, rel_projections(v->sql, pc, NULL, 1, 1));
3374 9112 : rel_rename_exps(v->sql, l->exps, pc->exps);
3375 9112 : if (l != ol) {
3376 2 : pc = rel_project(v->sql->sa, pc, NULL);
3377 2 : pc->exps = exps_copy(v->sql, ol->exps);
3378 2 : set_processed(pc);
3379 : }
3380 9112 : pc = rel_crossproduct(v->sql->sa, pc, rel_dup(or), rel->op);
3381 9112 : pc->exps = exps_copy(v->sql, exps);
3382 9112 : pc->attr = exps_copy(v->sql, attr);
3383 9112 : set_processed(pc);
3384 9112 : pc = rel_project(v->sql->sa, pc, rel_projections(v->sql, pc, NULL, 1, 1));
3385 9112 : js = append(js, pc);
3386 : }
3387 4553 : v->changes++;
3388 4553 : return rel_inplace_setop_n_ary(v->sql, rel, js, op_munion,
3389 : rel_projections(v->sql, rel, NULL, 1, 1));
3390 69782 : } else if (is_munion(l->op) && !need_distinct(l) && !is_single(l) && !is_recursive(l) &&
3391 0 : is_munion(r->op) && !need_distinct(r) && !is_single(r) && !is_recursive(r) &&
3392 : je) {
3393 : /* join(munion(a,b,c), munion(d,e,f)) -> munion(join(a,d), join(b,e), join(c,f)) */
3394 0 : list *cps = sa_list(v->sql->sa);
3395 : /* create pairwise joins between left and right parts. assume eq num of parts (see earlier bailout) */
3396 0 : for (node *n = ((list*)l->l)->h, *m=((list*)r->l)->h; n && m; n = n->next, m = m->next) {
3397 : /* left part */
3398 0 : sql_rel *lp = rel_dup(n->data);
3399 0 : if (!is_project(lp->op))
3400 0 : lp = rel_project(v->sql->sa, lp, rel_projections(v->sql, lp, NULL, 1, 1));
3401 0 : rel_rename_exps(v->sql, l->exps, lp->exps);
3402 0 : if (l != ol) {
3403 0 : lp = rel_project(v->sql->sa, lp, NULL);
3404 0 : lp->exps = exps_copy(v->sql, ol->exps);
3405 0 : set_processed(lp);
3406 : }
3407 : /* right part */
3408 0 : sql_rel *rp = rel_dup(m->data);
3409 0 : if (!is_project(rp->op))
3410 0 : rp = rel_project(v->sql->sa, rp, rel_projections(v->sql, rp, NULL, 1, 1));
3411 0 : rel_rename_exps(v->sql, r->exps, rp->exps);
3412 0 : if (r != or) {
3413 0 : rp = rel_project(v->sql->sa, rp, NULL);
3414 0 : rp->exps = exps_copy(v->sql, or->exps);
3415 0 : set_processed(rp);
3416 : }
3417 : /* combine them */
3418 0 : sql_rel *cp = rel_crossproduct(v->sql->sa, lp, rp, rel->op);
3419 0 : cp->exps = exps_copy(v->sql, exps);
3420 0 : cp->attr = exps_copy(v->sql, attr);
3421 0 : set_processed(cp);
3422 0 : cp = rel_project(v->sql->sa, cp, rel_projections(v->sql, cp, NULL, 1, 1));
3423 0 : cps = append(cps, cp);
3424 : }
3425 0 : v->changes++;
3426 0 : return rel_inplace_setop_n_ary(v->sql, rel, cps, op_munion,
3427 : rel_projections(v->sql, rel, NULL, 1, 1));
3428 69782 : } else if (!is_munion(l->op) &&
3429 69774 : is_munion(r->op) && !need_distinct(r) && !is_single(r) && !is_recursive(r) &&
3430 2991 : !is_semi(rel->op)) {
3431 : /* join(a, munion(b,c,d)) -> munion(join(a,b), join(a,c), join(a,d)) */
3432 1612 : list *js = sa_list(v->sql->sa);
3433 4838 : for (node *n = ((list*)r->l)->h; n; n = n->next) {
3434 3226 : sql_rel *pc = rel_dup(n->data);
3435 3226 : if (!is_project(pc->op))
3436 17 : pc = rel_project(v->sql->sa, pc, rel_projections(v->sql, pc, NULL, 1, 1));
3437 3226 : rel_rename_exps(v->sql, r->exps, pc->exps);
3438 3226 : if (r != or) {
3439 122 : pc = rel_project(v->sql->sa, pc, NULL);
3440 122 : pc->exps = exps_copy(v->sql, or->exps);
3441 122 : set_processed(pc);
3442 : }
3443 3226 : pc = rel_crossproduct(v->sql->sa, rel_dup(ol), pc, rel->op);
3444 3226 : pc->exps = exps_copy(v->sql, exps);
3445 3226 : pc->attr = exps_copy(v->sql, attr);
3446 3226 : set_processed(pc);
3447 3226 : pc = rel_project(v->sql->sa, pc, rel_projections(v->sql, pc, NULL, 1, 1));
3448 3226 : js = append(js, pc);
3449 : }
3450 1612 : v->changes++;
3451 1612 : return rel_inplace_setop_n_ary(v->sql, rel, js, op_munion,
3452 : rel_projections(v->sql, rel, NULL, 1, 1));
3453 68170 : } else if (!is_munion(l->op) &&
3454 68162 : is_munion(r->op) && !need_distinct(r) && !is_single(r) && !is_recursive(r) &&
3455 1379 : is_semi(rel->op) && je) {
3456 : /* {semi}join ( A1, munion (B, A2a, C, A2b)) [A1.partkey = A2.partkey] ->
3457 : * {semi}join ( A1, munion (A2a, A2b))
3458 : * (ie some parts of an n-th munion operand)
3459 : *
3460 : * How to detect that a relation isn't matching?
3461 : * partitioning is currently done only on pkey/fkey's
3462 : * ie only matching per part if join is on pkey/fkey (parts)
3463 : * and part numbers should match.
3464 : * */
3465 0 : int lpnr = rel_part_nr(l, je);
3466 0 : if (lpnr < 0)
3467 : return rel;
3468 :
3469 0 : list *ups = sa_list(v->sql->sa);
3470 0 : for (node *n = ((list*)r->l)->h; n; n = n->next) {
3471 0 : if (rel_uses_part_nr(n->data, je, lpnr)) {
3472 0 : sql_rel *pc = rel_dup(n->data);
3473 0 : ups = append(ups, pc);
3474 : }
3475 : }
3476 0 : v->changes++;
3477 0 : return rel_inplace_setop_n_ary(v->sql, r, ups, op_munion,
3478 : rel_projections(v->sql, rel, NULL, 1, 1));
3479 : }
3480 : }
3481 : return rel;
3482 : }
3483 :
3484 : static sql_rel *
3485 1018580 : rel_optimize_unions_topdown_(visitor *v, sql_rel *rel)
3486 : {
3487 1018580 : rel = rel_push_project_down_union(v, rel);
3488 1018580 : rel = rel_merge_unions(v, rel);
3489 1018580 : rel = rel_push_join_down_munion(v, rel);
3490 1018580 : return rel;
3491 : }
3492 :
3493 : static sql_rel *
3494 29207 : rel_optimize_unions_topdown(visitor *v, global_props *gp, sql_rel *rel)
3495 : {
3496 29207 : (void) gp;
3497 29207 : return rel_visitor_topdown(v, rel, &rel_optimize_unions_topdown_);
3498 : }
3499 :
3500 : run_optimizer
3501 643075 : bind_optimize_unions_topdown(visitor *v, global_props *gp)
3502 : {
3503 643075 : (void) v;
3504 643075 : return gp->opt_level == 1 && gp->cnt[op_munion] ? rel_optimize_unions_topdown : NULL;
3505 : }
3506 :
3507 :
3508 : static sql_column *
3509 0 : is_fk_column_of_pk(mvc *sql, sql_rel *rel, sql_column *pkc, sql_exp *e) /* test if e is a foreign key column for the pk on pkc */
3510 : {
3511 0 : sql_trans *tr = sql->session->tr;
3512 0 : sql_column *c = exp_find_column(rel, e, -2);
3513 :
3514 0 : if (c) {
3515 0 : sql_table *t = c->t;
3516 :
3517 0 : for (node *n = ol_first_node(t->idxs); n; n = n->next) {
3518 0 : sql_idx *li = n->data;
3519 :
3520 0 : if (li->type == join_idx) {
3521 0 : for (node *m = li->columns->h ; m ; m = m->next) {
3522 0 : sql_kc *fkc = m->data;
3523 :
3524 0 : if (strcmp(fkc->c->base.name, c->base.name) == 0) { /* same fkey column */
3525 0 : sql_key *fkey = (sql_key*)os_find_id(tr->cat->objects, tr, ((sql_fkey*)li->key)->rkey);
3526 :
3527 0 : if (strcmp(fkey->t->base.name, pkc->t->base.name) == 0) { /* to same pk table */
3528 0 : for (node *o = fkey->columns->h ; o ; o = n->next) {
3529 0 : sql_kc *kc = m->data;
3530 :
3531 0 : if (strcmp(kc->c->base.name, pkc->base.name) == 0) /* to same pk table column */
3532 : return c;
3533 : }
3534 : }
3535 : }
3536 : }
3537 : }
3538 : }
3539 : }
3540 : return NULL;
3541 : }
3542 :
3543 : static bool
3544 0 : has_no_selectivity(mvc *sql, sql_rel *rel)
3545 : {
3546 0 : if (!rel)
3547 : return true;
3548 :
3549 0 : switch(rel->op){
3550 : case op_basetable:
3551 : case op_truncate:
3552 : case op_table:
3553 : return true;
3554 0 : case op_topn:
3555 : case op_sample:
3556 : case op_project:
3557 : case op_groupby:
3558 0 : return has_no_selectivity(sql, rel->l);
3559 : case op_ddl:
3560 : case op_insert:
3561 : case op_update:
3562 : case op_delete:
3563 : case op_merge:
3564 : case op_join:
3565 : case op_left:
3566 : case op_right:
3567 : case op_full:
3568 : case op_semi:
3569 : case op_anti:
3570 : case op_union:
3571 : case op_inter:
3572 : case op_except:
3573 : case op_munion:
3574 : case op_select:
3575 : return false;
3576 : }
3577 : return true;
3578 : }
3579 :
3580 : static sql_rel *
3581 119820 : rel_distinct_project2groupby_(visitor *v, sql_rel *rel)
3582 : {
3583 119820 : sql_rel *l = rel->l;
3584 :
3585 : /* rewrite distinct project (table) [ constant ] -> project [ constant ] */
3586 127557 : if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
3587 7737 : exps_card(rel->exps) <= CARD_ATOM) {
3588 476 : set_nodistinct(rel);
3589 476 : if (rel->card > CARD_ATOM) { /* if the projection just contains constants, then no topN is needed */
3590 193 : sql_rel *nl = rel->l = rel_topn(v->sql->sa, rel->l, append(sa_list(v->sql->sa), exp_atom_lng(v->sql->sa, 1)));
3591 193 : set_processed(nl);
3592 : }
3593 476 : v->changes++;
3594 : }
3595 :
3596 : /* rewrite distinct project [ pk ] ( select ( table ) [ e op val ])
3597 : * into project [ pk ] ( select/semijoin ( table ) */
3598 119820 : if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
3599 7473 : (l->op == op_select || l->op == op_semi) && exps_unique(v->sql, rel, rel->exps) &&
3600 212 : (!have_semantics(l->exps) || !have_nil(rel->exps))) {
3601 212 : set_nodistinct(rel);
3602 212 : v->changes++;
3603 : }
3604 :
3605 : /* rewrite distinct project ( join(p,f) [ p.pk = f.fk ] ) [ p.pk ]
3606 : * into project( (semi)join(p,f) [ p.pk = f.fk ] ) [ p.pk ] */
3607 119820 : if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
3608 7049 : l && (is_select(l->op) || l->op == op_join) && rel_is_join_on_pkey(l, true) /* [ pk == fk ] */) {
3609 0 : sql_exp *found = NULL, *pk = NULL, *fk = NULL;
3610 0 : bool all_exps_atoms = true;
3611 0 : sql_column *pkc = NULL;
3612 :
3613 0 : for (node *m = l->exps->h ; m ; m = m->next) { /* find a primary key join */
3614 0 : sql_exp *je = (sql_exp *) m->data;
3615 0 : sql_exp *le = je->l, *re = je->r;
3616 :
3617 0 : if (!find_prop(je->p, PROP_JOINIDX)) /* must be a pk-fk join expression */
3618 0 : continue;
3619 :
3620 0 : if ((pkc = exp_is_pkey(l, le))) { /* le is the primary key */
3621 0 : all_exps_atoms = true;
3622 :
3623 0 : for (node *n = rel->exps->h; n && all_exps_atoms; n = n->next) {
3624 0 : sql_exp *e = (sql_exp *) n->data;
3625 :
3626 0 : if (exp_match(e, le) || exp_refers(e, le))
3627 : found = e;
3628 0 : else if (e->card > CARD_ATOM)
3629 0 : all_exps_atoms = false;
3630 : }
3631 : pk = le;
3632 : fk = re;
3633 : }
3634 0 : if (!found && (pkc = exp_is_pkey(l, re))) { /* re is the primary key */
3635 0 : all_exps_atoms = true;
3636 :
3637 0 : for (node *n = rel->exps->h; n && all_exps_atoms; n = n->next) {
3638 0 : sql_exp *e = (sql_exp *) n->data;
3639 :
3640 0 : if (exp_match(e, re) || exp_refers(e, re))
3641 : found = e;
3642 0 : else if (e->card > CARD_ATOM)
3643 0 : all_exps_atoms = false;
3644 : }
3645 : pk = re;
3646 : fk = le;
3647 : }
3648 : }
3649 :
3650 0 : if (all_exps_atoms && found) { /* rel must have the same primary key on the projection list */
3651 : /* if the foreign key has no selectivity, the join can be removed */
3652 0 : if (!(rel_is_ref(l)) && ((rel_find_exp(l->l, fk) && is_fk_column_of_pk(v->sql, l->l, pkc, fk) && has_no_selectivity(v->sql, l->l)) ||
3653 0 : (l->r && rel_find_exp(l->r, fk) && is_fk_column_of_pk(v->sql, l->r, pkc, fk) && has_no_selectivity(v->sql, l->r)))) {
3654 0 : sql_rel *side = (rel_find_exp(l->l, pk) != NULL)?l->l:l->r;
3655 :
3656 0 : rel->l = rel_dup(side);
3657 0 : rel_destroy(l);
3658 0 : v->changes++;
3659 0 : set_nodistinct(rel);
3660 0 : return rel;
3661 : }
3662 : /* if the join has no multiple references it can be re-written into a semijoin */
3663 0 : if (l->op == op_join && !(rel_is_ref(l)) && list_length(rel->exps) == 1) { /* other expressions may come from the other side */
3664 0 : if (l->r && rel_find_exp(l->r, pk)) {
3665 0 : sql_rel *temp = l->l;
3666 0 : l->l = l->r;
3667 0 : l->r = temp;
3668 :
3669 0 : l->op = op_semi;
3670 0 : } else if (rel_find_exp(l->l, pk)) {
3671 0 : l->op = op_semi;
3672 : }
3673 : }
3674 0 : v->changes++;
3675 0 : set_nodistinct(rel);
3676 0 : return rel;
3677 : }
3678 : }
3679 : /* rewrite distinct project [ gbe ] ( select ( groupby [ gbe ] [ gbe, e ] )[ e op val ])
3680 : * into project [ gbe ] ( select ( group etc ) */
3681 119820 : if (rel->op == op_project && rel->l && !rel->r /* no order by */ &&
3682 7049 : need_distinct(rel) && l->op == op_select){
3683 2593 : sql_rel *g = l->l;
3684 2593 : if (is_groupby(g->op)) {
3685 4 : list *used = sa_list(v->sql->sa);
3686 4 : list *gbe = g->r;
3687 4 : node *n;
3688 4 : int fnd = 1;
3689 :
3690 8 : for (n = rel->exps->h; n && fnd; n = n->next) {
3691 4 : sql_exp *e = n->data;
3692 :
3693 4 : if (e->card > CARD_ATOM) {
3694 : /* find e in gbe */
3695 4 : sql_exp *ne = list_find_exp(g->exps, e);
3696 :
3697 4 : if (ne)
3698 3 : ne = list_find_exp( gbe, ne);
3699 3 : if (ne && !list_find_exp(used, ne)) {
3700 2 : fnd++;
3701 2 : list_append(used, ne);
3702 : }
3703 3 : if (!ne)
3704 : fnd = 0;
3705 : }
3706 : }
3707 4 : if (fnd == (list_length(gbe)+1)) {
3708 0 : v->changes++;
3709 0 : set_nodistinct(rel);
3710 : }
3711 : }
3712 : }
3713 119820 : if (rel->op == op_project && rel->l &&
3714 7049 : need_distinct(rel) && exps_card(rel->exps) > CARD_ATOM) {
3715 7049 : node *n;
3716 7049 : list *exps = new_exp_list(v->sql->sa), *gbe = new_exp_list(v->sql->sa);
3717 7049 : list *obe = rel->r; /* we need to read the ordering later */
3718 :
3719 7049 : if (obe) {
3720 0 : int fnd = 0;
3721 :
3722 0 : for(n = obe->h; n && !fnd; n = n->next) {
3723 0 : sql_exp *e = n->data;
3724 :
3725 0 : if (e->type != e_column)
3726 : fnd = 1;
3727 0 : else if (exps_bind_nid(rel->exps, e->nid) == NULL)
3728 0 : fnd = 1;
3729 : }
3730 0 : if (fnd)
3731 : return rel;
3732 : }
3733 7049 : rel->l = rel_project(v->sql->sa, rel->l, rel->exps);
3734 :
3735 21689 : for (n = rel->exps->h; n; n = n->next) {
3736 14640 : sql_exp *e = n->data, *ne;
3737 :
3738 14640 : set_nodistinct(e);
3739 14640 : ne = exp_ref(v->sql, e);
3740 14640 : if (e->card > CARD_ATOM && !list_find_exp(gbe, ne)) { /* no need to group by on constants, or the same column multiple times */
3741 14265 : append(gbe, ne);
3742 14265 : ne = exp_ref(v->sql, ne);
3743 : }
3744 14640 : append(exps, ne);
3745 : }
3746 7049 : rel->op = op_groupby;
3747 7049 : rel->exps = exps;
3748 7049 : rel->r = gbe;
3749 7049 : set_nodistinct(rel);
3750 7049 : if (obe) {
3751 : /* add order again */
3752 0 : rel = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
3753 0 : rel->r = obe;
3754 : }
3755 7049 : v->changes++;
3756 : }
3757 : return rel;
3758 : }
3759 :
3760 : static sql_rel *
3761 5685 : rel_distinct_project2groupby(visitor *v, global_props *gp, sql_rel *rel)
3762 : {
3763 5685 : (void) gp;
3764 5685 : return rel_visitor_bottomup(v, rel, &rel_distinct_project2groupby_);
3765 : }
3766 :
3767 : run_optimizer
3768 643149 : bind_distinct_project2groupby(visitor *v, global_props *gp)
3769 : {
3770 643149 : int flag = v->sql->sql_optimizer;
3771 563340 : return gp->opt_cycle == 0 && gp->opt_level == 1 && gp->needs_distinct && gp->cnt[op_project] &&
3772 648834 : (flag & distinct_project2groupby)? rel_distinct_project2groupby : NULL;
3773 : }
|