Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024, 2025 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer_private.h"
15 : #include "rel_basetable.h"
16 : #include "rel_exp.h"
17 : #include "rel_select.h"
18 : #include "rel_rewriter.h"
19 :
20 : static int
21 3744791 : exp_is_rename(sql_exp *e)
22 : {
23 3744791 : return (e->type == e_column);
24 : }
25 :
26 : static int
27 2086975 : exp_is_useless_rename(sql_exp *e)
28 : {
29 2086975 : return (e->type == e_column && e->alias.label == e->nid);
30 : }
31 :
32 : static list *
33 22047 : rel_used_projections(mvc *sql, list *exps, list *users)
34 : {
35 22047 : list *nexps = sa_list(sql->sa);
36 22047 : bool *used = SA_ZNEW_ARRAY(sql->ta, bool, list_length(exps));
37 22047 : int i = 0;
38 :
39 142450 : for(node *n = users->h; n; n = n->next) {
40 120403 : sql_exp *e = n->data, *ne = NULL;
41 120403 : assert(e->nid && exps_bind_nid(exps, e->nid));
42 120403 : if (e->nid && (ne = exps_bind_nid(exps, e->nid))) {
43 120403 : used[list_position(exps, ne)] = 1;
44 : }
45 : }
46 153201 : for(node *n = exps->h; n; n = n->next, i++) {
47 131154 : sql_exp *e = n->data;
48 131154 : if (is_intern(e) || used[i])
49 131017 : append(nexps, e);
50 : }
51 22047 : return nexps;
52 : }
53 :
54 : /* move projects down with the goal op removing them completely (ie push renames/reduced lists into basetable)
55 : * for some cases we can directly remove iff renames rename into same alias
56 : * */
57 : static sql_rel *
58 4041251 : rel_push_project_down_(visitor *v, sql_rel *rel)
59 : {
60 : /* for now only push down renames */
61 4041251 : if (v->depth > 1 && is_simple_project(rel->op) && !need_distinct(rel) && !rel_is_ref(rel) && rel->l && !rel->r &&
62 1177539 : v->parent &&
63 1177531 : !is_modify(v->parent->op) && !is_topn(v->parent->op) && !is_sample(v->parent->op) &&
64 1866745 : !is_ddl(v->parent->op) && !is_set(v->parent->op) && !is_munion(v->parent->op) &&
65 726184 : list_check_prop_all(rel->exps, (prop_check_func)&exp_is_rename)) {
66 422418 : sql_rel *l = rel->l;
67 :
68 422418 : if (rel_is_ref(l))
69 : return rel;
70 391251 : if (is_basetable(l->op)) {
71 42026 : if (list_check_prop_all(rel->exps, (prop_check_func)&exp_is_useless_rename)) {
72 : /* TODO reduce list (those in the project + internal) */
73 22047 : rel->l = NULL;
74 22047 : l->exps = rel_used_projections(v->sql, l->exps, rel->exps);
75 22047 : rel_destroy(rel);
76 22047 : v->changes++;
77 22047 : return l;
78 : }
79 : return rel;
80 349225 : } else if (list_check_prop_all(rel->exps, (prop_check_func)&exp_is_useless_rename)) {
81 140459 : if ((is_project(l->op) && list_length(l->exps) == list_length(rel->exps)) ||
82 123484 : ((v->parent && is_project(v->parent->op)) &&
83 42753 : (is_mset(l->op) || is_set(l->op) || is_select(l->op) || is_join(l->op) || is_semi(l->op) || is_topn(l->op) || is_sample(l->op)))) {
84 58715 : rel->l = NULL;
85 58715 : rel_destroy(rel);
86 58715 : v->changes++;
87 58715 : return l;
88 : }
89 : }
90 : }
91 : /* ToDo handle useful renames, ie new relation name and unique set of attribute names (could reduce set of * attributes) */
92 : /* handle both useless and useful with project [ group by ] */
93 : return rel;
94 : }
95 :
96 : static sql_rel *
97 187414 : rel_push_project_down(visitor *v, global_props *gp, sql_rel *rel)
98 : {
99 187414 : (void) gp;
100 187414 : return rel_visitor_bottomup(v, rel, &rel_push_project_down_);
101 : }
102 :
103 : run_optimizer
104 638407 : bind_push_project_down(visitor *v, global_props *gp)
105 : {
106 638407 : int flag = v->sql->sql_optimizer;
107 638211 : return gp->opt_level == 1 && (flag & push_project_down) &&
108 1276618 : (gp->cnt[op_project] || gp->cnt[op_groupby]) ? rel_push_project_down : NULL;
109 : }
110 :
111 :
112 : static bool exp_shares_exps(sql_exp *e, list *shared, uint64_t *uses);
113 :
114 : static bool
115 113854 : exps_shares_exps(list *exps, list *shared, uint64_t *uses)
116 : {
117 113854 : if (!exps || !shared)
118 : return false;
119 394761 : for (node *n = exps->h; n; n = n->next) {
120 281321 : sql_exp *e = n->data;
121 :
122 281321 : if (exp_shares_exps(e, shared, uses))
123 : return true;
124 : }
125 : return false;
126 : }
127 :
128 : static bool
129 3228714 : exp_shares_exps(sql_exp *e, list *shared, uint64_t *uses)
130 : {
131 5919352 : switch(e->type) {
132 81 : case e_cmp:
133 81 : if (e->flag == cmp_or || e->flag == cmp_filter)
134 58 : return exps_shares_exps(e->l, shared, uses) || exps_shares_exps(e->r, shared, uses);
135 23 : else if (e->flag == cmp_in || e->flag == cmp_notin)
136 0 : return exp_shares_exps(e->l, shared, uses) || exps_shares_exps(e->r, shared, uses);
137 : else
138 46 : return exp_shares_exps(e->l, shared, uses) || exp_shares_exps(e->r, shared, uses) || (e->f && exp_shares_exps(e->f, shared, uses));
139 114475 : case e_atom:
140 114475 : if (e->f)
141 0 : return exps_shares_exps(e->f, shared, uses);
142 : return false;
143 5402482 : case e_column:
144 : {
145 5402482 : sql_exp *ne = NULL;
146 5402482 : assert(e->nid);
147 5402482 : if (e->nid)
148 5402482 : ne = exps_bind_nid(shared, e->nid);
149 5402482 : if (!ne)
150 : return false;
151 4127944 : if (ne->type != e_column) {
152 591015 : int i = list_position(shared, ne);
153 591015 : if (i < 0)
154 : return false;
155 591015 : uint64_t used = (uint64_t) 1 << i;
156 591015 : if (used & *uses)
157 : return true;
158 590597 : *uses |= used;
159 590597 : return false;
160 : }
161 3536929 : if (ne != e && (list_position(shared, e) < 0 || list_position(shared, e) > list_position(shared, ne)))
162 : /* maybe ne refers to a local complex exp */
163 2402062 : return exp_shares_exps(ne, shared, uses);
164 : return false;
165 : }
166 288576 : case e_convert:
167 288576 : return exp_shares_exps(e->l, shared, uses);
168 113738 : case e_aggr:
169 : case e_func:
170 113738 : return exps_shares_exps(e->l, shared, uses);
171 : case e_psm:
172 0 : assert(0); /* not in projection list */
173 : }
174 : return false;
175 : }
176 :
177 : static bool
178 390207 : exps_share_expensive_exp(list *exps, list *shared )
179 : {
180 390207 : uint64_t uses = 0;
181 :
182 390207 : if (list_empty(exps) || list_empty(shared))
183 0 : return false;
184 390207 : if (list_length(shared) > 64)
185 : return true;
186 3336373 : for (node *n = exps->h; n; n = n->next) {
187 2947331 : sql_exp *e = n->data;
188 :
189 2947331 : if (exp_shares_exps(e, shared, &uses))
190 : return true;
191 : }
192 : return false;
193 : }
194 :
195 : static bool ambigious_ref( list *exps, sql_exp *e);
196 : static bool
197 87625 : ambigious_refs( list *exps, list *refs)
198 : {
199 87625 : if (list_empty(refs))
200 : return false;
201 265444 : for(node *n=refs->h; n; n = n->next) {
202 199133 : if (ambigious_ref(exps, n->data))
203 : return true;
204 : }
205 : return false;
206 : }
207 :
208 : static bool
209 4414862 : ambigious_ref( list *exps, sql_exp *e)
210 : {
211 4414862 : sql_exp *ne = NULL;
212 :
213 4414862 : if (e->type == e_column) {
214 3817316 : assert(e->nid);
215 3817316 : if (e->nid)
216 3817316 : ne = exps_bind_nid(exps, e->nid);
217 3817316 : if (ne && e != ne)
218 : return true;
219 : }
220 4405819 : if (e->type == e_func)
221 87625 : return ambigious_refs(exps, e->l);
222 : return false;
223 : }
224 :
225 : /* merge 2 projects into the lower one */
226 : static sql_rel *
227 4329006 : rel_merge_projects_(visitor *v, sql_rel *rel)
228 : {
229 4397431 : list *exps = rel->exps;
230 4397431 : sql_rel *prj = rel->l;
231 4397431 : node *n;
232 :
233 4397431 : if (rel->op == op_project &&
234 1769663 : prj && prj->op == op_project && !(rel_is_ref(prj)) && list_empty(prj->r)) {
235 676366 : int all = 1;
236 :
237 676366 : if (project_unsafe(rel, false) || project_unsafe(prj, false) || exps_share_expensive_exp(rel->exps, prj->exps))
238 287324 : return rel;
239 :
240 : /* here we try to fix aliases */
241 389042 : list *nexps = NULL;
242 : /* for each exp check if we can rename it */
243 2648172 : for (n = exps->h; n && all; n = n->next) {
244 2268173 : sql_exp *e = n->data, *ne = NULL;
245 :
246 : /* We do not handle expressions pointing back in the list */
247 2268173 : if (ambigious_ref(exps, e)) {
248 : all = 0;
249 : break;
250 : }
251 2259130 : ne = exp_push_down_prj(v->sql, e, prj, prj->l);
252 : /* check if the referred alias name isn't used twice */
253 2259130 : if (ne && ambigious_ref(nexps, ne)) {
254 : all = 0;
255 : break;
256 : }
257 1947556 : if (ne) {
258 1947556 : if (exp_name(e))
259 1941153 : exp_prop_alias(v->sql->sa, ne, e);
260 1947556 : if (!nexps)
261 379990 : nexps = new_exp_list(v->sql->sa);
262 1947556 : list_append(nexps, ne);
263 : } else {
264 : all = 0;
265 : }
266 : }
267 389042 : if (all) {
268 68425 : rel->exps = nexps;
269 : /* we can now remove the intermediate project */
270 : /* push order by expressions */
271 68425 : if (!list_empty(rel->r)) {
272 0 : list *nr = new_exp_list(v->sql->sa), *res = rel->r;
273 0 : for (n = res->h; n; n = n->next) {
274 0 : sql_exp *e = n->data, *ne = NULL;
275 :
276 0 : ne = exp_push_down_prj(v->sql, e, prj, prj->l);
277 0 : if (ne) {
278 0 : if (exp_name(e))
279 0 : exp_prop_alias(v->sql->sa, ne, e);
280 0 : list_append(nr, ne);
281 : } else {
282 : all = 0;
283 : }
284 : }
285 0 : if (all) {
286 0 : rel->r = nr;
287 : } else {
288 : /* leave as is */
289 0 : rel->exps = exps;
290 0 : return rel;
291 : }
292 : }
293 68425 : rel->l = prj->l;
294 68425 : prj->l = NULL;
295 68425 : rel_destroy(prj);
296 68425 : v->changes++;
297 68425 : return rel_merge_projects_(v, rel);
298 : } else {
299 : /* leave as is */
300 320617 : rel->exps = exps;
301 : }
302 320617 : return rel;
303 : }
304 : return rel;
305 : }
306 :
307 : static sql_rel *
308 187407 : rel_merge_projects(visitor *v, global_props *gp, sql_rel *rel)
309 : {
310 187407 : (void) gp;
311 187407 : return rel_visitor_bottomup(v, rel, &rel_merge_projects_);
312 : }
313 :
314 : run_optimizer
315 638402 : bind_merge_projects(visitor *v, global_props *gp)
316 : {
317 638402 : int flag = v->sql->sql_optimizer;
318 638206 : return gp->opt_level == 1 && (flag & merge_projects) &&
319 1276600 : (gp->cnt[op_project] || gp->cnt[op_groupby]) ? rel_merge_projects : NULL;
320 : }
321 :
322 :
323 : static sql_exp *split_aggr_and_project(mvc *sql, list *aexps, sql_exp *e);
324 :
325 : static void
326 0 : list_split_aggr_and_project(mvc *sql, list *aexps, list *exps)
327 : {
328 0 : if (list_empty(exps))
329 : return ;
330 0 : for(node *n = exps->h; n; n = n->next)
331 0 : n->data = split_aggr_and_project(sql, aexps, n->data);
332 : }
333 :
334 : static sql_exp *
335 0 : split_aggr_and_project(mvc *sql, list *aexps, sql_exp *e)
336 : {
337 0 : switch(e->type) {
338 0 : case e_aggr:
339 : /* add to the aggrs */
340 0 : if (!exp_name(e))
341 0 : exp_label(sql->sa, e, ++sql->label);
342 0 : list_append(aexps, e);
343 0 : return exp_ref(sql, e);
344 : case e_cmp:
345 : /* e_cmp's shouldn't exist in an aggr expression list */
346 0 : assert(0);
347 : case e_convert:
348 0 : e->l = split_aggr_and_project(sql, aexps, e->l);
349 0 : return e;
350 0 : case e_func:
351 0 : list_split_aggr_and_project(sql, aexps, e->l);
352 0 : return e;
353 : case e_atom:
354 : case e_column: /* constants and columns shouldn't be rewritten */
355 : case e_psm:
356 : return e;
357 : }
358 : return NULL;
359 : }
360 :
361 : /* exp_rename */
362 : static sql_exp * exp_rename(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t);
363 :
364 : static list *
365 53288 : exps_rename(mvc *sql, list *l, sql_rel *f, sql_rel *t)
366 : {
367 53288 : if (list_empty(l))
368 : return l;
369 47204 : for (node *n=l->h; n; n=n->next)
370 30329 : n->data = exp_rename(sql, n->data, f, t);
371 : return l;
372 : }
373 :
374 : /* exp_rename */
375 : static sql_exp *
376 181790 : exp_rename(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t)
377 : {
378 181790 : sql_exp *ne = NULL;
379 :
380 181790 : assert(is_project(f->op));
381 :
382 181790 : switch(e->type) {
383 81431 : case e_column:
384 81431 : assert(e->nid);
385 81431 : ne = exps_bind_nid(f->exps, e->nid);
386 81431 : if (!ne)
387 : return e;
388 53471 : sql_exp *oe = e;
389 53471 : e = NULL;
390 53471 : if (ne && ne->nid)
391 53312 : e = rel_find_exp(t, ne);
392 53312 : if (!e) {
393 159 : sql->session->status = 0;
394 159 : sql->errstr[0] = 0;
395 159 : if (exp_is_atom(ne))
396 : return ne;
397 : return oe;
398 : }
399 53312 : ne = exp_ref(sql, e);
400 53312 : if (oe)
401 53312 : exp_propagate(sql->sa, ne, oe);
402 53312 : return ne;
403 56040 : case e_cmp:
404 56040 : if (e->flag == cmp_or || e->flag == cmp_filter) {
405 3782 : e->l = exps_rename(sql, e->l, f, t);
406 3782 : e->r = exps_rename(sql, e->r, f, t);
407 52258 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
408 6447 : e->l = exp_rename(sql, e->l, f, t);
409 6447 : e->r = exps_rename(sql, e->r, f, t);
410 : } else {
411 45811 : e->l = exp_rename(sql, e->l, f, t);
412 45811 : e->r = exp_rename(sql, e->r, f, t);
413 45811 : if (e->f)
414 233 : e->f = exp_rename(sql, e->f, f, t);
415 : }
416 : break;
417 5042 : case e_convert:
418 5042 : e->l = exp_rename(sql, e->l, f, t);
419 5042 : break;
420 2879 : case e_aggr:
421 : case e_func:
422 2879 : e->l = exps_rename(sql, e->l, f, t);
423 2879 : break;
424 36398 : case e_atom:
425 36398 : e->f = exps_rename(sql, e->f, f, t);
426 36398 : break;
427 : case e_psm:
428 : break;
429 : }
430 : return e;
431 : }
432 :
433 : static int
434 5943398 : exp_match_exp_cmp( sql_exp *e1, sql_exp *e2)
435 : {
436 5943398 : if (exp_match_exp(e1,e2))
437 61588 : return 0;
438 : return -1;
439 : }
440 :
441 : /* Pushing projects up the tree. Done very early in the optimizer.
442 : * Makes later steps easier.
443 : */
444 : extern void _rel_print(mvc *sql, sql_rel *rel);
445 : static sql_rel *
446 3926671 : rel_push_project_up_(visitor *v, sql_rel *rel)
447 : {
448 3926671 : if (is_simple_project(rel->op) && rel->l && !rel_is_ref(rel)) {
449 1265352 : sql_rel *l = rel->l;
450 1265352 : if (is_simple_project(l->op))
451 326296 : return rel_merge_projects_(v, rel);
452 : /* find equal column references, later cases are rewritten into references back to the first
453 : * project () [ i.i L1, i.i L2 ] -> project() [ i.i L1, L1 L2 ] */
454 939056 : if (list_length(rel->exps) > 1) {
455 792366 : list *exps = rel->exps;
456 792366 : node *n = exps->h;
457 792366 : bool needed = false;
458 4079041 : for(n = n->next; n && !needed; n = n->next) {
459 3286679 : sql_exp *e = n->data;
460 3286679 : if (e->type == e_column && !is_selfref(e)) {
461 21435799 : for(node *m = exps->h; m && m != n && !needed; m = m->next) {
462 18639902 : sql_exp *h = m->data;
463 18639902 : if (exp_match_exp(h,e))
464 9789 : needed = true;
465 : }
466 : }
467 : }
468 792362 : if (needed) {
469 9789 : rel->exps = sa_list(v->sql->sa);
470 9789 : node *n = exps->h;
471 9789 : list_append(rel->exps, n->data);
472 121371 : for(n = n->next; n; n = n->next) {
473 111582 : sql_exp *e = n->data;
474 111582 : if (e->type == e_column && !is_selfref(e)) {
475 70021 : node *m = list_find(rel->exps, e, (fcmp)&exp_match_exp_cmp);
476 70021 : if (m) {
477 12239 : sql_exp *me = m->data;
478 12239 : if (me->alias.label != me->nid) {
479 799 : sql_exp *ne = exp_ref(v->sql, m->data);
480 799 : exp_setalias(ne, e->alias.label, exp_relname(e), exp_name(e));
481 799 : exp_propagate(v->sql->sa, ne, e);
482 799 : set_selfref(ne);
483 799 : e = ne;
484 : }
485 : }
486 : }
487 111582 : list_append(rel->exps, e);
488 : }
489 : }
490 : }
491 : }
492 :
493 : /* project/project cleanup is done later */
494 3600371 : if (is_join(rel->op) || is_select(rel->op)) {
495 1158713 : node *n;
496 1158713 : list *exps = NULL, *l_exps, *r_exps;
497 1158713 : sql_rel *l = rel->l;
498 1158713 : sql_rel *r = rel->r;
499 1158713 : sql_rel *t;
500 1158713 : int nlexps = 0, i = 0;
501 :
502 : /* Don't rewrite refs, non projections or constant or
503 : order by projections */
504 1158713 : if (!l || rel_is_ref(l) || is_topn(l->op) || is_sample(l->op) ||
505 1070300 : (is_join(rel->op) && !list_empty(rel->attr)) ||
506 1019219 : (is_join(rel->op) && (!r || rel_is_ref(r))) ||
507 993678 : (is_left(rel->op) && (rel->flag&MERGE_LEFT) /* can't push projections above merge statements left joins */) ||
508 993676 : (is_select(rel->op) && l->op != op_project) ||
509 488429 : (is_join(rel->op) && ((l->op != op_project && r->op != op_project) || is_topn(r->op) || is_sample(r->op))) ||
510 169242 : ((l->op == op_project && (!l->l || l->r || project_unsafe(l, is_select(rel->op)))) ||
511 98988 : (is_join(rel->op) && (r->op == op_project && (!r->l || r->r || project_unsafe(r, false))))))
512 1073905 : return rel;
513 :
514 84808 : if (l->op == op_project && l->l) {
515 1025014 : for (n = l->exps->h; n; n = n->next) {
516 984044 : sql_exp *e = n->data;
517 984044 : if (!(is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) &&
518 982797 : !(e->type == e_column && !has_label(e)))
519 : return rel;
520 : }
521 : }
522 62504 : if (is_join(rel->op) && r->op == op_project && r->l) {
523 65620 : for (n = r->exps->h; n; n = n->next) {
524 60367 : sql_exp *e = n->data;
525 60367 : if (!(is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) &&
526 59954 : !(e->type == e_column && !has_label(e)))
527 : return rel;
528 : }
529 : }
530 :
531 43985 : if (l->op == op_project && l->l) {
532 : /* Go through the list of project expressions.
533 : Check if they can be pushed up, ie are they not
534 : changing or introducing any columns used
535 : by the upper operator. */
536 :
537 40104 : exps = new_exp_list(v->sql->sa);
538 909039 : for (n = l->exps->h; n; n = n->next) {
539 868935 : sql_exp *e = n->data;
540 :
541 : /* we cannot rewrite projection with atomic values from outer joins */
542 868935 : if (is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) {
543 661 : list_append(exps, e);
544 868274 : } else if (e->type == e_column) {
545 868274 : if (has_label(e))
546 : return rel;
547 868274 : list_append(exps, e);
548 : } else {
549 : return rel;
550 : }
551 : }
552 : } else {
553 3881 : exps = rel_projections(v->sql, l, NULL, 1, 1);
554 : }
555 43985 : nlexps = list_length(exps);
556 : /* also handle right hand of join */
557 43985 : if (is_join(rel->op) && r->op == op_project && r->l && list_empty(rel->attr)) {
558 : /* Here we also check all expressions of r like above
559 : but also we need to check for ambiguous names. */
560 :
561 43922 : for (n = r->exps->h; n; n = n->next) {
562 38923 : sql_exp *e = n->data;
563 :
564 : /* we cannot rewrite projection with atomic values from outer joins */
565 38923 : if (is_column(e->type) && exp_is_atom(e) && !(is_left(rel->op) || is_full(rel->op))) {
566 148 : list_append(exps, e);
567 38775 : } else if (e->type == e_column) {
568 38521 : if (has_label(e))
569 : return rel;
570 38521 : list_append(exps, e);
571 : } else {
572 : return rel;
573 : }
574 : }
575 38732 : } else if (is_join(rel->op) && list_empty(rel->attr)) {
576 21260 : list *r_exps = rel_projections(v->sql, r, NULL, 1, 1);
577 21260 : list_merge(exps, r_exps, (fdup)NULL);
578 : }
579 43731 : if (!list_empty(rel->attr))
580 0 : append(exps, exp_ref(v->sql, rel->attr->h->data));
581 : /* Here we should check for ambiguous names ? */
582 43731 : if (is_join(rel->op) && r && list_empty(rel->attr)) {
583 26259 : t = (l->op == op_project && l->l)?l->l:l;
584 26259 : l_exps = rel_projections(v->sql, t, NULL, 1, 1);
585 : /* conflict with old right expressions */
586 26259 : t = (r->op == op_project && r->l)?r->l:r;
587 26259 : r_exps = rel_projections(v->sql, t, NULL, 1, 1);
588 : /* conflict with new right expressions */
589 695711 : for(n = l_exps->h; n; n = n->next) {
590 669823 : sql_exp *e = n->data;
591 :
592 669823 : if (exp_is_atom(e))
593 0 : continue;
594 669823 : if (e->nid && exps_bind_nid(r_exps, e->nid))
595 : return rel;
596 669452 : if (e->alias.label && exps_bind_nid(r_exps, e->alias.label))
597 : return rel;
598 : }
599 : /* conflict with new left expressions */
600 177076 : for(n = r_exps->h; n; n = n->next) {
601 151188 : sql_exp *e = n->data;
602 :
603 151188 : if (exp_is_atom(e))
604 0 : continue;
605 151188 : if (e->nid && exps_bind_nid(l_exps, e->nid))
606 : return rel;
607 151188 : if (e->alias.label && exps_bind_nid(l_exps, e->alias.label))
608 : return rel;
609 : }
610 : }
611 :
612 : /* rename operator expressions */
613 43360 : if (l->op == op_project) {
614 : /* rewrite rel from rel->l into rel->l->l */
615 39702 : if (rel->exps) {
616 81856 : for (n = rel->exps->h; n; n = n->next) {
617 43550 : sql_exp *e = n->data, *ne;
618 :
619 43550 : ne = exp_rename(v->sql, e, l, l->l);
620 43550 : assert(ne);
621 43550 : if (ne != e && exp_name(e))
622 0 : exp_propagate(v->sql->sa, ne, e);
623 43550 : n->data = ne;
624 : }
625 : }
626 39702 : rel->l = l->l;
627 39702 : l->l = NULL;
628 39702 : rel_destroy(l);
629 : }
630 43360 : if (is_join(rel->op) && r->op == op_project && list_empty(rel->attr)) {
631 : /* rewrite rel from rel->r into rel->r->l */
632 4638 : if (rel->exps) {
633 8545 : for (n = rel->exps->h; n; n = n->next) {
634 4567 : sql_exp *e = n->data, *ne;
635 :
636 4567 : ne = exp_rename(v->sql, e, r, r->l);
637 4567 : assert(ne);
638 4567 : if (ne != e && exp_name(e))
639 0 : exp_propagate(v->sql->sa, ne, e);
640 4567 : n->data = ne;
641 : }
642 : }
643 4638 : rel->r = r->l;
644 4638 : r->l = NULL;
645 4638 : rel_destroy(r);
646 : }
647 : /* Done, ie introduce new project */
648 43360 : exps_fix_card(exps, rel->card);
649 : /* Fix nil flag */
650 43360 : if (!list_empty(exps)) {
651 931679 : for (n = exps->h ; n && i < nlexps ; n = n->next, i++) {
652 888319 : sql_exp *e = n->data;
653 :
654 888319 : if (is_right(rel->op) || is_full(rel->op))
655 449 : set_has_nil(e);
656 888319 : set_not_unique(e);
657 : }
658 187838 : for (; n ; n = n->next) {
659 144478 : sql_exp *e = n->data;
660 :
661 144478 : if (is_left(rel->op) || is_full(rel->op))
662 44810 : set_has_nil(e);
663 144478 : set_not_unique(e);
664 : }
665 : }
666 43360 : v->changes++;
667 43360 : return rel_inplace_project(v->sql->sa, rel, NULL, exps);
668 : }
669 2441658 : if (is_groupby(rel->op) && !rel_is_ref(rel) && rel->exps && list_length(rel->exps) > 1) {
670 41805 : node *n;
671 41805 : int fnd = 0;
672 41805 : list *aexps, *pexps;
673 :
674 : /* check if some are expressions aren't e_aggr */
675 163966 : for (n = rel->exps->h; n && !fnd; n = n->next) {
676 122161 : sql_exp *e = n->data;
677 :
678 122161 : if (e->type != e_aggr && e->type != e_column && e->type != e_atom && e->card > CARD_ATOM) {
679 122161 : fnd = 1;
680 : }
681 : }
682 : /* only aggr, no rewrite needed */
683 41805 : if (!fnd)
684 : return rel;
685 :
686 0 : aexps = sa_list(v->sql->sa);
687 0 : pexps = sa_list(v->sql->sa);
688 0 : for (n = rel->exps->h; n; n = n->next) {
689 0 : sql_exp *e = n->data, *ne = NULL;
690 :
691 0 : switch (e->type) {
692 0 : case e_atom: /* move over to the projection */
693 0 : list_append(pexps, e);
694 0 : break;
695 0 : case e_func:
696 0 : list_append(pexps, e);
697 0 : list_split_aggr_and_project(v->sql, aexps, e->l);
698 0 : break;
699 0 : case e_convert:
700 0 : list_append(pexps, e);
701 0 : e->l = split_aggr_and_project(v->sql, aexps, e->l);
702 0 : break;
703 0 : default: /* simple alias */
704 0 : list_append(aexps, e);
705 0 : ne = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
706 0 : assert(0);
707 : list_append(pexps, ne);
708 : break;
709 : }
710 : }
711 0 : v->changes++;
712 0 : rel->exps = aexps;
713 0 : return rel_inplace_project( v->sql->sa, rel, NULL, pexps);
714 : }
715 : return rel;
716 : }
717 :
718 : static sql_rel *
719 186696 : rel_push_project_up(visitor *v, global_props *gp, sql_rel *rel)
720 : {
721 186696 : (void) gp;
722 186696 : return rel_visitor_bottomup(v, rel, &rel_push_project_up_);
723 : }
724 :
725 : run_optimizer
726 638403 : bind_push_project_up(visitor *v, global_props *gp)
727 : {
728 638403 : int flag = v->sql->sql_optimizer;
729 638207 : return gp->opt_level == 1 && (flag & push_project_up) &&
730 1276610 : gp->cnt[op_project] ? rel_push_project_up : NULL;
731 : }
732 :
733 :
734 : static void split_exps(mvc *sql, list *exps, sql_rel *rel);
735 :
736 : static int
737 3193548 : exp_refers_cmp( sql_exp *e1, sql_exp *e2)
738 : {
739 3193548 : if (exp_refers(e1,e2))
740 0 : return 0;
741 : return -1;
742 : }
743 :
744 : sql_exp *
745 134852 : add_exp_too_project(mvc *sql, sql_exp *e, sql_rel *rel)
746 : {
747 134852 : node *n = list_find(rel->exps, e, (fcmp)&exp_match_exp_cmp);
748 :
749 : /* if not matching we may refer to an older expression */
750 134852 : if (!n)
751 85503 : n = list_find(rel->exps, e, (fcmp)&exp_refers_cmp);
752 85503 : if (!n) {
753 85503 : exp_label(sql->sa, e, ++sql->label);
754 85503 : append(rel->exps, e);
755 : } else {
756 49349 : sql_exp *ne = n->data;
757 :
758 49349 : if (rel && rel->l) {
759 49349 : if (ne && ne->alias.label && rel_find_nid(rel->l, ne->alias.label)) {
760 0 : exp_label(sql->sa, e, ++sql->label);
761 0 : append(rel->exps, e);
762 0 : ne = e;
763 : }
764 : }
765 : e = ne;
766 : }
767 134852 : e = exp_ref(sql, e);
768 134852 : return e;
769 : }
770 :
771 : static void
772 156260 : add_exps_too_project(mvc *sql, list *exps, sql_rel *rel)
773 : {
774 156260 : node *n;
775 :
776 156260 : if (!exps)
777 : return;
778 483909 : for(n=exps->h; n; n = n->next) {
779 327649 : sql_exp *e = n->data;
780 :
781 327649 : if (e->type != e_column && !exp_is_atom(e))
782 134569 : n->data = add_exp_too_project(sql, e, rel);
783 : }
784 : }
785 :
786 : static sql_exp *
787 515002 : split_exp(mvc *sql, sql_exp *e, sql_rel *rel)
788 : {
789 515002 : if (exp_is_atom(e))
790 : return e;
791 382568 : switch(e->type) {
792 : case e_column:
793 : return e;
794 17081 : case e_convert:
795 17081 : e->l = split_exp(sql, e->l, rel);
796 17081 : return e;
797 166142 : case e_aggr:
798 : case e_func:
799 166142 : if (!is_analytic(e) && !exp_has_sideeffect(e)) {
800 164194 : sql_subfunc *f = e->f;
801 164194 : if (e->type == e_func && !f->func->s && is_caselike_func(f) /*is_ifthenelse_func(f)*/) {
802 : return e;
803 : } else {
804 156260 : split_exps(sql, e->l, rel);
805 156260 : add_exps_too_project(sql, e->l, rel);
806 : }
807 : }
808 : return e;
809 492 : case e_cmp:
810 492 : if (e->flag == cmp_or || e->flag == cmp_filter) {
811 469 : split_exps(sql, e->l, rel);
812 469 : split_exps(sql, e->r, rel);
813 23 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
814 0 : e->l = split_exp(sql, e->l, rel);
815 0 : split_exps(sql, e->r, rel);
816 : } else {
817 23 : e->l = split_exp(sql, e->l, rel);
818 23 : e->r = split_exp(sql, e->r, rel);
819 23 : if (e->f)
820 21 : e->f = split_exp(sql, e->f, rel);
821 : }
822 : return e;
823 : case e_atom:
824 : case e_psm:
825 : return e;
826 : }
827 : return e;
828 : }
829 :
830 : static void
831 157205 : split_exps(mvc *sql, list *exps, sql_rel *rel)
832 : {
833 157205 : if (list_empty(exps))
834 : return;
835 485813 : for(node *n=exps->h; n; n = n->next){
836 328608 : sql_exp *e = n->data;
837 :
838 328608 : e = split_exp(sql, e, rel);
839 328608 : n->data = e;
840 : }
841 : }
842 :
843 : sql_rel *
844 642752 : rel_split_project_(visitor *v, sql_rel *rel, int top)
845 : {
846 1285504 : if (mvc_highwater(v->sql))
847 : return rel;
848 :
849 642752 : if (!rel)
850 : return NULL;
851 642752 : if (is_project(rel->op) && list_length(rel->exps) && (is_groupby(rel->op) || rel->l) && !need_distinct(rel) && !is_single(rel)) {
852 180213 : list *exps = rel->exps;
853 180213 : node *n;
854 180213 : int funcs = 0;
855 180213 : sql_rel *nrel;
856 :
857 : /* are there functions */
858 1405443 : for (n=exps->h; n && !funcs; n = n->next) {
859 1225230 : sql_exp *e = n->data;
860 :
861 1225230 : funcs = exp_has_func(e);
862 : }
863 : /* introduce extra project */
864 180213 : if (funcs && rel->op != op_project) {
865 7 : nrel = rel_project(v->sql->sa, rel->l,
866 7 : rel_projections(v->sql, rel->l, NULL, 1, 1));
867 7 : rel->l = nrel;
868 : /* recursively split all functions and add those to the projection list */
869 7 : split_exps(v->sql, rel->exps, nrel);
870 14 : if (nrel->l && !(nrel->l = rel_split_project_(v, nrel->l, (is_topn(rel->op)||is_sample(rel->op))?top:0)))
871 : return NULL;
872 7 : return rel;
873 180206 : } else if (funcs && !top && list_empty(rel->r)) {
874 : /* projects can have columns point back into the expression list, ie
875 : * create a new list including the split expressions */
876 17328 : node *n;
877 17328 : list *exps = rel->exps;
878 :
879 17328 : rel->exps = sa_list(v->sql->sa);
880 186574 : for (n=exps->h; n; n = n->next)
881 169246 : append(rel->exps, split_exp(v->sql, n->data, rel));
882 162878 : } else if (funcs && top && rel_is_ref(rel) && list_empty(rel->r)) {
883 : /* inplace */
884 9 : list *exps = rel_projections(v->sql, rel, NULL, 1, 1);
885 9 : sql_rel *l = rel_project(v->sql->sa, rel->l, NULL);
886 9 : rel->l = l;
887 9 : l->exps = rel->exps;
888 9 : set_processed(l);
889 9 : rel->exps = exps;
890 : }
891 : }
892 642745 : if (is_mset(rel->op) || is_set(rel->op) || is_basetable(rel->op))
893 : return rel;
894 420986 : if (rel->l && (rel->op != op_table || rel->flag != TRIGGER_WRAPPER)) {
895 441217 : rel->l = rel_split_project_(v, rel->l, (is_topn(rel->op)||is_sample(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0);
896 410380 : if (!rel->l)
897 : return NULL;
898 : }
899 420986 : if ((is_join(rel->op) || is_semi(rel->op)) && rel->r) {
900 122003 : rel->r = rel_split_project_(v, rel->r, (is_topn(rel->op)||is_sample(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0);
901 122003 : if (!rel->r)
902 : return NULL;
903 : }
904 : return rel;
905 : }
906 :
907 : static sql_rel *
908 110362 : rel_split_project(visitor *v, global_props *gp, sql_rel *rel)
909 : {
910 110362 : (void) gp;
911 110362 : return rel_split_project_(v, rel, 1);
912 : }
913 :
914 : run_optimizer
915 638407 : bind_split_project(visitor *v, global_props *gp)
916 : {
917 638407 : int flag = v->sql->sql_optimizer;
918 561147 : return gp->opt_cycle == 0 && gp->opt_level == 1 && (flag & split_project) &&
919 1199546 : (gp->cnt[op_project] || gp->cnt[op_groupby]) ? rel_split_project : NULL;
920 : }
921 :
922 :
923 : static sql_rel *
924 186893 : rel_project_reduce_casts(visitor *v, global_props *gp, sql_rel *rel)
925 : {
926 186893 : if (!rel)
927 : return NULL;
928 186893 : (void) gp;
929 186893 : if (gp->opt_level == 1 && v->value_based_opt && is_simple_project(rel->op) && list_length(rel->exps)) {
930 99294 : list *exps = rel->exps;
931 99294 : node *n;
932 :
933 516013 : for (n=exps->h; n; n = n->next) {
934 416719 : sql_exp *e = n->data;
935 :
936 416719 : if (e && e->type == e_func) {
937 30736 : sql_subfunc *f = e->f;
938 30736 : sql_subtype *res = f->res->h->data;
939 :
940 30736 : if (!f->func->s && !strcmp(f->func->base.name, "sql_mul") && res->scale > 0) {
941 6 : list *args = e->l;
942 6 : sql_exp *h = args->h->data;
943 6 : sql_exp *t = args->t->data;
944 6 : atom *ha = exp_value(v->sql, h), *ta = exp_value(v->sql, t);
945 :
946 6 : if (ha || ta) {
947 5 : atom *a = ha ? ha : ta;
948 5 : atom *na = reduce_scale(v->sql, a);
949 :
950 5 : if (na && na != a) {
951 2 : int rs = a->tpe.scale - na->tpe.scale;
952 2 : res->scale -= rs;
953 2 : if (ha) {
954 2 : h->r = NULL;
955 2 : h->l = na;
956 : } else {
957 0 : t->r = NULL;
958 0 : t->l = na;
959 : }
960 2 : v->changes++;
961 : }
962 : }
963 : }
964 : }
965 : }
966 : }
967 : return rel;
968 : }
969 :
970 : run_optimizer
971 638405 : bind_project_reduce_casts(visitor *v, global_props *gp)
972 : {
973 638405 : int flag = v->sql->sql_optimizer;
974 638405 : return gp->cnt[op_project] && (flag & project_reduce_casts) ? rel_project_reduce_casts : NULL;
975 : }
976 :
977 : static sql_column *
978 99554 : exp_find_column_( sql_rel *rel, sql_exp *exp, int pnr, sql_rel **bt )
979 : {
980 99554 : if (exp->type == e_column)
981 99529 : return name_find_column(rel, exp->l, exp->r, pnr, bt);
982 : return NULL;
983 : }
984 :
985 : static int
986 0 : rel_part_nr( sql_rel *rel, sql_exp *e )
987 : {
988 0 : sql_column *c = NULL;
989 0 : sql_rel *bt = NULL;
990 0 : assert(e->type == e_cmp);
991 :
992 0 : c = exp_find_column_(rel, e->l, -1, &bt);
993 0 : if (!c)
994 0 : c = exp_find_column_(rel, e->r, -1, &bt);
995 0 : if (!c && e->f)
996 0 : c = exp_find_column_(rel, e->f, -1, &bt);
997 0 : if (!c || !bt || !rel_base_get_mergetable(bt))
998 0 : return -1;
999 0 : sql_table *pp = c->t;
1000 0 : sql_table *mt = rel_base_get_mergetable(bt);
1001 0 : return find_member_pos(mt->members, pp);
1002 : }
1003 :
1004 : static int
1005 0 : rel_uses_part_nr( sql_rel *rel, sql_exp *e, int pnr )
1006 : {
1007 0 : sql_column *c = NULL;
1008 0 : sql_rel *bt = NULL;
1009 0 : assert(e->type == e_cmp);
1010 :
1011 : /*
1012 : * following case fails.
1013 : *
1014 : * semijoin( A1, union [A1, A2] )
1015 : * The union will never return proper column (from A2).
1016 : * ie need different solution (probably pass pnr).
1017 : */
1018 0 : c = exp_find_column_(rel, e->l, pnr, &bt);
1019 0 : if (!c)
1020 0 : c = exp_find_column_(rel, e->r, pnr, &bt);
1021 0 : if (c && bt && rel_base_get_mergetable(bt)) {
1022 0 : sql_table *pp = c->t;
1023 0 : sql_table *mt = rel_base_get_mergetable(bt);
1024 0 : if (find_member_pos(mt->members, pp) == pnr)
1025 : return 1;
1026 : }
1027 : /* for projects we may need to do a rename! */
1028 0 : if (is_project(rel->op) || is_topn(rel->op) || is_sample(rel->op))
1029 0 : return rel_uses_part_nr( rel->l, e, pnr);
1030 :
1031 0 : if (is_munion(rel->op)) {
1032 : list *l = rel->l;
1033 : if (rel_uses_part_nr( l->h->data, e, pnr))
1034 : return 1;
1035 0 : } else if (is_union(rel->op) || is_join(rel->op) || is_semi(rel->op)) {
1036 0 : if (rel_uses_part_nr( rel->l, e, pnr))
1037 : return 1;
1038 0 : if (!is_semi(rel->op) && rel_uses_part_nr( rel->r, e, pnr))
1039 : return 1;
1040 : }
1041 : return 0;
1042 : }
1043 :
1044 : static sql_column *
1045 3028 : exp_is_pkey(sql_rel *rel, sql_exp *e)
1046 : {
1047 3028 : if (find_prop(e->p, PROP_HASHCOL)) { /* aligned PKEY JOIN */
1048 250 : fcmp cmp = (fcmp)&kc_column_cmp;
1049 250 : sql_column *c = exp_find_column(rel, e, -2);
1050 :
1051 250 : if (c && c->t->pkey && list_find(c->t->pkey->k.columns, c, cmp) != NULL)
1052 : return c;
1053 : }
1054 : return NULL;
1055 : }
1056 :
1057 : static sql_exp *
1058 3542 : rel_is_join_on_pkey(sql_rel *rel, bool pk_fk) /* pk_fk is used to verify is a join on pk-fk */
1059 : {
1060 3542 : if (!rel || !rel->exps)
1061 : return NULL;
1062 4875 : for (node *n = rel->exps->h; n; n = n->next) {
1063 2587 : sql_exp *je = n->data;
1064 :
1065 3916 : if (je->type == e_cmp && je->flag == cmp_equal &&
1066 2658 : (exp_is_pkey(rel, je->l) || exp_is_pkey(rel, je->r)) &&
1067 249 : (!pk_fk || find_prop(je->p, PROP_JOINIDX)))
1068 0 : return je;
1069 : }
1070 : return NULL;
1071 : }
1072 :
1073 : static int
1074 0 : exps_has_predicate( list *l )
1075 : {
1076 0 : node *n;
1077 :
1078 0 : for( n = l->h; n; n = n->next){
1079 0 : sql_exp *e = n->data;
1080 :
1081 0 : if (e->card <= CARD_ATOM)
1082 : return 1;
1083 : }
1084 : return 0;
1085 : }
1086 :
1087 : static sql_rel *
1088 737 : rel_find_select( sql_rel *r)
1089 : {
1090 2416 : while (!is_select(r->op) && r->l && is_project(r->op))
1091 : r = r->l;
1092 737 : if (is_select(r->op))
1093 15 : return r;
1094 : return NULL;
1095 : }
1096 :
1097 : static bool
1098 145369 : rels_share_rel(list *l)
1099 : {
1100 145369 : sql_rel *ref = NULL;
1101 223826 : for (node *n = l->h; n; n = n->next) {
1102 223089 : sql_rel *r = n->data;
1103 223089 : if(!ref) {
1104 145369 : if ((ref = rel_find_ref(r)) == NULL)
1105 : return false;
1106 77720 : } else if (ref != rel_find_ref(r)) {
1107 : return false;
1108 : }
1109 : }
1110 : return true;
1111 : }
1112 :
1113 : static inline sql_rel *
1114 2822101 : rel_merge_munion(visitor *v, sql_rel *rel)
1115 : {
1116 2822101 : if (is_munion(rel->op) && rels_share_rel(rel->l)) {
1117 737 : list *rels = rel->l, *nrels = NULL;
1118 737 : sql_rel *cur = NULL, *curs = NULL;
1119 :
1120 : /* Find selects and try to merge */
1121 737 : for(node *n = rels->h; n; n = n->next) {
1122 737 : sql_rel *r = n->data;
1123 737 : sql_rel *s = rel_find_select(r);
1124 :
1125 737 : if (!s)
1126 : return rel;
1127 15 : if (cur) {
1128 0 : if (s != r->l || curs->l != s->l || !rel_is_ref(s->l) ||
1129 : /* for now only union(project*(select(R),project*(select(R))) */
1130 0 : !s->exps || !curs->exps || exps_has_predicate(s->exps) || exps_has_predicate(curs->exps)) {
1131 0 : if (nrels)
1132 0 : append(nrels, r);
1133 : else
1134 : return rel;
1135 : }
1136 :
1137 : /* merge, ie. add 'or exp' */
1138 0 : curs->exps = append(new_exp_list(v->sql->sa), exp_or(v->sql->sa, cur->exps, s->exps, 0));
1139 0 : if (!nrels) {
1140 0 : nrels = sa_list(v->sql->sa);
1141 0 : append(nrels, cur);
1142 : }
1143 : } else {
1144 15 : if (s != r->l || !rel_is_ref(s->l)) {
1145 15 : if (nrels) {
1146 0 : append(nrels, r);
1147 : } else {
1148 : return rel;
1149 : }
1150 : }
1151 : cur = r;
1152 : curs = s;
1153 : }
1154 : }
1155 0 : if (nrels) {
1156 0 : v->changes++;
1157 0 : rel->l = nrels;
1158 : }
1159 0 : return rel;
1160 : }
1161 : return rel;
1162 : }
1163 :
1164 : static sql_rel *
1165 2822101 : rel_optimize_munions_bottomup_(visitor *v, sql_rel *rel)
1166 : {
1167 2822101 : rel = rel_merge_munion(v, rel);
1168 2822101 : return rel;
1169 : }
1170 :
1171 : static sql_rel *
1172 29160 : rel_optimize_munions_bottomup(visitor *v, global_props *gp, sql_rel *rel)
1173 : {
1174 29160 : (void) gp;
1175 29160 : return rel_visitor_bottomup(v, rel, &rel_optimize_munions_bottomup_);
1176 : }
1177 :
1178 : run_optimizer
1179 638406 : bind_optimize_unions_bottomup(visitor *v, global_props *gp)
1180 : {
1181 638406 : int flag = v->sql->sql_optimizer;
1182 638210 : return gp->opt_level == 1 && gp->cnt[op_munion] && (flag & optimize_unions_bottomup)
1183 638406 : ? rel_optimize_munions_bottomup : NULL;
1184 : }
1185 :
1186 :
1187 : static inline sql_rel *
1188 3902076 : rel_project_cse(visitor *v, sql_rel *rel)
1189 : {
1190 3902076 : if (is_project(rel->op) && rel->exps) {
1191 1583606 : node *n, *m;
1192 1583606 : list *nexps;
1193 1583606 : int needed = 0;
1194 :
1195 9451163 : for (n=rel->exps->h; n && !needed; n = n->next) {
1196 7867557 : sql_exp *e1 = n->data;
1197 :
1198 7867557 : if (e1->type != e_column && !exp_is_atom(e1) && exp_name(e1)) {
1199 16740591 : for (m=n->next; m && !needed; m = m->next){
1200 15951931 : sql_exp *e2 = m->data;
1201 :
1202 15951931 : if (exp_name(e2) && exp_match_exp(e1, e2))
1203 15951931 : needed = 1;
1204 : }
1205 : }
1206 : }
1207 :
1208 1583606 : if (!needed)
1209 : return rel;
1210 :
1211 742 : nexps = new_exp_list(v->sql->sa);
1212 8651 : for (n=rel->exps->h; n; n = n->next) {
1213 7909 : sql_exp *e1 = n->data;
1214 :
1215 7909 : if (e1->type != e_column && !exp_is_atom(e1) && exp_name(e1)) {
1216 33856 : for (m=nexps->h; m; m = m->next){
1217 29244 : sql_exp *e2 = m->data;
1218 :
1219 29244 : if (exp_name(e2) && exp_match_exp(e1, e2)) {
1220 782 : assert(e2->alias.label);
1221 782 : sql_exp *ne = exp_ref(v->sql, e2);
1222 :
1223 782 : ne = exp_propagate(v->sql->sa, ne, e1);
1224 782 : assert(!e1->ref);
1225 782 : exp_setalias(ne, e1->alias.label, exp_relname(e1), exp_name(e1));
1226 782 : set_selfref(ne);
1227 782 : assert(ne->nid);
1228 : e1 = ne;
1229 : break;
1230 : }
1231 : }
1232 : }
1233 7909 : append(nexps, e1);
1234 : }
1235 742 : rel->exps = nexps;
1236 : }
1237 : return rel;
1238 : }
1239 :
1240 : static int exp_is_const_op(sql_exp *exp, sql_exp *tope, sql_rel *expr);
1241 :
1242 : static int
1243 76 : exps_are_const_op(list *exps, sql_exp *tope, sql_rel *expr)
1244 : {
1245 76 : int ok = 1;
1246 :
1247 76 : if (list_empty(exps))
1248 : return 1;
1249 152 : for (node *n = exps->h; n && ok; n = n->next)
1250 76 : ok &= exp_is_const_op(n->data, tope, expr);
1251 : return ok;
1252 : }
1253 :
1254 : static int
1255 552 : exp_is_const_op(sql_exp *exp, sql_exp *tope, sql_rel *expr)
1256 : {
1257 716 : switch (exp->type) {
1258 30 : case e_atom:
1259 30 : return exp->f ? 0 : 1;
1260 2 : case e_convert:
1261 2 : return exp_is_const_op(exp->l, tope, expr);
1262 76 : case e_func:
1263 : case e_aggr: {
1264 76 : sql_subfunc *f = exp->f;
1265 76 : if (f->func->side_effect || IS_ANALYTIC(f->func))
1266 : return 0;
1267 76 : return exps_are_const_op(exp->l, tope, expr);
1268 : }
1269 0 : case e_cmp:
1270 0 : if (exp->flag == cmp_or || exp->flag == cmp_filter)
1271 0 : return exps_are_const_op(exp->l, tope, expr) && exps_are_const_op(exp->r, tope, expr);
1272 0 : if (exp->flag == cmp_in || exp->flag == cmp_notin)
1273 0 : return exp_is_const_op(exp->l, tope, expr) && exps_are_const_op(exp->r, tope, expr);
1274 0 : return exp_is_const_op(exp->l, tope, expr) && exp_is_const_op(exp->r, tope, expr) && (!exp->f || exp_is_const_op(exp->f, tope, expr));
1275 608 : case e_column: {
1276 608 : if (is_simple_project(expr->op) || is_groupby(expr->op)) {
1277 : /* in a simple projection, self-references may occur */
1278 608 : sql_exp *nexp = exps_bind_nid(expr->exps, exp->nid);
1279 608 : if (nexp && list_position(expr->exps, nexp) < list_position(expr->exps, tope))
1280 : return exp_is_const_op(nexp, exp, expr);
1281 : }
1282 : return 0;
1283 : }
1284 : default:
1285 : return 0;
1286 : }
1287 : }
1288 :
1289 : static sql_exp *
1290 25 : rel_groupby_add_count_star(mvc *sql, sql_rel *rel, sql_exp *count_star_exp, bool *count_added)
1291 : {
1292 25 : if (count_star_exp)
1293 : return count_star_exp;
1294 18 : if (!list_empty(rel->exps)) {
1295 49 : for (node *n=rel->exps->h; n ; n = n->next) {
1296 35 : sql_exp *e = n->data;
1297 :
1298 35 : if (exp_aggr_is_count(e) && !need_distinct(e) && list_length(e->l) == 0)
1299 4 : return e;
1300 : }
1301 : }
1302 14 : sql_subfunc *cf = sql_bind_func(sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
1303 14 : *count_added = true;
1304 14 : return rel_groupby_add_aggr(sql, rel, exp_aggr(sql->sa, NULL, cf, 0, 0, rel->card, 0));
1305 : }
1306 :
1307 : /* optimize sum(x + 12) into sum(x) + 12*count(*) */
1308 : static inline sql_rel *
1309 42622 : rel_simplify_sum(visitor *v, sql_rel *rel)
1310 : {
1311 42622 : if (is_groupby(rel->op) && !list_empty(rel->exps)) {
1312 42548 : sql_rel *upper = NULL, *groupby = rel, *l = groupby->l;
1313 42548 : sql_exp *count_star_exp = NULL;
1314 42548 : bool count_added = false;
1315 :
1316 133914 : for (node *n=groupby->exps->h; n ; n = n->next) {
1317 91366 : sql_exp *e = n->data;
1318 91366 : list *el = e->l;
1319 91366 : sql_subfunc *sf = e->f;
1320 :
1321 91366 : if (e->type == e_aggr && !need_distinct(e) && sf->func->type == F_AGGR && !sf->func->s && !strcmp(sf->func->base.name, "sum")) {
1322 8561 : sql_rel *expr = groupby;
1323 8561 : sql_exp *exp = (sql_exp*) el->h->data, *oexp = exp;
1324 :
1325 8561 : while (is_numeric_upcast(exp))
1326 0 : exp = exp->l;
1327 : /* we want to find a +/- call, so expect them to appear only on simple projections */
1328 18368 : while (exp && exp->type == e_column && (is_simple_project(expr->op) || is_groupby(expr->op)) && expr->l) {
1329 11373 : sql_rel *nexpr = NULL;
1330 11373 : sql_exp *nexp = rel_find_exp_and_corresponding_rel(l, exp, false, &nexpr, NULL);
1331 :
1332 : /* break when it loops on the same relation */
1333 11373 : if (nexpr == expr && list_position(expr->exps, nexp) >= list_position(expr->exps, exp))
1334 : break;
1335 9807 : expr = nexpr;
1336 9807 : exp = oexp = nexp;
1337 9809 : while (exp && is_numeric_upcast(exp))
1338 2 : exp = exp->l;
1339 : }
1340 :
1341 8561 : list *expl = exp ? exp->l : NULL;
1342 91366 : sql_subfunc *expf = exp ? exp->f : NULL;
1343 : /* found a candidate function */
1344 7238 : if (exp && exp->type == e_func && expf->func->type == F_FUNC && !expf->func->s &&
1345 1220 : (!strcmp(expf->func->base.name, "sql_sub") || !strcmp(expf->func->base.name, "sql_add"))) {
1346 238 : sql_exp *e1 = (sql_exp*) expl->h->data, *e2 = (sql_exp*) expl->h->next->data;
1347 238 : int e1ok = exp_is_const_op(e1, oexp, expr), e2ok = exp_is_const_op(e2, oexp, expr);
1348 :
1349 238 : if ((!e1ok && e2ok) || (e1ok && !e2ok)) {
1350 26 : sql_exp *ocol = e1ok ? e2 : e1, *constant = e1ok ? e1 : e2, *mul, *colref, *naggr, *newop, *col = ocol, *match;
1351 26 : bool add_col = true, prepend = false;
1352 :
1353 : /* if 'col' is a projection from the under relation, then use it */
1354 26 : while (is_numeric_upcast(col))
1355 0 : col = col->l;
1356 26 : if (col->type == e_column) {
1357 24 : sql_exp *colf = exps_find_exp(l->exps, col);
1358 :
1359 : /* col is already found in the inner relation. Also look for a new reference for col, eg sql_add(col, 1), 1 as col */
1360 24 : if (colf && list_position(l->exps, colf) < list_position(l->exps, oexp)) {
1361 : add_col = false;
1362 11 : } else if (!colf && is_simple_project(l->op) && list_empty(l->r) && !rel_is_ref(l) && !need_distinct(l)) {
1363 : prepend = true;
1364 : add_col = false;
1365 1 : } else if (!colf && (is_simple_project(l->op) || is_groupby(l->op))) {
1366 : /* on these scenarios the new column expression will be ordered/(grouped for distinct) or create potential ambiguity (multiple ref), so skip */
1367 1 : continue;
1368 : }
1369 2 : } else if ((is_simple_project(l->op) && (!list_empty(l->r) || rel_is_ref(l) || need_distinct(l))) || is_groupby(l->op)) {
1370 0 : continue;
1371 : }
1372 :
1373 : /* add count star */
1374 25 : count_star_exp = rel_groupby_add_count_star(v->sql, groupby, count_star_exp, &count_added);
1375 : /* multiply constant by count star */
1376 25 : if (!(mul = rel_binop_(v->sql, NULL, constant, exp_ref(v->sql, count_star_exp), "sys", "sql_mul", card_value, true))) {
1377 0 : v->sql->session->status = 0;
1378 0 : v->sql->errstr[0] = '\0';
1379 0 : continue;
1380 : }
1381 25 : if (!has_label(mul))
1382 25 : exp_label(v->sql->sa, mul, ++v->sql->label);
1383 :
1384 25 : colref = exp_ref(v->sql, ocol);
1385 25 : if (add_col) /* if 'col' will be added, then make sure it has an unique label */
1386 3 : exp_label(v->sql->sa, colref, ++v->sql->label);
1387 :
1388 : /* 'oexp' contains the type for the input for the 'sum' aggregate */
1389 25 : if (!(colref = exp_check_type(v->sql, exp_subtype(oexp), groupby, colref, type_equal))) {
1390 0 : v->sql->session->status = 0;
1391 0 : v->sql->errstr[0] = '\0';
1392 0 : continue;
1393 : }
1394 : /* update sum to use the column side only */
1395 25 : sql_subfunc *a = sql_bind_func(v->sql, "sys", "sum", exp_subtype(colref), NULL, F_AGGR, true, true);
1396 25 : if (!a)
1397 0 : continue;
1398 25 : naggr = exp_aggr(v->sql->sa, list_append(sa_list(v->sql->sa), colref), a, need_distinct(e), need_no_nil(e), groupby->card, has_nil(e));
1399 25 : if ((match = exps_any_match(groupby->exps, naggr)) && list_position(groupby->exps, match) < list_position(groupby->exps, e)) { /* found a matching aggregate, use it */
1400 9 : naggr = exp_ref(v->sql, match);
1401 9 : exp_label(v->sql->sa, naggr, ++v->sql->label);
1402 16 : } else if (!has_label(naggr)) { /* otherwise use the new one */
1403 16 : exp_label(v->sql->sa, naggr, ++v->sql->label);
1404 : }
1405 :
1406 : /* generate addition/subtraction. subtraction is not commutative, so keep original order! */
1407 25 : if (!(newop = rel_binop_(v->sql, NULL, e1 == constant ? mul : exp_ref(v->sql, naggr), e1 == constant ? exp_ref(v->sql, naggr) : mul, "sys", expf->func->base.name, card_value, true))) {
1408 0 : v->sql->session->status = 0;
1409 0 : v->sql->errstr[0] = '\0';
1410 0 : continue;
1411 : }
1412 25 : if (!(newop = exp_check_type(v->sql, exp_subtype(e), groupby, newop, type_equal))) {
1413 0 : v->sql->session->status = 0;
1414 0 : v->sql->errstr[0] = '\0';
1415 0 : continue;
1416 : }
1417 :
1418 : /* a column reference can be prepended to the inner relation, add it after all the check type calls succeed */
1419 25 : if (prepend)
1420 10 : list_prepend(l->exps, col);
1421 :
1422 : /* the new generate function calls are valid, update relations */
1423 : /* we need a new relation for the multiplication and addition/subtraction */
1424 25 : if (!upper) {
1425 : /* be careful with relations with more than 1 reference, so do in-place replacement */
1426 18 : list *projs = rel_projections(v->sql, rel, NULL, 1, 1);
1427 18 : sql_rel *nrel = rel_groupby(v->sql, rel->l, NULL);
1428 18 : nrel->exps = rel->exps;
1429 18 : nrel->r = rel->r;
1430 18 : nrel->card = rel->card;
1431 18 : nrel->nrcols = list_length(rel->exps);
1432 18 : set_processed(nrel);
1433 18 : rel_dup(rel->l);
1434 18 : upper = rel = rel_inplace_project(v->sql->sa, rel, nrel, projs);
1435 18 : rel->card = exps_card(projs);
1436 18 : groupby = nrel; /* update pointers :) */
1437 18 : l = groupby->l;
1438 : }
1439 98 : for (node *n = upper->exps->h ; n ; ) {
1440 73 : node *next = n->next;
1441 73 : sql_exp *re = n->data;
1442 :
1443 : /* remove the old reference to the aggregate because we will use the addition/subtraction instead,
1444 : as well as the count star if count_added */
1445 73 : if (exp_refers(e, re) || (count_added && exp_refers(count_star_exp, re)))
1446 39 : list_remove_node(upper->exps, NULL, n);
1447 : n = next;
1448 : }
1449 :
1450 : /* update sum aggregate with new aggregate or reference to an existing one */
1451 25 : n->data = naggr;
1452 25 : list_hash_clear(groupby->exps);
1453 :
1454 : /* add column reference with new label, if 'col' was not found */
1455 25 : if (add_col) {
1456 3 : if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l))
1457 0 : groupby->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
1458 3 : list_append(l->exps, ocol);
1459 : }
1460 :
1461 : /* propagate alias and add new expression */
1462 25 : exp_prop_alias(v->sql->sa, newop, e);
1463 25 : list_append(upper->exps, newop);
1464 25 : v->changes++;
1465 : }
1466 : }
1467 : }
1468 : }
1469 : }
1470 42622 : return rel;
1471 : }
1472 :
1473 : /* optimize group by x+1,(y-2)*3,2-z into group by x,y,z */
1474 : static inline sql_rel *
1475 42622 : rel_simplify_groupby_columns(visitor *v, sql_rel *rel)
1476 : {
1477 42622 : if (is_groupby(rel->op) && !list_empty(rel->r)) {
1478 32017 : sql_rel *l = rel->l;
1479 :
1480 83014 : for (node *n=((list*)rel->r)->h; n ; n = n->next) {
1481 50997 : sql_exp *e = n->data;
1482 50997 : e->used = 0; /* we need to use this flag, clean it first */
1483 : }
1484 83014 : for (node *n=((list*)rel->r)->h; n ; n = n->next) {
1485 50997 : sql_exp *e = n->data;
1486 :
1487 50997 : if (e->type == e_column) {
1488 50961 : bool searching = true;
1489 50961 : sql_rel *efrel = NULL;
1490 50961 : sql_exp *exp = rel_find_exp_and_corresponding_rel(l, e, false, &efrel, NULL), *col = NULL, *tope = exp;
1491 :
1492 101941 : while (searching && !col) {
1493 50980 : sql_exp *exp_col = exp;
1494 :
1495 50980 : if (exp && is_numeric_upcast(exp))
1496 9 : exp = exp->l;
1497 50980 : if (exp && exp->type == e_func) {
1498 669 : list *el = exp->l;
1499 669 : sql_subfunc *sf = exp->f;
1500 : /* At the moment look only at injective math functions */
1501 669 : if (sf->func->type == F_FUNC && !sf->func->s &&
1502 656 : (!strcmp(sf->func->base.name, "sql_sub") || !strcmp(sf->func->base.name, "sql_add") || !strcmp(sf->func->base.name, "sql_mul"))) {
1503 114 : sql_exp *e1 = (sql_exp*) el->h->data, *e2 = (sql_exp*) el->h->next->data;
1504 : /* the optimization cannot be done if side-effect calls (e.g. rand()) are present */
1505 207 : int e1ok = exp_is_atom(e1) && !exp_unsafe(e1, true, false) && !exp_has_sideeffect(e1), e2ok = exp_is_atom(e2) && !exp_unsafe(e2, true, false) && !exp_has_sideeffect(e2);
1506 :
1507 114 : if ((!e1ok && e2ok) || (e1ok && !e2ok)) {
1508 60 : sql_exp *c = e1ok ? e2 : e1;
1509 60 : bool done = false;
1510 60 : exp_col = exps_find_exp(efrel->exps, c);
1511 60 : if (exp_col)
1512 29 : c = exp_col;
1513 :
1514 122 : while (!done) {
1515 62 : if (is_numeric_upcast(c))
1516 0 : c = c->l;
1517 62 : if (c->type == e_column) {
1518 43 : if (is_simple_project(efrel->op) || is_groupby(efrel->op)) {
1519 : /* in a simple projection, self-references may occur */
1520 43 : sql_exp *nc = exps_find_exp(efrel->exps, c);
1521 43 : if (nc && list_position(efrel->exps, nc) < list_position(efrel->exps, exp_col)) {
1522 2 : exp_col = c;
1523 2 : c = nc;
1524 2 : continue;
1525 : }
1526 : }
1527 : col = c; /* 'c' is a column reference from the left relation */
1528 : done = true;
1529 : } else {
1530 : exp = c; /* maybe a nested function call, let's continue searching */
1531 : done = true;
1532 : }
1533 : }
1534 : } else {
1535 : searching = false;
1536 : }
1537 : } else {
1538 : searching = false;
1539 : }
1540 : } else {
1541 : searching = false;
1542 : }
1543 : }
1544 50961 : if (col) { /* a column reference was found */
1545 41 : const char *rname = exp_relname(e), *name = exp_name(e);
1546 :
1547 : /* the grouping column has an alias, we have to keep it */
1548 41 : if ((rname && name && (strcmp(rname, e->l) != 0 || strcmp(name, e->r) != 0)) || (!rname && name && strcmp(name, e->r) != 0)) {
1549 2 : if (!has_label(e)) /* dangerous to merge, skip it */
1550 0 : continue;
1551 2 : if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l))
1552 0 : rel->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
1553 2 : list_append(l->exps, e);
1554 2 : n->data = e = exp_ref(v->sql, e);
1555 2 : list_hash_clear(rel->r);
1556 : }
1557 :
1558 41 : sql_exp *f = exps_find_exp(rel->r, col);
1559 :
1560 41 : if (f && list_position(rel->r, f) < list_position(rel->r, e)) { /* if already present, remove it */
1561 10 : e->used = 1;
1562 : } else {
1563 : /* Use an unique reference to the column found. If there's another grouping column label pointing into it,
1564 : rel_groupby_cse will hopefully remove it */
1565 31 : sql_exp *colf = exps_find_exp(l->exps, col);
1566 :
1567 : /* col is already found in the inner relation. Also look for a new reference for col, eg sql_add(col, 1), 1 as col */
1568 31 : if (colf && list_position(l->exps, colf) < list_position(l->exps, tope)) {
1569 2 : n->data = exp_ref(v->sql, col);
1570 25 : } else if (!colf && is_simple_project(l->op) && list_empty(l->r) && !rel_is_ref(l) && !need_distinct(l)) { /* trivial case, it can be added */
1571 25 : sql_exp *ne = col;
1572 25 : list_prepend(l->exps, ne);
1573 25 : n->data = exp_ref(v->sql, ne);
1574 0 : } else if (!colf && (is_simple_project(l->op) || is_groupby(l->op))) {
1575 : /* on these scenarios the new column expression will be ordered/(grouped for distinct) or create potential ambiguity (multiple ref), so skip */
1576 0 : continue;
1577 : } else {
1578 4 : sql_exp *ne = exp_ref(v->sql, col);
1579 :
1580 4 : if (colf) /* a col reference is already there, add a new label */
1581 4 : exp_label(v->sql->sa, ne, ++v->sql->label);
1582 4 : if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l))
1583 0 : rel->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
1584 4 : list_append(l->exps, ne);
1585 4 : n->data = exp_ref(v->sql, ne);
1586 : }
1587 31 : list_hash_clear(rel->r);
1588 : }
1589 41 : v->changes++;
1590 : }
1591 : }
1592 : }
1593 83014 : for (node *n=((list*)rel->r)->h; n ; ) {
1594 50997 : node *next = n->next;
1595 50997 : sql_exp *e = n->data;
1596 :
1597 50997 : if (e->used) /* remove unnecessary grouping columns */
1598 10 : list_remove_node(rel->r, NULL, n);
1599 : n = next;
1600 : }
1601 : }
1602 42622 : return rel;
1603 : }
1604 :
1605 : /* remove identical grouping columns */
1606 : static inline sql_rel *
1607 98073 : rel_groupby_cse(visitor *v, sql_rel *rel)
1608 : {
1609 98073 : if (is_groupby(rel->op) && !list_empty(rel->r)) {
1610 72376 : sql_rel *l = rel->l;
1611 :
1612 : /* for every group expression e1 */
1613 201618 : for (node *n=((list*)rel->r)->h; n ; n = n->next) {
1614 129242 : sql_exp *e1 = n->data;
1615 : /* it's good to examine the same expression in the subrelation e.g. in case it's an alias */
1616 : /* TODO maybe cover more cases? Here I only look at the left relation */
1617 258484 : sql_exp *e1_sub = e1->type == e_column ?
1618 129242 : e1->nid ? exps_bind_nid(l->exps, e1->nid) : exps_find_exp(l->exps, e1) : NULL;
1619 :
1620 : /* for every other group expression */
1621 222532 : for (node *m=n->next; m; m = m->next) {
1622 93290 : sql_exp *e2 = m->data;
1623 186580 : sql_exp *e2_sub = e2->type == e_column ?
1624 93290 : e2->nid ? exps_bind_nid(l->exps, e2->nid) : exps_find_exp(l->exps, e2) : NULL;
1625 :
1626 : /* check if the expression are the same */
1627 93290 : if (exp_match_exp(e1, e2) || exp_refers(e1, e2) || (e1_sub && e2_sub && (exp_match_exp(e1_sub, e2_sub) || exp_refers(e1_sub, e2_sub)))) {
1628 :
1629 : /* use e2 from rel->exps instead of e2 from the rel->r as it can have an alias from the higher rel */
1630 41 : sql_exp *e2_in_exps = exps_uses_nid(rel->exps, e2->alias.label);
1631 41 : assert(e2_in_exps);
1632 :
1633 : /* same as e2 */
1634 41 : sql_exp *e1_in_exps = exps_uses_nid(rel->exps, e1->alias.label);
1635 41 : if (!e1_in_exps)
1636 6 : continue;
1637 :
1638 : /* write e2 as an e1 alias since the expressions are the same */
1639 35 : sql_exp* e2_as_e1_alias = exp_copy(v->sql, e1_in_exps);
1640 : /* NOTE: it is important to get the rname (exp->l) and name (exp->r) from e2 IN the exps
1641 : * (e2_in_exps), and not from e2, since it could carry an alias from the higher rel */
1642 35 : exp_setalias(e2_as_e1_alias, e2_as_e1_alias->alias.label, e2_in_exps->l, e2_in_exps->r);
1643 35 : e2_as_e1_alias->alias.label = e2->alias.label;
1644 :
1645 : /* replace e2 with e2_as_e1_alias in expressions list */
1646 35 : node *e2_exps_node = list_find(rel->exps, e2_in_exps, NULL);
1647 35 : list_append_before(rel->exps, e2_exps_node, e2_as_e1_alias);
1648 35 : list_remove_node(rel->exps, NULL, e2_exps_node);
1649 :
1650 : /* finally remove e2 from the groups' list (->r) since it's redundant */
1651 35 : node *e2_r_node = list_find(rel->r, e2, NULL);
1652 35 : list_remove_node(rel->r, NULL, e2_r_node);
1653 :
1654 35 : v->changes++;
1655 : }
1656 : }
1657 : }
1658 : }
1659 98073 : return rel;
1660 : }
1661 :
1662 : sql_exp *list_exps_uses_exp(list *exps, const char *rname, const char *name);
1663 :
1664 : static sql_exp*
1665 19577 : exp_uses_exp(sql_exp *e, const char *rname, const char *name)
1666 : {
1667 19876 : sql_exp *res = NULL;
1668 :
1669 19876 : switch (e->type) {
1670 : case e_psm:
1671 : break;
1672 1945 : case e_atom: {
1673 1945 : if (e->f)
1674 0 : return list_exps_uses_exp(e->f, rname, name);
1675 : } break;
1676 298 : case e_convert:
1677 298 : return exp_uses_exp(e->l, rname, name);
1678 10966 : case e_column: {
1679 10966 : if (e->l && rname && strcmp(e->l, rname) == 0 &&
1680 2647 : e->r && name && strcmp(e->r, name) == 0)
1681 : return e;
1682 8483 : if (!e->l && !rname &&
1683 242 : e->r && name && strcmp(e->r, name) == 0)
1684 : return e;
1685 : } break;
1686 6249 : case e_func:
1687 : case e_aggr: {
1688 6249 : if (e->l)
1689 5030 : return list_exps_uses_exp(e->l, rname, name);
1690 : } break;
1691 418 : case e_cmp: {
1692 418 : if (e->flag == cmp_in || e->flag == cmp_notin) {
1693 0 : if ((res = exp_uses_exp(e->l, rname, name)))
1694 : return res;
1695 0 : return list_exps_uses_exp(e->r, rname, name);
1696 418 : } else if (e->flag == cmp_or || e->flag == cmp_filter) {
1697 2 : if ((res = list_exps_uses_exp(e->l, rname, name)))
1698 : return res;
1699 2 : return list_exps_uses_exp(e->r, rname, name);
1700 : } else {
1701 416 : if ((res = exp_uses_exp(e->l, rname, name)))
1702 : return res;
1703 414 : if ((res = exp_uses_exp(e->r, rname, name)))
1704 : return res;
1705 408 : if (e->f)
1706 : return exp_uses_exp(e->f, rname, name);
1707 : }
1708 : } break;
1709 : }
1710 : return NULL;
1711 : }
1712 :
1713 : sql_exp *
1714 10301 : list_exps_uses_exp(list *exps, const char *rname, const char *name)
1715 : {
1716 10301 : sql_exp *res = NULL;
1717 :
1718 10301 : if (!exps)
1719 : return NULL;
1720 29048 : for (node *n = exps->h; n && !res; n = n->next) {
1721 18747 : sql_exp *e = n->data;
1722 18747 : res = exp_uses_exp(e, rname, name);
1723 : }
1724 : return res;
1725 : }
1726 :
1727 : /* find in the list of expression an expression which uses e */
1728 : sql_exp *
1729 969 : exps_uses_exp(list *exps, sql_exp *e)
1730 : {
1731 969 : return list_exps_uses_exp(exps, exp_relname(e), exp_name(e));
1732 : }
1733 : /*
1734 : * Rewrite aggregations over munion all.
1735 : * groupby ([ union all (a, b, c) ], [gbe], [ count, sum ] )
1736 : *
1737 : * into
1738 : * groupby ([ union all( groupby( a, [gbe], [ count, sum] ),
1739 : * groupby( b, [gbe], [ count, sum] ),
1740 : * groupby( c, [gbe], [ count, sum] ) ],
1741 : * [gbe], [sum, sum] )
1742 : */
1743 : static inline bool
1744 2090 : rel_has_groupby(sql_rel *r)
1745 : {
1746 3642 : if (r) {
1747 3473 : if (is_groupby(r->op))
1748 : return true;
1749 2740 : if (is_simple_project(r->op))
1750 1552 : return rel_has_groupby(r->l);
1751 : }
1752 : return false;
1753 : }
1754 :
1755 : static inline sql_rel *
1756 1371 : rel_push_aggr_down_n_arry(visitor *v, sql_rel *rel)
1757 : {
1758 1371 : sql_rel *g = rel;
1759 1371 : sql_rel *u = rel->l, *ou = u;
1760 1371 : sql_rel *r = NULL;
1761 1371 : list *rgbe = NULL, *gbe = NULL, *exps = NULL;
1762 1371 : node *n, *m;
1763 :
1764 : // TODO why?
1765 1371 : if (u->op == op_project && !need_distinct(u))
1766 343 : u = u->l;
1767 :
1768 1371 : if (is_recursive(u))
1769 : return rel;
1770 :
1771 : /* make sure we don't create group by on group by's */
1772 2721 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
1773 2090 : r = n->data;
1774 4180 : if (rel_has_groupby(r))
1775 : return rel;
1776 : }
1777 :
1778 : /* distinct should be done over the full result */
1779 1699 : for (n = g->exps->h; n; n = n->next) {
1780 1172 : sql_exp *e = n->data;
1781 1172 : sql_subfunc *af = e->f;
1782 :
1783 1172 : if (e->type == e_atom ||
1784 1172 : e->type == e_func ||
1785 551 : (e->type == e_aggr &&
1786 551 : ((strcmp(af->func->base.name, "sum") &&
1787 478 : strcmp(af->func->base.name, "count") &&
1788 165 : strcmp(af->func->base.name, "min") &&
1789 551 : strcmp(af->func->base.name, "max")) ||
1790 : need_distinct(e))))
1791 : return rel;
1792 : }
1793 :
1794 527 : list *nl = sa_list(v->sql->sa);
1795 1594 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
1796 1067 : r = rel_dup(n->data);
1797 : //n->data = NULL; /* clean list as we steal the relation r, stealing is needed else (with multiple references) double project cleanup fails */
1798 1067 : if (!is_project(r->op))
1799 47 : r = rel_project(v->sql->sa, r,
1800 : rel_projections(v->sql, r, NULL, 1, 1));
1801 1067 : rel_rename_exps(v->sql, u->exps, r->exps);
1802 1067 : if (u != ou) {
1803 596 : bool isproject = is_project(r->op);
1804 596 : r = rel_project(v->sql->sa, r, NULL);
1805 596 : r->exps = exps_copy(v->sql, ou->exps);
1806 596 : rel_rename_exps(v->sql, ou->exps, r->exps);
1807 596 : set_processed(r);
1808 596 : if (isproject)
1809 596 : r = rel_push_project_down_(v, r); /* cleanup any double projects */
1810 : }
1811 1067 : if (g->r && list_length(g->r) > 0) {
1812 849 : list *gbe = g->r;
1813 849 : rgbe = exps_copy(v->sql, gbe);
1814 : }
1815 1067 : r = rel_groupby(v->sql, r, NULL);
1816 1067 : r->r = rgbe;
1817 1067 : r->nrcols = g->nrcols;
1818 1067 : r->card = g->card;
1819 1067 : r->exps = exps_copy(v->sql, g->exps);
1820 1067 : r->nrcols = list_length(r->exps);
1821 1067 : set_processed(r);
1822 :
1823 1067 : assert(r);
1824 1067 : append(nl, r);
1825 : }
1826 :
1827 : /* group by on primary keys which define the partitioning scheme
1828 : * don't need a finalizing group by */
1829 : /* how to check if a partition is based on some primary key ?
1830 : * */
1831 527 : if (!list_empty(rel->r)) {
1832 1035 : for (node *n = ((list*)rel->r)->h; n; n = n->next) {
1833 613 : sql_exp *e = n->data;
1834 613 : sql_column *c = NULL;
1835 :
1836 613 : if ((c = exp_is_pkey(rel, e)) && partition_find_part(v->sql->session->tr, c->t, NULL)) {
1837 : /* check if key is partition key */
1838 0 : v->changes++;
1839 0 : return rel_inplace_setop_n_ary(v->sql, rel, nl, op_munion,
1840 : rel_projections(v->sql, rel, NULL, 1, 1));
1841 : }
1842 : }
1843 : }
1844 :
1845 527 : if (!list_empty(rel->r)) {
1846 422 : list *ogbe = rel->r;
1847 :
1848 422 : gbe = new_exp_list(v->sql->sa);
1849 1035 : for (n = ogbe->h; n; n = n->next) {
1850 613 : sql_exp *e = n->data, *ne;
1851 :
1852 : /* group by in aggregation list */
1853 613 : ne = exps_uses_exp( rel->exps, e);
1854 613 : if (ne) {
1855 607 : sql_rel *first_munion_rel = nl->h->data;
1856 607 : ne = list_find_exp(first_munion_rel->exps, ne);
1857 : }
1858 607 : if (!ne) {
1859 : /* e only in the u1,u2,...un->r (group by list) */
1860 24 : for (node *n = nl->h; n; n = n->next) {
1861 16 : ne = exp_ref(v->sql, e);
1862 16 : list_append(((sql_rel*)n->data)->exps, ne);
1863 : }
1864 : }
1865 8 : assert(ne);
1866 613 : ne = exp_ref(v->sql, ne);
1867 613 : append(gbe, ne);
1868 : }
1869 : }
1870 :
1871 527 : u = rel_setop_n_ary(v->sql->sa, nl, op_munion);
1872 527 : rel_setop_n_ary_set_exps(v->sql, u,
1873 527 : rel_projections(v->sql, nl->h->data, NULL, 1, 1), false);
1874 527 : set_processed(u);
1875 :
1876 527 : exps = new_exp_list(v->sql->sa);
1877 1592 : for (n = u->exps->h, m = rel->exps->h; n && m; n = n->next, m = m->next) {
1878 1065 : sql_exp *ne, *e = n->data, *oa = m->data;
1879 :
1880 1065 : if (oa->type == e_aggr) {
1881 447 : sql_subfunc *f = oa->f;
1882 447 : int cnt = exp_aggr_is_count(oa);
1883 447 : sql_subfunc *a = sql_bind_func(v->sql, "sys", (cnt)?"sum":f->func->base.name, exp_subtype(e), NULL, F_AGGR, true, true);
1884 :
1885 447 : assert(a);
1886 : /* munion of aggr result may have nils
1887 : * because sum/count of empty set */
1888 447 : set_has_nil(e);
1889 447 : e = exp_ref(v->sql, e);
1890 447 : ne = exp_aggr1(v->sql->sa, e, a, need_distinct(e), 1, e->card, 1);
1891 : } else {
1892 618 : ne = exp_copy(v->sql, oa);
1893 : }
1894 1065 : exp_setalias(ne, oa->alias.label, exp_find_rel_name(oa), exp_name(oa));
1895 1065 : append(exps, ne);
1896 : }
1897 527 : v->changes++;
1898 527 : return rel_inplace_groupby(rel, u, gbe, exps);
1899 : }
1900 :
1901 : /*
1902 : * Rewrite aggregations over union all.
1903 : * groupby ([ union all (a, b) ], [gbe], [ count, sum ] )
1904 : *
1905 : * into
1906 : * groupby ( [ union all( groupby( a, [gbe], [ count, sum] ), [ groupby( b, [gbe], [ count, sum] )) , [gbe], [sum, sum] )
1907 : */
1908 : static inline sql_rel *
1909 98073 : rel_push_aggr_down(visitor *v, sql_rel *rel)
1910 : {
1911 98073 : if (rel->op == op_groupby && rel->l) {
1912 97983 : sql_rel *u = rel->l, *ou = u;
1913 97983 : sql_rel *g = rel;
1914 97983 : sql_rel *ul = u->l;
1915 97983 : sql_rel *ur = u->r;
1916 97983 : node *n, *m;
1917 97983 : list *lgbe = NULL, *rgbe = NULL, *gbe = NULL, *exps = NULL;
1918 :
1919 97983 : if (u->op == op_project && !need_distinct(u))
1920 42351 : u = u->l;
1921 :
1922 97983 : if (!u || !(is_union(u->op) || is_munion(u->op)) || need_distinct(u) || is_single(u) || !u->exps || rel_is_ref(u))
1923 : return rel;
1924 :
1925 1371 : if (is_munion(u->op))
1926 1371 : return rel_push_aggr_down_n_arry(v, rel);
1927 :
1928 0 : ul = u->l;
1929 0 : ur = u->r;
1930 :
1931 : /* make sure we don't create group by on group by's */
1932 0 : if (ul->op == op_groupby || ur->op == op_groupby)
1933 : return rel;
1934 :
1935 : /* distinct should be done over the full result */
1936 0 : for (n = g->exps->h; n; n = n->next) {
1937 0 : sql_exp *e = n->data;
1938 0 : sql_subfunc *af = e->f;
1939 :
1940 0 : if (e->type == e_atom ||
1941 0 : e->type == e_func ||
1942 0 : (e->type == e_aggr &&
1943 0 : ((strcmp(af->func->base.name, "sum") &&
1944 0 : strcmp(af->func->base.name, "count") &&
1945 0 : strcmp(af->func->base.name, "min") &&
1946 0 : strcmp(af->func->base.name, "max")) ||
1947 : need_distinct(e))))
1948 : return rel;
1949 : }
1950 :
1951 0 : ul = rel_dup(ul);
1952 0 : ur = rel_dup(ur);
1953 0 : if (!is_project(ul->op))
1954 0 : ul = rel_project(v->sql->sa, ul,
1955 : rel_projections(v->sql, ul, NULL, 1, 1));
1956 0 : if (!is_project(ur->op))
1957 0 : ur = rel_project(v->sql->sa, ur,
1958 : rel_projections(v->sql, ur, NULL, 1, 1));
1959 0 : rel_rename_exps(v->sql, u->exps, ul->exps);
1960 0 : rel_rename_exps(v->sql, u->exps, ur->exps);
1961 0 : if (u != ou) {
1962 0 : ul = rel_project(v->sql->sa, ul, NULL);
1963 0 : ul->exps = exps_copy(v->sql, ou->exps);
1964 0 : rel_rename_exps(v->sql, ou->exps, ul->exps);
1965 0 : set_processed(ul);
1966 0 : ur = rel_project(v->sql->sa, ur, NULL);
1967 0 : ur->exps = exps_copy(v->sql, ou->exps);
1968 0 : rel_rename_exps(v->sql, ou->exps, ur->exps);
1969 0 : set_processed(ur);
1970 : }
1971 :
1972 0 : if (g->r && list_length(g->r) > 0) {
1973 0 : list *gbe = g->r;
1974 :
1975 0 : lgbe = exps_copy(v->sql, gbe);
1976 0 : rgbe = exps_copy(v->sql, gbe);
1977 : }
1978 0 : ul = rel_groupby(v->sql, ul, NULL);
1979 0 : ul->r = lgbe;
1980 0 : ul->nrcols = g->nrcols;
1981 0 : ul->card = g->card;
1982 0 : ul->exps = exps_copy(v->sql, g->exps);
1983 0 : ul->nrcols = list_length(ul->exps);
1984 0 : set_processed(ul);
1985 :
1986 0 : ur = rel_groupby(v->sql, ur, NULL);
1987 0 : ur->r = rgbe;
1988 0 : ur->nrcols = g->nrcols;
1989 0 : ur->card = g->card;
1990 0 : ur->exps = exps_copy(v->sql, g->exps);
1991 0 : ur->nrcols = list_length(ur->exps);
1992 0 : set_processed(ur);
1993 :
1994 : /* group by on primary keys which define the partitioning scheme
1995 : * don't need a finalizing group by */
1996 : /* how to check if a partition is based on some primary key ?
1997 : * */
1998 0 : if (!list_empty(rel->r)) {
1999 0 : for (node *n = ((list*)rel->r)->h; n; n = n->next) {
2000 0 : sql_exp *e = n->data;
2001 0 : sql_column *c = NULL;
2002 :
2003 0 : if ((c = exp_is_pkey(rel, e)) && partition_find_part(v->sql->session->tr, c->t, NULL)) {
2004 : /* check if key is partition key */
2005 0 : v->changes++;
2006 0 : return rel_inplace_setop(v->sql, rel, ul, ur, op_union,
2007 : rel_projections(v->sql, rel, NULL, 1, 1));
2008 : }
2009 : }
2010 : }
2011 :
2012 0 : if (!list_empty(rel->r)) {
2013 0 : list *ogbe = rel->r;
2014 :
2015 0 : gbe = new_exp_list(v->sql->sa);
2016 0 : for (n = ogbe->h; n; n = n->next) {
2017 0 : sql_exp *e = n->data, *ne;
2018 :
2019 : /* group by in aggregation list */
2020 0 : ne = exps_uses_exp( rel->exps, e);
2021 0 : if (ne)
2022 0 : ne = list_find_exp( ul->exps, ne);
2023 0 : if (!ne) {
2024 : /* e only in the ul/ur->r (group by list) */
2025 0 : ne = exp_ref(v->sql, e);
2026 0 : list_append(ul->exps, ne);
2027 0 : ne = exp_ref(v->sql, e);
2028 0 : list_append(ur->exps, ne);
2029 : }
2030 0 : assert(ne);
2031 0 : ne = exp_ref(v->sql, ne);
2032 0 : append(gbe, ne);
2033 : }
2034 : }
2035 :
2036 0 : u = rel_setop(v->sql->sa, ul, ur, op_union);
2037 0 : rel_setop_set_exps(v->sql, u, rel_projections(v->sql, ul, NULL, 1, 1), false);
2038 0 : set_processed(u);
2039 :
2040 0 : exps = new_exp_list(v->sql->sa);
2041 0 : for (n = u->exps->h, m = rel->exps->h; n && m; n = n->next, m = m->next) {
2042 0 : sql_exp *ne, *e = n->data, *oa = m->data;
2043 :
2044 0 : if (oa->type == e_aggr) {
2045 0 : sql_subfunc *f = oa->f;
2046 0 : int cnt = exp_aggr_is_count(oa);
2047 0 : sql_subfunc *a = sql_bind_func(v->sql, "sys", (cnt)?"sum":f->func->base.name, exp_subtype(e), NULL, F_AGGR, true, true);
2048 :
2049 0 : assert(a);
2050 : /* union of aggr result may have nils
2051 : * because sum/count of empty set */
2052 0 : set_has_nil(e);
2053 0 : e = exp_ref(v->sql, e);
2054 0 : ne = exp_aggr1(v->sql->sa, e, a, need_distinct(e), 1, e->card, 1);
2055 : } else {
2056 0 : ne = exp_copy(v->sql, oa);
2057 : }
2058 0 : exp_setalias(ne, oa->alias.label, exp_find_rel_name(oa), exp_name(oa));
2059 0 : append(exps, ne);
2060 : }
2061 0 : v->changes++;
2062 0 : return rel_inplace_groupby( rel, u, gbe, exps);
2063 : }
2064 : return rel;
2065 : }
2066 :
2067 : /*
2068 : * More general
2069 : * groupby(
2070 : * [ outer ] join(
2071 : * project(
2072 : * table(A) [ c1, c2, .. ]
2073 : * ) [ c1, c2, identity(c2) as I, .. ],
2074 : * table(B) [ c1, c2, .. ]
2075 : * ) [ A.c1 = B.c1 ]
2076 : * ) [ I ] [ a1, a2, .. ]
2077 : *
2078 : * ->
2079 : *
2080 : * [ outer ] join(
2081 : * project(
2082 : * table(A) [ c1, c2, .. ]
2083 : * ) [ c1, c2, .. ],
2084 : * groupby (
2085 : * table(B) [ c1, c2, .. ]
2086 : * ) [ B.c1 ] [ a1, a2, .. ]
2087 : * ) [ A.c1 = B.c1 ]
2088 : */
2089 : static sql_rel *
2090 48028 : gen_push_groupby_down(mvc *sql, sql_rel *rel, int *changes)
2091 : {
2092 48028 : sql_rel *j = rel->l;
2093 48028 : list *gbe = rel->r;
2094 :
2095 48028 : if (rel->op == op_groupby && list_length(gbe) == 1 && j->op == op_join){
2096 6504 : sql_rel *jl = j->l, *jr = j->r, *cr, *cl;
2097 6504 : sql_exp *gb = gbe->h->data, *e;
2098 6504 : node *n;
2099 6504 : int left = 1;
2100 6504 : list *aggrs, *aliases, *gbe;
2101 :
2102 6504 : if (!is_identity(gb, jl) && !is_identity(gb, jr))
2103 : return rel;
2104 6 : if (jl->op == op_project &&
2105 4 : (e = list_find_exp( jl->exps, gb)) != NULL &&
2106 2 : find_prop(e->p, PROP_HASHCOL) != NULL) {
2107 : left = 0;
2108 : cr = jr;
2109 : cl = jl;
2110 4 : } else if (jr->op == op_project &&
2111 8 : (e = list_find_exp( jr->exps, gb)) != NULL &&
2112 4 : find_prop(e->p, PROP_HASHCOL) != NULL) {
2113 : left = 1;
2114 : cr = jl;
2115 : cl = jr;
2116 : } else {
2117 0 : return rel;
2118 : }
2119 :
2120 6 : if ((left && is_base(jl->op)) || (!left && is_base(jr->op))||
2121 0 : (left && is_select(jl->op)) || (!left && is_select(jr->op))
2122 0 : || rel_is_join_on_pkey(j, false))
2123 6 : return rel;
2124 :
2125 : /* only add aggr (based on left/right), and repeat the group by column */
2126 0 : aggrs = sa_list(sql->sa);
2127 0 : aliases = sa_list(sql->sa);
2128 0 : if (rel->exps) for (n = rel->exps->h; n; n = n->next) {
2129 0 : sql_exp *ce = n->data;
2130 :
2131 0 : if (exp_is_atom(ce))
2132 0 : list_append(aliases, ce);
2133 0 : else if (ce->type == e_column) {
2134 0 : if (rel_has_exp(cl, ce, false) == 0) /* collect aliases outside groupby */
2135 0 : list_append(aliases, ce);
2136 : else
2137 0 : list_append(aggrs, ce);
2138 0 : } else if (ce->type == e_aggr) {
2139 0 : list *args = ce->l;
2140 :
2141 : /* check args are part of left/right */
2142 0 : if (!list_empty(args) && rel_has_exps(cl, args, false) == 0)
2143 : return rel;
2144 0 : if (rel->op != op_join && exp_aggr_is_count(ce))
2145 0 : ce->p = prop_create(sql->sa, PROP_COUNT, ce->p);
2146 0 : list_append(aggrs, ce);
2147 : }
2148 : }
2149 : /* TODO move any column expressions (aliases) into the project list */
2150 :
2151 : /* find gb in left or right and should be unique */
2152 0 : gbe = sa_list(sql->sa);
2153 : /* push groupby to right, group on join exps */
2154 0 : if (j->exps) for (n = j->exps->h; n; n = n->next) {
2155 0 : sql_exp *ce = n->data, *l = ce->l, *r = ce->r, *e;
2156 :
2157 : /* get left/right hand of e_cmp */
2158 0 : assert(ce->type == e_cmp);
2159 0 : if (ce->flag == cmp_equal && is_alias(l->type) && is_alias(r->type) &&
2160 0 : (((e = rel_find_exp(cr, l)) && rel_find_exp(cl, r)) ||
2161 0 : ((e = rel_find_exp(cr, r)) && rel_find_exp(cl, l)))) {
2162 0 : e = exp_ref(sql, e);
2163 0 : list_append(gbe, e);
2164 : } else {
2165 0 : return rel;
2166 : }
2167 : }
2168 0 : if (!left)
2169 0 : cr = j->r = rel_groupby(sql, cr, gbe);
2170 : else
2171 0 : cr = j->l = rel_groupby(sql, cr, gbe);
2172 0 : cr->exps = list_merge(cr->exps, aggrs, (fdup)NULL);
2173 0 : set_processed(cr);
2174 0 : if (!is_project(cl->op))
2175 0 : cl = rel_project(sql->sa, cl,
2176 : rel_projections(sql, cl, NULL, 1, 1));
2177 0 : cl->exps = list_merge(cl->exps, aliases, (fdup)NULL);
2178 0 : set_processed(cl);
2179 0 : if (!left)
2180 0 : j->l = cl;
2181 : else
2182 0 : j->r = cl;
2183 0 : rel -> l = NULL;
2184 0 : rel_destroy(rel);
2185 :
2186 0 : if (list_empty(cr->exps) && list_empty(j->exps)) { /* remove crossproduct */
2187 0 : sql_rel *r = cl;
2188 0 : if (!left)
2189 0 : j->l = NULL;
2190 : else
2191 0 : j->r = NULL;
2192 0 : rel_destroy(j);
2193 0 : j = r;
2194 : }
2195 0 : (*changes)++;
2196 0 : return j;
2197 : }
2198 : return rel;
2199 : }
2200 :
2201 : /*
2202 : * Rewrite group(project(join(A,Dict)[a.i==dict.i])[...dict.n])[dict.n][ ... dict.n ]
2203 : * into
2204 : * project(join(groupby (A)[a.i],[a.i]), Dict)[a.i==dict.i])[dict.n]
2205 : *
2206 : */
2207 : static inline sql_rel *
2208 98073 : rel_push_groupby_down(visitor *v, sql_rel *rel)
2209 : {
2210 98073 : sql_rel *p = rel->l;
2211 98073 : list *gbe = rel->r;
2212 :
2213 98073 : if (rel->op == op_groupby && gbe && p && is_join(p->op))
2214 10197 : return gen_push_groupby_down(v->sql, rel, &v->changes);
2215 87876 : if (rel->op == op_groupby && gbe && p && p->op == op_project) {
2216 37831 : sql_rel *j = p->l;
2217 37831 : sql_rel *jl, *jr;
2218 37831 : node *n;
2219 :
2220 37831 : if (!j || j->op != op_join || list_length(j->exps) != 1)
2221 35695 : return gen_push_groupby_down(v->sql, rel, &v->changes);
2222 2136 : jl = j->l;
2223 2136 : jr = j->r;
2224 :
2225 : /* check if jr is a dict with index and var still used */
2226 2136 : if (jr->op != op_basetable || jr->l || !jr->r || list_length(jr->exps) != 2)
2227 2136 : return gen_push_groupby_down(v->sql, rel, &v->changes);
2228 :
2229 : /* check if group by is done on dict column */
2230 0 : for(n = gbe->h; n; n = n->next) {
2231 0 : sql_exp *ge = n->data, *pe = NULL, *e = NULL;
2232 :
2233 : /* find group by exp in project, then in dict */
2234 0 : pe = rel_find_exp(p, ge);
2235 0 : if (pe) /* find project exp in right hand of join, ie dict */
2236 0 : e = rel_find_exp(jr, pe);
2237 0 : if (pe && e) { /* Rewrite: join with dict after the group by */
2238 0 : list *pexps = rel_projections(v->sql, rel, NULL, 1, 1), *npexps;
2239 0 : node *m;
2240 0 : sql_exp *ne = j->exps->h->data; /* join exp */
2241 0 : p->l = jl; /* Project now only on the left side of the join */
2242 :
2243 0 : ne = ne->l; /* The left side of the compare is the index of the left */
2244 :
2245 : /* find ge reference in new projection list */
2246 0 : npexps = sa_list(v->sql->sa);
2247 0 : for (m = pexps->h; m; m = m->next) {
2248 0 : sql_exp *a = m->data;
2249 :
2250 0 : if (exp_refers(ge, a)) {
2251 0 : sql_exp *sc = jr->exps->t->data;
2252 0 : sql_exp *e = exp_ref(v->sql, sc);
2253 0 : if (exp_name(a))
2254 0 : exp_prop_alias(v->sql->sa, e, a);
2255 : a = e;
2256 : }
2257 0 : append(npexps, a);
2258 : }
2259 :
2260 : /* find ge in aggr list */
2261 0 : for (m = rel->exps->h; m; m = m->next) {
2262 0 : sql_exp *a = m->data;
2263 :
2264 0 : if (exp_match_exp(a, ge) || exp_refers(ge, a)) {
2265 0 : a = exp_ref(v->sql, ne);
2266 0 : if (exp_name(ne))
2267 0 : exp_prop_alias(v->sql->sa, a, ne);
2268 0 : m->data = a;
2269 : }
2270 : }
2271 :
2272 : /* change alias pe, ie project out the index */
2273 0 : pe->l = (void*)exp_relname(ne);
2274 0 : pe->r = (void*)exp_name(ne);
2275 0 : if (exp_name(ne))
2276 0 : exp_prop_alias(v->sql->sa, pe, ne);
2277 :
2278 : /* change alias ge */
2279 0 : ge->l = (void*)exp_relname(pe);
2280 0 : ge->r = (void*)exp_name(pe);
2281 0 : if (exp_name(pe))
2282 0 : exp_prop_alias(v->sql->sa, ge, pe);
2283 :
2284 : /* zap both project and groupby name hash tables (as we changed names above) */
2285 0 : list_hash_clear(rel->exps);
2286 0 : list_hash_clear((list*)rel->r);
2287 0 : list_hash_clear(p->exps);
2288 :
2289 : /* add join */
2290 0 : j->l = rel;
2291 0 : rel = rel_project(v->sql->sa, j, npexps);
2292 0 : v->changes++;
2293 : }
2294 : }
2295 : }
2296 : return rel;
2297 : }
2298 :
2299 : /* reduce group by expressions based on pkey info
2300 : *
2301 : * The reduced group by and (derived) aggr expressions are restored via
2302 : * extra (new) aggregate columns.
2303 : */
2304 : static inline sql_rel *
2305 98073 : rel_reduce_groupby_exps(visitor *v, sql_rel *rel)
2306 : {
2307 98073 : list *gbe = rel->r;
2308 :
2309 98073 : if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel) && list_length(gbe)) {
2310 48687 : node *n, *m;
2311 48687 : int k, j, i, ngbe = list_length(gbe);
2312 48687 : int8_t *scores = SA_NEW_ARRAY(v->sql->ta, int8_t, ngbe);
2313 48687 : sql_column *c;
2314 48687 : sql_table **tbls = SA_NEW_ARRAY(v->sql->ta, sql_table*, ngbe);
2315 48687 : sql_rel **bts = SA_NEW_ARRAY(v->sql->ta, sql_rel*, ngbe), *bt = NULL;
2316 :
2317 48687 : gbe = rel->r;
2318 127356 : for (k = 0, i = 0, n = gbe->h; n; n = n->next, k++) {
2319 78669 : sql_exp *e = n->data;
2320 :
2321 78669 : c = exp_find_column_(rel, e, -2, &bt);
2322 78669 : if (c) {
2323 88564 : for(j = 0; j < i; j++)
2324 33571 : if (c->t == tbls[j] && bts[j] == bt)
2325 : break;
2326 71260 : tbls[j] = c->t;
2327 71260 : bts[j] = bt;
2328 71260 : i += (j == i);
2329 : }
2330 : }
2331 48687 : if (i) { /* forall tables find pkey and
2332 : remove useless other columns */
2333 : /* TODO also remove group by columns which are related to
2334 : * the other columns using a foreign-key join (n->1), ie 1
2335 : * on the to be removed side.
2336 : */
2337 100309 : for(j = 0; j < i; j++) {
2338 54981 : int l, nr = 0, cnr = 0;
2339 :
2340 54981 : k = list_length(gbe);
2341 54981 : memset(scores, 0, list_length(gbe));
2342 54981 : if (tbls[j]->pkey) {
2343 27392 : for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
2344 20885 : fcmp cmp = (fcmp)&kc_column_cmp;
2345 20885 : sql_exp *e = n->data;
2346 :
2347 20885 : c = exp_find_column_(rel, e, -2, &bt);
2348 29746 : if (c && c->t == tbls[j] && bts[j] == bt &&
2349 8861 : list_find(tbls[j]->pkey->k.columns, c, cmp) != NULL) {
2350 797 : scores[l] = 1;
2351 797 : nr ++;
2352 18543 : } else if (c && c->t == tbls[j] && bts[j] == bt) {
2353 : /* Okay we can cleanup a group by column */
2354 8064 : scores[l] = -1;
2355 8064 : cnr ++;
2356 : }
2357 : }
2358 : }
2359 6507 : if (nr) {
2360 788 : int all = (list_length(tbls[j]->pkey->k.columns) == nr);
2361 788 : sql_kc *kc = tbls[j]->pkey->k.columns->h->data;
2362 :
2363 788 : c = kc->c;
2364 2104 : for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
2365 1316 : sql_exp *e = n->data;
2366 :
2367 : /* pkey based group by */
2368 1465 : if (scores[l] == 1 && ((all ||
2369 : /* first of key */
2370 922 : (c == exp_find_column(rel, e, -2))) && !find_prop(e->p, PROP_HASHCOL)))
2371 13 : e->p = prop_create(v->sql->sa, PROP_HASHCOL, e->p);
2372 : }
2373 2969 : for (m = rel->exps->h; m; m = m->next ){
2374 2181 : sql_exp *e = m->data;
2375 :
2376 7696 : for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
2377 5528 : sql_exp *gb = n->data;
2378 :
2379 : /* pkey based group by */
2380 5528 : if (scores[l] == 1 && exp_match_exp(e,gb) && find_prop(gb->p, PROP_HASHCOL) && !find_prop(e->p, PROP_HASHCOL)) {
2381 13 : e->p = prop_create(v->sql->sa, PROP_HASHCOL, e->p);
2382 13 : break;
2383 : }
2384 :
2385 : }
2386 : }
2387 : }
2388 54981 : if (cnr && nr && list_length(tbls[j]->pkey->k.columns) == nr) {
2389 108 : list *ngbe = new_exp_list(v->sql->sa);
2390 :
2391 392 : for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
2392 284 : sql_exp *e = n->data;
2393 :
2394 : /* keep the group by columns which form a primary key
2395 : * of this table. And those unrelated to this table. */
2396 284 : if (scores[l] != -1)
2397 139 : append(ngbe, e);
2398 : }
2399 108 : rel->r = ngbe;
2400 : /* rewrite gbe and aggr, in the aggr list */
2401 108 : if (0)
2402 : for (m = rel->exps->h; m; m = m->next ){
2403 : sql_exp *e = m->data;
2404 : int fnd = 0;
2405 :
2406 : for (l = 0, n = gbe->h; l < k && n && !fnd; l++, n = n->next) {
2407 : sql_exp *gb = n->data;
2408 :
2409 : if (scores[l] == -1 && exp_refers(gb, e)) {
2410 : /*
2411 : sql_exp *rs = exp_column(v->sql->sa, gb->l?gb->l:exp_relname(gb), gb->r?gb->r:exp_name(gb), exp_subtype(gb), rel->card, has_nil(gb), is_unique(gb), is_intern(gb));
2412 : exp_setalias(rs, e->alias.label, exp_find_rel_name(e), exp_name(e));
2413 : e = rs;
2414 : */
2415 : assert(e->alias.label == e->nid);
2416 : fnd = 1;
2417 : }
2418 : }
2419 : m->data = e;
2420 : }
2421 : /* new reduced aggr expression list */
2422 108 : assert(list_length(rel->exps)>0);
2423 : /* only one reduction at a time */
2424 108 : list_hash_clear(rel->exps);
2425 108 : v->changes++;
2426 108 : return rel;
2427 : }
2428 54873 : gbe = rel->r;
2429 : }
2430 : }
2431 : }
2432 : /* remove constants from group by list */
2433 97965 : if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel)) {
2434 49085 : int i;
2435 49085 : node *n;
2436 :
2437 127470 : for (i = 0, n = gbe->h; n; n = n->next) {
2438 78385 : sql_exp *e = n->data;
2439 :
2440 78385 : if (exp_is_atom(e))
2441 25 : i++;
2442 : }
2443 49085 : if (i) {
2444 25 : list *ngbe = new_exp_list(v->sql->sa);
2445 25 : list *dgbe = new_exp_list(v->sql->sa);
2446 :
2447 51 : for (n = gbe->h; n; n = n->next) {
2448 26 : sql_exp *e = n->data;
2449 :
2450 26 : if (!exp_is_atom(e))
2451 1 : append(ngbe, e);
2452 : /* we need at least one gbe */
2453 25 : else if (!n->next && list_empty(ngbe))
2454 24 : append(ngbe, e);
2455 : else
2456 1 : append(dgbe, e);
2457 : }
2458 25 : rel->r = ngbe;
2459 25 : if (!list_empty(dgbe)) {
2460 : /* use atom's directly in the aggr expr list */
2461 :
2462 3 : for (n = rel->exps->h; n; n = n->next) {
2463 2 : sql_exp *e = n->data, *ne = NULL;
2464 :
2465 2 : if (e->type == e_column) {
2466 2 : if (e->nid)
2467 2 : ne = exps_bind_nid(dgbe, e->nid);
2468 2 : if (ne) {
2469 1 : ne = exp_copy(v->sql, ne);
2470 1 : exp_prop_alias(v->sql->sa, ne, e);
2471 1 : e = ne;
2472 : }
2473 : }
2474 2 : n->data = e;
2475 : }
2476 1 : list_hash_clear(rel->exps);
2477 1 : v->changes++;
2478 : }
2479 : }
2480 : }
2481 : return rel;
2482 : }
2483 :
2484 : /* if all arguments to a distinct aggregate are unique, remove 'distinct' property */
2485 : static inline sql_rel *
2486 98073 : rel_distinct_aggregate_on_unique_values(visitor *v, sql_rel *rel)
2487 : {
2488 98073 : if (is_groupby(rel->op) && !list_empty(rel->exps)) {
2489 304719 : for (node *n = rel->exps->h; n; n = n->next) {
2490 206742 : sql_exp *exp = (sql_exp*) n->data;
2491 :
2492 206742 : if (exp->type == e_aggr && need_distinct(exp)) {
2493 690 : bool all_unique = true;
2494 690 : list *l = exp->l;
2495 :
2496 1380 : for (node *m = l->h; m && all_unique; m = m->next) {
2497 690 : sql_exp *arg = (sql_exp*) m->data;
2498 :
2499 824 : all_unique &= arg->type == e_column && is_unique(arg) && (!is_semantics(exp) || !has_nil(arg));
2500 : }
2501 690 : if (!all_unique && exps_card(l) > CARD_ATOM)
2502 394 : all_unique = exps_unique(v->sql, rel, l) && (!is_semantics(exp) || !have_nil(l));
2503 : if (all_unique) {
2504 134 : set_nodistinct(exp);
2505 134 : v->changes++;
2506 : }
2507 : }
2508 : }
2509 : }
2510 98073 : return rel;
2511 : }
2512 :
2513 : static inline sql_rel *
2514 98073 : rel_remove_const_aggr(visitor *v, sql_rel *rel)
2515 : {
2516 98073 : if(!rel) {
2517 : return rel;
2518 : }
2519 :
2520 98073 : list *exps = rel->exps;
2521 :
2522 98073 : if(rel->op != op_groupby || list_empty(exps)) {
2523 6 : return rel;
2524 : }
2525 :
2526 98067 : if(!list_empty(rel->r)) {
2527 : /* in the general case in an expression of an aggregate over
2528 : * a constant can be rewritten as just the const e.g.
2529 : * aggr(const) -> const
2530 : */
2531 :
2532 251835 : for(node *n = exps->h; n; n = n->next) {
2533 179435 : sql_exp *e = n->data;
2534 :
2535 179435 : if(e->type != e_aggr) {
2536 129336 : continue;
2537 : }
2538 :
2539 50099 : sql_func *j = ((sql_subfunc *)e->f)->func;
2540 :
2541 : /* some aggregates with const values can only be eliminated
2542 : * under certain circumstances e.g.
2543 : * sum(NULL) -> NULL, sum(0) -> 0
2544 : * prod(NULL) -> NULL, prod(1) -> 1
2545 : * count(NULL) -> 0
2546 : */
2547 50099 : int sum = strcmp(j->base.name, "sum") == 0,
2548 50099 : prd = strcmp(j->base.name, "prod") == 0,
2549 50099 : cnt = strcmp(j->base.name, "count") == 0;
2550 :
2551 50099 : if(!j->s && j->system == 1) {
2552 31859 : list *se = e->l;
2553 :
2554 31859 : if(se == NULL) {
2555 9245 : continue;
2556 : }
2557 :
2558 45226 : for(node *m = se->h; m; m = m->next) {
2559 22612 : sql_exp *w = m->data;
2560 :
2561 22612 : if(w->type == e_atom && w->card == CARD_ATOM) {
2562 26 : atom *wa = w->l;
2563 :
2564 26 : if(sum && !(wa->isnull || atom_is_zero(wa))) {
2565 3 : continue;
2566 : }
2567 :
2568 23 : if(prd && !(wa->isnull || atom_is_one(wa))) {
2569 0 : continue;
2570 : }
2571 :
2572 23 : if(cnt) {
2573 5 : if(wa->isnull) {
2574 3 : list_remove_node(se, NULL, m);
2575 :
2576 3 : w=exp_atom_lng(v->sql->sa, 0);
2577 3 : list_append(se, w);
2578 : }
2579 : else {
2580 2 : continue;
2581 : }
2582 : }
2583 :
2584 21 : exp_setalias(w,e->alias.label,e->alias.rname,e->alias.name);
2585 :
2586 21 : n->data = w;
2587 21 : v->changes++;
2588 : }
2589 : }
2590 : }
2591 : }
2592 : }
2593 :
2594 : /*
2595 : * Below code replaces GROUP BY with PROJECT in some cases;
2596 : * Triggers on...
2597 : * select 1 having true; select 42 from foo group by x; select n from foo group by rollup(n);
2598 : */
2599 :
2600 98066 : if (!rel_is_ref(rel)) {
2601 74246 : int needed = 0;
2602 229567 : for (node *n = rel->exps->h; n; n = n->next) {
2603 155321 : sql_exp *exp = (sql_exp*) n->data;
2604 :
2605 155321 : if (exp_is_atom(exp) && exp->type != e_aggr)
2606 78 : needed++;
2607 : }
2608 74246 : if (needed) {
2609 72 : if (!list_empty(rel->r)) {
2610 21 : int atoms = 0;
2611 : /* corner case, all grouping columns are atoms */
2612 43 : for (node *n = ((list*)rel->r)->h; n; n = n->next) {
2613 22 : sql_exp *exp = (sql_exp*) n->data;
2614 :
2615 22 : if (exp_is_atom(exp))
2616 1 : atoms++;
2617 : }
2618 : /* possible edge case, never triggers in coverage tests */
2619 21 : if (atoms == list_length(rel->r)) {
2620 1 : list *nexps = sa_list(v->sql->sa);
2621 3 : for (node *n = rel->exps->h; n; ) {
2622 2 : node *next = n->next;
2623 2 : sql_exp *e = (sql_exp*) n->data;
2624 :
2625 : /* remove references to constant group by columns */
2626 2 : if (e->type == e_column) {
2627 1 : sql_exp *found = NULL;
2628 1 : found = exps_bind_nid(rel->r, e->nid);
2629 1 : if (found) {
2630 1 : list_append(nexps, found);
2631 1 : list_remove_node(rel->exps, NULL, n);
2632 : }
2633 : }
2634 : n = next;
2635 : }
2636 1 : rel->r = NULL; /* transform it into a global aggregate */
2637 1 : rel->exps = list_merge(nexps, rel->exps, (fdup) NULL); /* add grouping columns back as projections */
2638 : /* global aggregates may return 1 row, so filter it based on the count */
2639 1 : sql_subfunc *cf = sql_bind_func(v->sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
2640 1 : sql_exp *count = exp_aggr(v->sql->sa, NULL, cf, 0, 1, CARD_ATOM, 0);
2641 1 : count = rel_groupby_add_aggr(v->sql, rel, count);
2642 1 : sql_exp *cp = exp_compare(v->sql->sa, exp_ref(v->sql, count), exp_atom(v->sql->sa, atom_int(v->sql->sa, exp_subtype(count), 0)), cmp_notequal);
2643 1 : rel = rel_select(v->sql->sa, rel, cp);
2644 1 : set_processed(rel);
2645 1 : return rel;
2646 : }
2647 51 : } else if (list_length(rel->exps) == needed) { /* all are const */
2648 48 : sql_rel *ll = rel->l;
2649 48 : rel->op = op_project;
2650 : /* TODO check if l->l == const, else change that */
2651 48 : if (ll && ll->l) {
2652 44 : rel_destroy(ll);
2653 44 : rel->l = rel_project_exp(v->sql, exp_atom_bool(v->sql->sa, 1));
2654 : }
2655 48 : return rel;
2656 : }
2657 23 : sql_rel *nrel = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
2658 78 : for (node *n = nrel->exps->h; n; n = n->next) {
2659 55 : sql_exp *exp = (sql_exp*) n->data;
2660 55 : if (exp->type == e_column) {
2661 55 : sql_exp *e = rel_find_exp(rel, exp);
2662 :
2663 55 : if (e && exp_is_atom(e) && e->type == e_atom) {
2664 29 : sql_exp *ne = exp_copy(v->sql, e);
2665 29 : assert(ne->alias.label);
2666 29 : exp_setalias(ne, ne->alias.label, exp_find_rel_name(exp), exp_name(exp));
2667 29 : n->data = ne;
2668 29 : v->changes++;
2669 : }
2670 : }
2671 : }
2672 23 : list *nl = sa_list(v->sql->sa);
2673 78 : for (node *n = rel->exps->h; n; n = n->next) {
2674 55 : sql_exp *exp = (sql_exp*) n->data;
2675 :
2676 55 : if (!exp_is_atom(exp) || exp->type != e_atom)
2677 26 : append(nl, exp);
2678 : }
2679 23 : rel->exps = nl;
2680 23 : return nrel;
2681 : }
2682 : }
2683 :
2684 : return rel;
2685 : }
2686 :
2687 : #if 0
2688 : static sql_rel *
2689 : rel_groupby_distinct2(visitor *v, sql_rel *rel)
2690 : {
2691 : list *ngbes = sa_list(v->sql->sa), *gbes, *naggrs = sa_list(v->sql->sa), *aggrs = sa_list(v->sql->sa);
2692 : sql_rel *l;
2693 : node *n;
2694 :
2695 : gbes = rel->r;
2696 : if (!gbes)
2697 : return rel;
2698 :
2699 : /* check if each aggr is, rewritable (max,min,sum,count)
2700 : * and only has one argument */
2701 : for (n = rel->exps->h; n; n = n->next) {
2702 : sql_exp *e = n->data;
2703 : sql_subfunc *af = e->f;
2704 :
2705 : if (e->type == e_aggr &&
2706 : (strcmp(af->func->base.name, "sum") &&
2707 : strcmp(af->func->base.name, "count") &&
2708 : strcmp(af->func->base.name, "min") &&
2709 : strcmp(af->func->base.name, "max")))
2710 : return rel;
2711 : }
2712 :
2713 : for (n = gbes->h; n; n = n->next) {
2714 : sql_exp *e = n->data;
2715 :
2716 : e = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
2717 : append(ngbes, e);
2718 : }
2719 :
2720 : /* 1 for each aggr(distinct v) add the attribute expression v to gbes and aggrs list
2721 : * 2 for each aggr(z) add aggr_phase2('z') to the naggrs list
2722 : * 3 for each group by col, add also to the naggrs list
2723 : * */
2724 : for (n = rel->exps->h; n; n = n->next) {
2725 : sql_exp *e = n->data;
2726 :
2727 : if (e->type == e_aggr && need_distinct(e)) { /* 1 */
2728 : /* need column expression */
2729 : list *args = e->l;
2730 : sql_exp *v = args->h->data;
2731 : append(gbes, v);
2732 : if (!exp_name(v))
2733 : exp_label(v->sql->sa, v, ++v->sql->label);
2734 : v = exp_column(v->sql->sa, exp_find_rel_name(v), exp_name(v), exp_subtype(v), v->card, has_nil(v), is_unique(v), is_intern(v));
2735 : append(aggrs, v);
2736 : v = exp_aggr1(v->sql->sa, v, e->f, need_distinct(e), 1, e->card, 1);
2737 : exp_setalias(v, e->alias.label, exp_find_rel_name(e), exp_name(e));
2738 : append(naggrs, v);
2739 : } else if (e->type == e_aggr && !need_distinct(e)) {
2740 : sql_exp *v;
2741 : sql_subfunc *f = e->f;
2742 : int cnt = exp_aggr_is_count(e);
2743 : sql_subfunc *a = sql_bind_func(v->sql, "sys", (cnt)?"sum":f->func->base.name, exp_subtype(e), NULL, F_AGGR, true, true);
2744 :
2745 : append(aggrs, e);
2746 : if (!exp_name(e))
2747 : exp_label(v->sql->sa, e, ++v->sql->label);
2748 : set_has_nil(e);
2749 : v = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
2750 : v = exp_aggr1(v->sql->sa, v, a, 0, 1, e->card, 1);
2751 : if (cnt)
2752 : set_zero_if_empty(v);
2753 : exp_setalias(v, e->alias.label, exp_find_rel_name(e), exp_name(e));
2754 : append(naggrs, v);
2755 : } else { /* group by col */
2756 : if (list_find_exp(gbes, e) || !list_find_exp(naggrs, e)) {
2757 : append(aggrs, e);
2758 :
2759 : e = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
2760 : }
2761 : append(naggrs, e);
2762 : }
2763 : }
2764 :
2765 : l = rel->l = rel_groupby(v->sql, rel->l, gbes);
2766 : l->exps = aggrs;
2767 : rel->r = ngbes;
2768 : rel->exps = naggrs;
2769 : v->changes++;
2770 : return rel;
2771 : }
2772 : #endif
2773 :
2774 : /* Rewrite group by expressions with distinct
2775 : *
2776 : * ie select a, count(distinct b) from c where ... groupby a;
2777 : * No other aggregations should be present
2778 : *
2779 : * Rewrite the more general case, good for parallel execution
2780 : *
2781 : * groupby(R) [e,f] [ aggr1 a distinct, aggr2 b distinct, aggr3 c, aggr4 d]
2782 : *
2783 : * into
2784 : *
2785 : * groupby(
2786 : * groupby(R) [e,f,a,b] [ a, b, aggr3 c, aggr4 d]
2787 : * ) [e,f]( aggr1 a distinct, aggr2 b distinct, aggr3_phase2 c, aggr4_phase2 d)
2788 : */
2789 : static inline sql_rel *
2790 98072 : rel_groupby_distinct(visitor *v, sql_rel *rel)
2791 : {
2792 98072 : node *n;
2793 :
2794 98072 : if (is_groupby(rel->op)) {
2795 97982 : sql_rel *l = rel->l;
2796 97982 : if (!l || is_groupby(l->op))
2797 : return rel;
2798 : }
2799 97938 : if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel)) {
2800 49064 : int nr = 0, anr = 0;
2801 49064 : list *gbe, *ngbe, *arg, *exps, *nexps;
2802 49064 : sql_exp *distinct = NULL, *darg, *found;
2803 49064 : sql_rel *l = NULL;
2804 :
2805 177990 : for (n=rel->exps->h; n && nr <= 2; n = n->next) {
2806 128926 : sql_exp *e = n->data;
2807 128926 : if (need_distinct(e)) {
2808 391 : distinct = n->data;
2809 391 : nr++;
2810 : }
2811 128926 : anr += is_aggr(e->type);
2812 : }
2813 49064 : if (nr < 1 || distinct->type != e_aggr)
2814 : return rel;
2815 384 : if (nr > 1 || anr > nr)
2816 : return rel;//rel_groupby_distinct2(v, rel);
2817 117 : arg = distinct->l;
2818 117 : if (list_length(arg) != 1 || list_length(rel->r) + nr != list_length(rel->exps))
2819 2 : return rel;
2820 :
2821 115 : gbe = rel->r;
2822 115 : ngbe = sa_list(v->sql->sa);
2823 115 : exps = sa_list(v->sql->sa);
2824 115 : nexps = sa_list(v->sql->sa);
2825 362 : for (n=rel->exps->h; n; n = n->next) {
2826 247 : sql_exp *e = n->data;
2827 247 : if (e != distinct) {
2828 132 : if (e->type == e_aggr) { /* copy the arguments to the aggregate */
2829 0 : list *args = e->l;
2830 0 : if (args) {
2831 0 : for (node *n = args->h ; n ; n = n->next) {
2832 0 : sql_exp *e = n->data;
2833 0 : list_append(ngbe, exp_copy(v->sql, e));
2834 0 : list_append(exps, exp_copy(v->sql, e));
2835 : }
2836 : }
2837 : } else {
2838 132 : e = exp_ref(v->sql, e);
2839 132 : append(ngbe, e);
2840 132 : append(exps, e);
2841 : }
2842 132 : if (e->type == e_aggr) /* aggregates must be copied */
2843 0 : e = exp_copy(v->sql, e);
2844 : else
2845 132 : e = exp_ref(v->sql, e);
2846 132 : append(nexps, e);
2847 : }
2848 : }
2849 :
2850 115 : darg = arg->h->data;
2851 115 : if ((found = exps_find_exp(exps, darg)) == NULL) { /* not already in the groups projection list */
2852 112 : if ((found = exps_find_exp(gbe, darg))) { /* first find if the aggregate argument already exists in the grouping list */
2853 0 : darg = exp_ref(v->sql, found);
2854 : } else {
2855 112 : list_append(gbe, darg = exp_copy(v->sql, darg));
2856 112 : exp_label(v->sql->sa, darg, ++v->sql->label);
2857 112 : darg = exp_ref(v->sql, darg);
2858 : }
2859 112 : list_append(exps, darg);
2860 112 : darg = exp_ref(v->sql, darg);
2861 : } else {
2862 3 : darg = exp_ref(v->sql, found);
2863 : }
2864 115 : arg->h->data = darg;
2865 115 : l = rel->l = rel_groupby(v->sql, rel->l, gbe);
2866 115 : l->exps = exps;
2867 115 : set_processed(l);
2868 115 : rel->r = ngbe;
2869 115 : rel->exps = nexps;
2870 115 : set_nodistinct(distinct);
2871 115 : append(nexps, distinct);
2872 115 : v->changes++;
2873 : }
2874 : return rel;
2875 : }
2876 :
2877 : /*
2878 : * Push Count inside crossjoin down, and multiply the results
2879 : *
2880 : * project ( project(
2881 : * group by ( crossproduct (
2882 : * crossproduct( project (
2883 : * L, => group by (
2884 : * R L
2885 : * ) [ ] [ count NOT NULL ] ) [ ] [ count NOT NULL ]
2886 : * ) ),
2887 : * ) [ NOT NULL ] project (
2888 : * group by (
2889 : * R
2890 : * ) [ ] [ count NOT NULL ]
2891 : * )
2892 : * ) [ sql_mul(.., .. NOT NULL) ]
2893 : * )
2894 : */
2895 : static inline sql_rel *
2896 98073 : rel_push_count_down(visitor *v, sql_rel *rel)
2897 : {
2898 98073 : sql_rel *r = rel->l;
2899 :
2900 98073 : if (is_groupby(rel->op) && !rel_is_ref(rel) && list_empty(rel->r) &&
2901 25506 : r && !r->exps && r->op == op_join && !(rel_is_ref(r)) &&
2902 : /* currently only single count aggregation is handled, no other projects or aggregation */
2903 56 : list_length(rel->exps) == 1 && exp_aggr_is_count(rel->exps->h->data)) {
2904 12 : sql_exp *nce, *oce, *cnt1 = NULL, *cnt2 = NULL;
2905 12 : sql_rel *gbl = NULL, *gbr = NULL; /* Group By */
2906 12 : sql_rel *cp = NULL; /* Cross Product */
2907 12 : sql_rel *srel;
2908 :
2909 12 : oce = rel->exps->h->data;
2910 12 : if (oce->l) /* we only handle COUNT(*) */
2911 : return rel;
2912 :
2913 10 : srel = r->l;
2914 : {
2915 10 : sql_subfunc *cf = sql_bind_func(v->sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
2916 10 : sql_exp *e = exp_aggr(v->sql->sa, NULL, cf, need_distinct(oce), need_no_nil(oce), oce->card, 0);
2917 :
2918 10 : exp_label(v->sql->sa, e, ++v->sql->label);
2919 10 : cnt1 = exp_ref(v->sql, e);
2920 10 : gbl = rel_groupby(v->sql, rel_dup(srel), NULL);
2921 10 : set_processed(gbl);
2922 10 : rel_groupby_add_aggr(v->sql, gbl, e);
2923 : }
2924 :
2925 10 : srel = r->r;
2926 : {
2927 10 : sql_subfunc *cf = sql_bind_func(v->sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
2928 10 : sql_exp *e = exp_aggr(v->sql->sa, NULL, cf, need_distinct(oce), need_no_nil(oce), oce->card, 0);
2929 :
2930 10 : exp_label(v->sql->sa, e, ++v->sql->label);
2931 10 : cnt2 = exp_ref(v->sql, e);
2932 10 : gbr = rel_groupby(v->sql, rel_dup(srel), NULL);
2933 10 : set_processed(gbr);
2934 10 : rel_groupby_add_aggr(v->sql, gbr, e);
2935 : }
2936 :
2937 10 : cp = rel_crossproduct(v->sql->sa, gbl, gbr, op_join);
2938 10 : set_processed(cp);
2939 :
2940 10 : if (!(nce = rel_binop_(v->sql, NULL, cnt1, cnt2, "sys", "sql_mul", card_value, true))) {
2941 0 : v->sql->session->status = 0;
2942 0 : v->sql->errstr[0] = '\0';
2943 0 : return rel; /* error, fallback to original expression */
2944 : }
2945 : /* because of remote plans, make sure "sql_mul" returns bigint. The cardinality is atomic, so no major performance penalty */
2946 10 : if (subtype_cmp(exp_subtype(oce), exp_subtype(nce)) != 0)
2947 10 : nce = exp_convert(v->sql, nce, exp_subtype(nce), exp_subtype(oce));
2948 10 : if (exp_name(oce))
2949 10 : exp_prop_alias(v->sql->sa, nce, oce);
2950 :
2951 10 : rel_destroy(rel);
2952 10 : rel = rel_project(v->sql->sa, cp, append(new_exp_list(v->sql->sa), nce));
2953 10 : set_processed(rel);
2954 :
2955 10 : v->changes++;
2956 : }
2957 :
2958 : return rel;
2959 : }
2960 :
2961 : static inline sql_rel *
2962 42622 : rel_basecount(visitor *v, sql_rel *rel)
2963 : {
2964 52965 : if (is_groupby(rel->op) && !rel_is_ref(rel) && rel->l && list_empty(rel->r) &&
2965 19637 : list_length(rel->exps) == 1 && exp_aggr_is_count(rel->exps->h->data)) {
2966 4491 : sql_rel *bt = rel->l;
2967 4491 : sql_exp *e = rel->exps->h->data;
2968 4491 : if (is_basetable(bt->op) && list_empty(e->l)) { /* count(*) */
2969 : /* change into select cnt('schema','table') */
2970 688 : sql_table *t = bt->l;
2971 : /* I need to get the declared table's frame number to make this work correctly for those */
2972 688 : if (!isTable(t) || isDeclaredTable(t))
2973 : return rel;
2974 609 : sql_subfunc *cf = sql_bind_func(v->sql, "sys", "cnt", sql_bind_localtype("str"), sql_bind_localtype("str"), F_FUNC, true, true);
2975 609 : list *exps = sa_list(v->sql->sa);
2976 609 : append(exps, exp_atom_str(v->sql->sa, t->s->base.name, sql_bind_localtype("str")));
2977 609 : append(exps, exp_atom_str(v->sql->sa, t->base.name, sql_bind_localtype("str")));
2978 609 : sql_exp *ne = exp_op(v->sql->sa, exps, cf);
2979 :
2980 609 : ne = exp_propagate(v->sql->sa, ne, e);
2981 609 : exp_setalias(ne, e->alias.label, exp_find_rel_name(e), exp_name(e));
2982 609 : rel_destroy(rel);
2983 609 : rel = rel_project(v->sql->sa, NULL, append(sa_list(v->sql->sa), ne));
2984 609 : v->changes++;
2985 : }
2986 : }
2987 : return rel;
2988 : }
2989 :
2990 : static inline sql_rel *
2991 42622 : rel_simplify_count(visitor *v, sql_rel *rel)
2992 : {
2993 42622 : if (is_groupby(rel->op) && !list_empty(rel->exps)) {
2994 42520 : mvc *sql = v->sql;
2995 42520 : int ncountstar = 0;
2996 :
2997 : /* Convert count(no null) into count(*) */
2998 133827 : for (node *n = rel->exps->h; n ; n = n->next) {
2999 91307 : sql_exp *e = n->data;
3000 :
3001 91307 : if (exp_aggr_is_count(e) && !need_distinct(e)) {
3002 15582 : if (list_length(e->l) == 0) {
3003 12824 : ncountstar++;
3004 2758 : } else if (list_length(e->l) == 1 && !has_nil((sql_exp*)((list*)e->l)->h->data)) {
3005 2604 : sql_subfunc *cf = sql_bind_func(sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
3006 2604 : sql_exp *ne = exp_aggr(sql->sa, NULL, cf, 0, 0, e->card, 0);
3007 2604 : if (exp_name(e))
3008 2604 : exp_prop_alias(sql->sa, ne, e);
3009 2604 : n->data = ne;
3010 2604 : ncountstar++;
3011 2604 : v->changes++;
3012 : }
3013 : }
3014 : }
3015 : /* With multiple count(*), use exp_ref to reduce the number of calls to this aggregate */
3016 42520 : if (ncountstar > 1) {
3017 56 : sql_exp *count_star = NULL;
3018 56 : sql_rel *nrel = rel_project(v->sql->sa, rel, NULL);
3019 56 : list *aexps = sa_list(v->sql->sa), *nexps = sa_list(v->sql->sa);
3020 56 : nrel->exps = nexps;
3021 247 : for (node *n = rel->exps->h; n ; n = n->next) {
3022 191 : sql_exp *e = n->data;
3023 :
3024 191 : if (exp_aggr_is_count(e) && !need_distinct(e) && list_length(e->l) == 0) {
3025 126 : if (!count_star) {
3026 56 : count_star = e;
3027 56 : append(aexps, e);
3028 56 : append(nexps, exp_ref(sql, e));
3029 : } else {
3030 70 : sql_exp *ne = exp_ref(sql, count_star);
3031 :
3032 70 : if (exp_name(e))
3033 70 : exp_prop_alias(sql->sa, ne, e);
3034 70 : v->changes++;
3035 70 : append(nexps, ne);
3036 : }
3037 : } else {
3038 65 : append(aexps, e);
3039 65 : append(nexps, exp_ref(sql, e));
3040 : }
3041 : }
3042 56 : rel->exps = aexps;
3043 56 : return nrel;
3044 : }
3045 : }
3046 : return rel;
3047 : }
3048 :
3049 : static sql_rel *
3050 42622 : rel_groupjoin(visitor *v, sql_rel *rel)
3051 : {
3052 42622 : if (!rel || rel_is_ref(rel) || !is_groupby(rel->op) || list_empty(rel->r))
3053 14616 : return rel;
3054 :
3055 28006 : sql_rel *j = rel->l;
3056 28006 : if (!j || rel_is_ref(j) /*|| !is_left(j->op)*/ || j->op != op_join || list_length(rel->exps) > 1 /* only join because left joins aren't optimized yet (TODO), only length 1 as implementation of groupjoins is missing */ || !list_empty(rel->attr))
3057 27288 : return rel;
3058 : /* check group by exps == equi join exps */
3059 718 : list *gbes = rel->r;
3060 718 : if (list_length(gbes) != list_length(j->exps))
3061 : return rel;
3062 619 : int nr = 0;
3063 1235 : for(node *n = gbes->h; n; n = n->next) {
3064 619 : sql_exp *gbe = n->data;
3065 1235 : for(node *m = j->exps->h; m; m = m->next) {
3066 619 : sql_exp *je = m->data;
3067 619 : if (je->type != e_cmp || je->flag != cmp_equal)
3068 : return rel;
3069 : /* check if its a join exp (ie not a selection) */
3070 621 : if (!( (!rel_has_exp(j->l, je->l, false) && !rel_has_exp(j->r, je->r, false)) ||
3071 10 : (!rel_has_exp(j->l, je->r, false) && !rel_has_exp(j->r, je->l, false))))
3072 0 : return rel;
3073 616 : if (exp_match(je->l, gbe)) {
3074 76 : nr++;
3075 540 : } else if (exp_match(je->r, gbe)) {
3076 11 : nr++;
3077 : }
3078 : }
3079 : }
3080 616 : if (nr == list_length(gbes)) {
3081 : // printf("#group by converted\n");
3082 87 : j = rel_dup(j);
3083 87 : j->attr = rel->exps;
3084 87 : v->changes++;
3085 87 : rel_destroy(rel);
3086 87 : return j;
3087 : }
3088 : return rel;
3089 : }
3090 :
3091 : /* select k1 from bla where k1 = const -> select const from bla where k1 = const */
3092 : static sql_rel *
3093 3902073 : rel_project_select_exp(visitor *v, sql_rel *rel)
3094 : {
3095 3902073 : if (is_simple_project(rel->op) && rel->exps && rel->l) {
3096 1275929 : sql_rel *l = rel->l;
3097 1275929 : if (is_select(l->op) && l->exps) {
3098 1300010 : for(node *n = rel->exps->h; n; n = n->next) {
3099 978572 : sql_exp *col = n->data;
3100 978572 : if (col->type == e_column) {
3101 1752998 : for(node *m = l->exps->h; m; m = m->next) {
3102 1040018 : sql_exp *cmp = m->data;
3103 1040018 : if (cmp->type == e_cmp && cmp->flag == cmp_equal && !is_anti(cmp) && !is_semantics(cmp) && exp_is_atom(cmp->r)) {
3104 640204 : sql_exp *l = cmp->l;
3105 640204 : if(l->type == e_column && col->nid == l->nid) {
3106 : /* replace column with the constant */
3107 13731 : sql_exp *e = n->data = exp_copy(v->sql, cmp->r);
3108 13731 : exp_setalias(e, col->alias.label, exp_relname(col), exp_name(col));
3109 13731 : exp_propagate(v->sql->sa, e, col);
3110 13731 : list_hash_clear(rel->exps);
3111 : }
3112 : }
3113 : }
3114 : }
3115 : }
3116 : }
3117 : }
3118 3902073 : return rel;
3119 : }
3120 :
3121 : static sql_rel *
3122 3902072 : rel_optimize_projections_(visitor *v, sql_rel *rel)
3123 : {
3124 3902072 : rel = rel_project_cse(v, rel);
3125 3902073 : rel = rel_project_select_exp(v, rel);
3126 :
3127 3902072 : if (!rel || !is_groupby(rel->op))
3128 : return rel;
3129 :
3130 98073 : rel = rel_remove_const_aggr(v, rel);
3131 :
3132 98072 : if (v->value_based_opt) {
3133 42622 : rel = rel_simplify_sum(v, rel);
3134 42622 : rel = rel_simplify_groupby_columns(v, rel);
3135 : }
3136 98072 : rel = rel_groupby_cse(v, rel);
3137 98073 : rel = rel_push_aggr_down(v, rel);
3138 98073 : rel = rel_push_groupby_down(v, rel);
3139 98072 : rel = rel_reduce_groupby_exps(v, rel);
3140 98073 : rel = rel_distinct_aggregate_on_unique_values(v, rel);
3141 98072 : rel = rel_groupby_distinct(v, rel);
3142 98072 : rel = rel_push_count_down(v, rel);
3143 :
3144 : /* only when value_based_opt is on, ie not for dependency resolution */
3145 98072 : if (v->value_based_opt) {
3146 42622 : rel = rel_simplify_count(v, rel);
3147 42622 : rel = rel_basecount(v, rel);
3148 :
3149 42622 : rel = rel_groupjoin(v, rel);
3150 : }
3151 : return rel;
3152 : }
3153 :
3154 : static sql_rel *
3155 187414 : rel_optimize_projections(visitor *v, global_props *gp, sql_rel *rel)
3156 : {
3157 187414 : (void) gp;
3158 187414 : return rel_visitor_topdown(v, rel, &rel_optimize_projections_);
3159 : }
3160 :
3161 : run_optimizer
3162 638406 : bind_optimize_projections(visitor *v, global_props *gp)
3163 : {
3164 638406 : int flag = v->sql->sql_optimizer;
3165 638210 : return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_project] || gp->cnt[op_union] || gp->cnt[op_munion]
3166 1276616 : || gp->cnt[op_inter] || gp->cnt[op_except]) && (flag & optimize_projections) ? rel_optimize_projections : NULL;
3167 : }
3168 :
3169 :
3170 : static bool
3171 58514 : exps_have_selfref(list *exps)
3172 : {
3173 58514 : bool selfref = false;
3174 58514 : if (list_empty(exps))
3175 : return false;
3176 472445 : for(node *n = exps->h; n && !selfref; n = n->next) {
3177 413931 : sql_exp *e = n->data;
3178 413931 : selfref |= is_selfref(e);
3179 : }
3180 : return selfref;
3181 : }
3182 :
3183 : static inline sql_rel *
3184 2757697 : rel_push_project_down_union(visitor *v, sql_rel *rel)
3185 : {
3186 : /* first remove distinct if already unique */
3187 2757697 : if (rel->op == op_project && need_distinct(rel) && rel->exps && exps_unique(v->sql, rel, rel->exps) && !have_nil(rel->exps)) {
3188 109 : set_nodistinct(rel);
3189 109 : if (exps_card(rel->exps) <= CARD_ATOM && rel->card > CARD_ATOM) { /* if the projection just contains constants, then no topN is needed */
3190 2 : sql_rel *nl = rel->l = rel_topn(v->sql->sa, rel->l, append(sa_list(v->sql->sa), exp_atom_lng(v->sql->sa, 1)));
3191 2 : set_processed(nl);
3192 : }
3193 109 : v->changes++;
3194 : }
3195 :
3196 2757697 : if (rel->op == op_project && rel->l && rel->exps && list_empty(rel->r)) {
3197 933308 : int need_distinct = need_distinct(rel);
3198 933308 : sql_rel *u = rel->l;
3199 933308 : sql_rel *p = rel;
3200 :
3201 933308 : if (!u || !(is_union(u->op) || is_munion(u->op)) || need_distinct(u) || is_recursive(u) || !u->exps || rel_is_ref(u) || project_unsafe(rel, false))
3202 904015 : return rel;
3203 :
3204 29293 : sql_rel *r;
3205 :
3206 29293 : assert(!is_union(u->op));
3207 29293 : if (is_recursive(u))
3208 : return rel;
3209 :
3210 : /* don't push project down union of single values */
3211 88077 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
3212 58806 : r = n->data;
3213 : // TODO: does this check make sense?
3214 58806 : if (is_project(r->op) && !r->l)
3215 : return rel;
3216 : }
3217 :
3218 88055 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
3219 58784 : r = rel_dup(n->data);
3220 :
3221 : /* introduce projection around each operand if needed */
3222 58784 : if (!is_project(r->op) || exps_have_selfref(r->exps))
3223 270 : r = rel_project(v->sql->sa, r,
3224 : rel_projections(v->sql, r, NULL, 1, 1));
3225 : /* check if we need distinct */
3226 117568 : need_distinct &=
3227 58784 : (!exps_unique(v->sql, r, r->exps) || have_nil(r->exps));
3228 58784 : rel_rename_exps(v->sql, u->exps, r->exps);
3229 :
3230 58784 : rel_destroy(n->data);
3231 58784 : n->data = r;
3232 : }
3233 :
3234 : /* once we have checked for need_distinct in every rel we can
3235 : * introduce the projects under the munion which are gonna be
3236 : * copies of the single project above munion */
3237 88055 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
3238 58784 : r = rel_dup(n->data);
3239 :
3240 58784 : r = rel_project(v->sql->sa, r, NULL);
3241 58784 : if (need_distinct)
3242 0 : set_distinct(r);
3243 58784 : r->exps = exps_copy(v->sql, p->exps);
3244 58784 : set_processed(r);
3245 :
3246 58784 : rel_destroy(n->data);
3247 58784 : n->data = r;
3248 : }
3249 :
3250 : /* turn the project-munion on top into munion. incr operand
3251 : * rels count to make sure that they are not deleted by the
3252 : * subsequent rel_inplace_setop_n_ary */
3253 88055 : for (node *n = ((list*)u->l)->h; n; n = n->next)
3254 58784 : rel_dup(n->data);
3255 29271 : rel = rel_inplace_setop_n_ary(v->sql, rel, u->l, op_munion,
3256 : rel_projections(v->sql, rel, NULL, 1, 1));
3257 29271 : if (need_distinct)
3258 0 : set_distinct(rel);
3259 29271 : if (is_single(u))
3260 2 : set_single(rel);
3261 :
3262 29271 : v->changes++;
3263 :
3264 : /* if any operand has two project above then squash them */
3265 88055 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
3266 58784 : r = rel_dup(n->data);
3267 58784 : r = rel_merge_projects_(v, r);
3268 58784 : rel_destroy(n->data);
3269 58784 : n->data = r;
3270 : }
3271 :
3272 : return rel;
3273 : }
3274 : return rel;
3275 : }
3276 :
3277 : static inline sql_rel *
3278 2757697 : rel_merge_unions(visitor *v, sql_rel *rel)
3279 : {
3280 : /* stacked munion flattening e.g.
3281 : * munion( munion(a, b, c), munion(d, e)) => munion(a,b,c,d,e)
3282 : */
3283 2757697 : if (rel && is_munion(rel->op) && !is_recursive(rel)) {
3284 81086 : list *l = rel->l;
3285 521146 : for(node *n = l->h; n; ) {
3286 358974 : node *next = n->next;
3287 358974 : sql_rel *oc = n->data;
3288 358974 : sql_rel *c = oc;
3289 :
3290 : /* account for any group-bys pushed down between stacked munions */
3291 358974 : if (oc->op == op_groupby)
3292 2983 : c = oc->l;
3293 :
3294 358974 : if (is_munion(c->op)) {
3295 10675 : c = rel_dup(c);
3296 10675 : list_remove_node(l, NULL, n);
3297 10675 : l = list_merge(l, c->l, (fdup)NULL);
3298 10675 : c->l = NULL;
3299 10675 : rel_destroy(oc);
3300 10675 : if (!next)
3301 3 : next = l->h;
3302 10675 : v->changes++;
3303 : }
3304 : n = next;
3305 : }
3306 81086 : rel->l = l;
3307 : }
3308 2757697 : return rel;
3309 : }
3310 :
3311 :
3312 : /*
3313 : * Push (semi)joins down unions, this is basically for merge tables, where
3314 : * we know that the fk-indices are split over two clustered merge tables.
3315 : */
3316 : static inline sql_rel *
3317 2757697 : rel_push_join_down_munion(visitor *v, sql_rel *rel)
3318 : {
3319 2757697 : if ((is_join(rel->op) && !is_outerjoin(rel->op) && !is_single(rel)) || is_semi(rel->op)) {
3320 340655 : sql_rel *l = rel->l, *r = rel->r, *ol = l, *or = r;
3321 340655 : list *exps = rel->exps, *attr = rel->attr;
3322 340655 : sql_exp *je = NULL;
3323 :
3324 340655 : if (is_recursive(l) || is_recursive(r))
3325 : return rel;
3326 : /* we would like to optimize in place reference rels which point
3327 : * to replica tables and let the replica optimizer handle those
3328 : * later. otherwise we miss the push join down optimization due
3329 : * to the rel_is_ref bailout
3330 : */
3331 340651 : if (rel_is_ref(l) && is_basetable(l->op) && l->l && isReplicaTable((sql_table*)l->l)) {
3332 1 : rel->l = rel_copy(v->sql, l, true);
3333 1 : rel_destroy(l);
3334 : }
3335 340651 : if (rel_is_ref(r) && is_basetable(r->op) && r->l && isReplicaTable((sql_table*)r->l)) {
3336 4 : rel->r = rel_copy(v->sql, r, true);
3337 4 : rel_destroy(r);
3338 : }
3339 :
3340 : // TODO: do we need to check if it's l/r are refs?
3341 340651 : if (!l || !r || need_distinct(l) || need_distinct(r) || rel_is_ref(l) || rel_is_ref(r))
3342 : return rel;
3343 287890 : if (l->op == op_project)
3344 23491 : l = l->l;
3345 287890 : if (r->op == op_project)
3346 27135 : r = r->l;
3347 :
3348 : /* both sides only if we have a join index ASSUMING pkey-fkey are aligned */
3349 : // TODO: we could also check if the join cols are (not) unique
3350 287890 : bool aligned_pk_fk = true;
3351 287890 : if (!l || !r || (is_munion(l->op) && is_munion(r->op) &&
3352 173 : !(je = rel_is_join_on_pkey(rel, aligned_pk_fk))))
3353 859 : return rel;
3354 :
3355 : // TODO: why? bailout for union semijoin without pkey joins expressions
3356 287031 : if (is_semi(rel->op) && is_munion(l->op) && !je)
3357 : return rel;
3358 :
3359 : /* if both sides are munions we assume that they will have the same number of children */
3360 286165 : if (is_munion(l->op) && is_munion(r->op) && list_length(l->l) != list_length(r->l))
3361 : return rel;
3362 :
3363 286165 : if (is_munion(l->op) && !need_distinct(l) && !is_single(l) && !is_recursive(l) &&
3364 4584 : !is_munion(r->op)){
3365 : /* join(munion(a,b,c), d) -> munion(join(a,d), join(b,d), join(c,d)) */
3366 4584 : list *js = sa_list(v->sql->sa);
3367 13762 : for (node *n = ((list*)l->l)->h; n; n = n->next) {
3368 9178 : sql_rel *pc = rel_dup(n->data);
3369 9178 : if (!is_project(pc->op))
3370 12 : pc = rel_project(v->sql->sa, pc, rel_projections(v->sql, pc, NULL, 1, 1));
3371 9178 : rel_rename_exps(v->sql, l->exps, pc->exps);
3372 9178 : if (l != ol) {
3373 12 : pc = rel_project(v->sql->sa, pc, NULL);
3374 12 : pc->exps = exps_copy(v->sql, ol->exps);
3375 12 : set_processed(pc);
3376 : }
3377 9178 : pc = rel_crossproduct(v->sql->sa, pc, rel_dup(or), rel->op);
3378 9178 : pc->exps = exps_copy(v->sql, exps);
3379 9178 : pc->attr = exps_copy(v->sql, attr);
3380 9178 : set_processed(pc);
3381 9178 : pc = rel_project(v->sql->sa, pc, rel_projections(v->sql, pc, NULL, 1, 1));
3382 9178 : js = append(js, pc);
3383 : }
3384 4584 : v->changes++;
3385 4584 : return rel_inplace_setop_n_ary(v->sql, rel, js, op_munion,
3386 : rel_projections(v->sql, rel, NULL, 1, 1));
3387 281581 : } else if (is_munion(l->op) && !need_distinct(l) && !is_single(l) && !is_recursive(l) &&
3388 0 : is_munion(r->op) && !need_distinct(r) && !is_single(r) && !is_recursive(r) &&
3389 : je) {
3390 : /* join(munion(a,b,c), munion(d,e,f)) -> munion(join(a,d), join(b,e), join(c,f)) */
3391 0 : list *cps = sa_list(v->sql->sa);
3392 : /* create pairwise joins between left and right parts. assume eq num of parts (see earlier bailout) */
3393 0 : for (node *n = ((list*)l->l)->h, *m=((list*)r->l)->h; n && m; n = n->next, m = m->next) {
3394 : /* left part */
3395 0 : sql_rel *lp = rel_dup(n->data);
3396 0 : if (!is_project(lp->op))
3397 0 : lp = rel_project(v->sql->sa, lp, rel_projections(v->sql, lp, NULL, 1, 1));
3398 0 : rel_rename_exps(v->sql, l->exps, lp->exps);
3399 0 : if (l != ol) {
3400 0 : lp = rel_project(v->sql->sa, lp, NULL);
3401 0 : lp->exps = exps_copy(v->sql, ol->exps);
3402 0 : set_processed(lp);
3403 : }
3404 : /* right part */
3405 0 : sql_rel *rp = rel_dup(m->data);
3406 0 : if (!is_project(rp->op))
3407 0 : rp = rel_project(v->sql->sa, rp, rel_projections(v->sql, rp, NULL, 1, 1));
3408 0 : rel_rename_exps(v->sql, r->exps, rp->exps);
3409 0 : if (r != or) {
3410 0 : rp = rel_project(v->sql->sa, rp, NULL);
3411 0 : rp->exps = exps_copy(v->sql, or->exps);
3412 0 : set_processed(rp);
3413 : }
3414 : /* combine them */
3415 0 : sql_rel *cp = rel_crossproduct(v->sql->sa, lp, rp, rel->op);
3416 0 : cp->exps = exps_copy(v->sql, exps);
3417 0 : cp->attr = exps_copy(v->sql, attr);
3418 0 : set_processed(cp);
3419 0 : cp = rel_project(v->sql->sa, cp, rel_projections(v->sql, cp, NULL, 1, 1));
3420 0 : cps = append(cps, cp);
3421 : }
3422 0 : v->changes++;
3423 0 : return rel_inplace_setop_n_ary(v->sql, rel, cps, op_munion,
3424 : rel_projections(v->sql, rel, NULL, 1, 1));
3425 281581 : } else if (!is_munion(l->op) &&
3426 281573 : is_munion(r->op) && !need_distinct(r) && !is_single(r) && !is_recursive(r) &&
3427 2964 : !is_semi(rel->op)) {
3428 : /* join(a, munion(b,c,d)) -> munion(join(a,b), join(a,c), join(a,d)) */
3429 1585 : list *js = sa_list(v->sql->sa);
3430 4758 : for (node *n = ((list*)r->l)->h; n; n = n->next) {
3431 3173 : sql_rel *pc = rel_dup(n->data);
3432 3173 : if (!is_project(pc->op))
3433 17 : pc = rel_project(v->sql->sa, pc, rel_projections(v->sql, pc, NULL, 1, 1));
3434 3173 : rel_rename_exps(v->sql, r->exps, pc->exps);
3435 3173 : if (r != or) {
3436 125 : pc = rel_project(v->sql->sa, pc, NULL);
3437 125 : pc->exps = exps_copy(v->sql, or->exps);
3438 125 : set_processed(pc);
3439 : }
3440 3173 : pc = rel_crossproduct(v->sql->sa, rel_dup(ol), pc, rel->op);
3441 3173 : pc->exps = exps_copy(v->sql, exps);
3442 3173 : pc->attr = exps_copy(v->sql, attr);
3443 3173 : set_processed(pc);
3444 3173 : pc = rel_project(v->sql->sa, pc, rel_projections(v->sql, pc, NULL, 1, 1));
3445 3173 : js = append(js, pc);
3446 : }
3447 1585 : v->changes++;
3448 1585 : return rel_inplace_setop_n_ary(v->sql, rel, js, op_munion,
3449 : rel_projections(v->sql, rel, NULL, 1, 1));
3450 279996 : } else if (!is_munion(l->op) &&
3451 279988 : is_munion(r->op) && !need_distinct(r) && !is_single(r) && !is_recursive(r) &&
3452 1379 : is_semi(rel->op) && je) {
3453 : /* {semi}join ( A1, munion (B, A2a, C, A2b)) [A1.partkey = A2.partkey] ->
3454 : * {semi}join ( A1, munion (A2a, A2b))
3455 : * (ie some parts of an n-th munion operand)
3456 : *
3457 : * How to detect that a relation isn't matching?
3458 : * partitioning is currently done only on pkey/fkey's
3459 : * ie only matching per part if join is on pkey/fkey (parts)
3460 : * and part numbers should match.
3461 : * */
3462 0 : int lpnr = rel_part_nr(l, je);
3463 0 : if (lpnr < 0)
3464 : return rel;
3465 :
3466 0 : list *ups = sa_list(v->sql->sa);
3467 0 : for (node *n = ((list*)r->l)->h; n; n = n->next) {
3468 0 : if (rel_uses_part_nr(n->data, je, lpnr)) {
3469 0 : sql_rel *pc = rel_dup(n->data);
3470 0 : ups = append(ups, pc);
3471 : }
3472 : }
3473 0 : v->changes++;
3474 0 : return rel_inplace_setop_n_ary(v->sql, r, ups, op_munion,
3475 : rel_projections(v->sql, rel, NULL, 1, 1));
3476 : }
3477 : }
3478 : return rel;
3479 : }
3480 :
3481 : static sql_rel *
3482 2757697 : rel_optimize_unions_topdown_(visitor *v, sql_rel *rel)
3483 : {
3484 2757697 : rel = rel_push_project_down_union(v, rel);
3485 2757697 : rel = rel_merge_unions(v, rel);
3486 2757697 : rel = rel_push_join_down_munion(v, rel);
3487 2757697 : return rel;
3488 : }
3489 :
3490 : static sql_rel *
3491 29160 : rel_optimize_unions_topdown(visitor *v, global_props *gp, sql_rel *rel)
3492 : {
3493 29160 : (void) gp;
3494 29160 : return rel_visitor_topdown(v, rel, &rel_optimize_unions_topdown_);
3495 : }
3496 :
3497 : run_optimizer
3498 638405 : bind_optimize_unions_topdown(visitor *v, global_props *gp)
3499 : {
3500 638405 : (void) v;
3501 638405 : return gp->opt_level == 1 && gp->cnt[op_munion] ? rel_optimize_unions_topdown : NULL;
3502 : }
3503 :
3504 :
3505 : static sql_column *
3506 0 : is_fk_column_of_pk(mvc *sql, sql_rel *rel, sql_column *pkc, sql_exp *e) /* test if e is a foreign key column for the pk on pkc */
3507 : {
3508 0 : sql_trans *tr = sql->session->tr;
3509 0 : sql_column *c = exp_find_column(rel, e, -2);
3510 :
3511 0 : if (c) {
3512 0 : sql_table *t = c->t;
3513 :
3514 0 : for (node *n = ol_first_node(t->idxs); n; n = n->next) {
3515 0 : sql_idx *li = n->data;
3516 :
3517 0 : if (li->type == join_idx) {
3518 0 : for (node *m = li->columns->h ; m ; m = m->next) {
3519 0 : sql_kc *fkc = m->data;
3520 :
3521 0 : if (strcmp(fkc->c->base.name, c->base.name) == 0) { /* same fkey column */
3522 0 : sql_key *fkey = (sql_key*)os_find_id(tr->cat->objects, tr, ((sql_fkey*)li->key)->rkey);
3523 :
3524 0 : if (strcmp(fkey->t->base.name, pkc->t->base.name) == 0) { /* to same pk table */
3525 0 : for (node *o = fkey->columns->h ; o ; o = n->next) {
3526 0 : sql_kc *kc = m->data;
3527 :
3528 0 : if (strcmp(kc->c->base.name, pkc->base.name) == 0) /* to same pk table column */
3529 : return c;
3530 : }
3531 : }
3532 : }
3533 : }
3534 : }
3535 : }
3536 : }
3537 : return NULL;
3538 : }
3539 :
3540 : static bool
3541 0 : has_no_selectivity(mvc *sql, sql_rel *rel)
3542 : {
3543 0 : if (!rel)
3544 : return true;
3545 :
3546 0 : switch(rel->op){
3547 : case op_basetable:
3548 : case op_truncate:
3549 : case op_table:
3550 : return true;
3551 0 : case op_topn:
3552 : case op_sample:
3553 : case op_project:
3554 : case op_groupby:
3555 0 : return has_no_selectivity(sql, rel->l);
3556 : case op_ddl:
3557 : case op_insert:
3558 : case op_update:
3559 : case op_delete:
3560 : case op_merge:
3561 : case op_join:
3562 : case op_left:
3563 : case op_right:
3564 : case op_full:
3565 : case op_semi:
3566 : case op_anti:
3567 : case op_union:
3568 : case op_inter:
3569 : case op_except:
3570 : case op_munion:
3571 : case op_select:
3572 : return false;
3573 : }
3574 : return true;
3575 : }
3576 :
3577 : static sql_rel *
3578 713910 : rel_distinct_project2groupby_(visitor *v, sql_rel *rel)
3579 : {
3580 713910 : sql_rel *l = rel->l;
3581 :
3582 : /* rewrite distinct project (table) [ constant ] -> project [ constant ] */
3583 721761 : if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
3584 7851 : exps_card(rel->exps) <= CARD_ATOM) {
3585 482 : set_nodistinct(rel);
3586 482 : if (rel->card > CARD_ATOM) { /* if the projection just contains constants, then no topN is needed */
3587 193 : sql_rel *nl = rel->l = rel_topn(v->sql->sa, rel->l, append(sa_list(v->sql->sa), exp_atom_lng(v->sql->sa, 1)));
3588 193 : set_processed(nl);
3589 : }
3590 482 : v->changes++;
3591 : }
3592 :
3593 : /* rewrite distinct project [ pk ] ( select ( table ) [ e op val ])
3594 : * into project [ pk ] ( select/semijoin ( table ) */
3595 713910 : if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
3596 7581 : (l->op == op_select || l->op == op_semi) && exps_unique(v->sql, rel, rel->exps) &&
3597 212 : (!have_semantics(l->exps) || !have_nil(rel->exps))) {
3598 212 : set_nodistinct(rel);
3599 212 : v->changes++;
3600 : }
3601 :
3602 : /* rewrite distinct project ( join(p,f) [ p.pk = f.fk ] ) [ p.pk ]
3603 : * into project( (semi)join(p,f) [ p.pk = f.fk ] ) [ p.pk ] */
3604 713910 : if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
3605 7157 : l && (is_select(l->op) || l->op == op_join) && rel_is_join_on_pkey(l, true) /* [ pk == fk ] */) {
3606 0 : sql_exp *found = NULL, *pk = NULL, *fk = NULL;
3607 0 : bool all_exps_atoms = true;
3608 0 : sql_column *pkc = NULL;
3609 :
3610 0 : for (node *m = l->exps->h ; m ; m = m->next) { /* find a primary key join */
3611 0 : sql_exp *je = (sql_exp *) m->data;
3612 0 : sql_exp *le = je->l, *re = je->r;
3613 :
3614 0 : if (!find_prop(je->p, PROP_JOINIDX)) /* must be a pk-fk join expression */
3615 0 : continue;
3616 :
3617 0 : if ((pkc = exp_is_pkey(l, le))) { /* le is the primary key */
3618 0 : all_exps_atoms = true;
3619 :
3620 0 : for (node *n = rel->exps->h; n && all_exps_atoms; n = n->next) {
3621 0 : sql_exp *e = (sql_exp *) n->data;
3622 :
3623 0 : if (exp_match(e, le) || exp_refers(e, le))
3624 : found = e;
3625 0 : else if (e->card > CARD_ATOM)
3626 0 : all_exps_atoms = false;
3627 : }
3628 : pk = le;
3629 : fk = re;
3630 : }
3631 0 : if (!found && (pkc = exp_is_pkey(l, re))) { /* re is the primary key */
3632 0 : all_exps_atoms = true;
3633 :
3634 0 : for (node *n = rel->exps->h; n && all_exps_atoms; n = n->next) {
3635 0 : sql_exp *e = (sql_exp *) n->data;
3636 :
3637 0 : if (exp_match(e, re) || exp_refers(e, re))
3638 : found = e;
3639 0 : else if (e->card > CARD_ATOM)
3640 0 : all_exps_atoms = false;
3641 : }
3642 : pk = re;
3643 : fk = le;
3644 : }
3645 : }
3646 :
3647 0 : if (all_exps_atoms && found) { /* rel must have the same primary key on the projection list */
3648 : /* if the foreign key has no selectivity, the join can be removed */
3649 0 : if (!(rel_is_ref(l)) && ((rel_find_exp(l->l, fk) && is_fk_column_of_pk(v->sql, l->l, pkc, fk) && has_no_selectivity(v->sql, l->l)) ||
3650 0 : (l->r && rel_find_exp(l->r, fk) && is_fk_column_of_pk(v->sql, l->r, pkc, fk) && has_no_selectivity(v->sql, l->r)))) {
3651 0 : sql_rel *side = (rel_find_exp(l->l, pk) != NULL)?l->l:l->r;
3652 :
3653 0 : rel->l = rel_dup(side);
3654 0 : rel_destroy(l);
3655 0 : v->changes++;
3656 0 : set_nodistinct(rel);
3657 0 : return rel;
3658 : }
3659 : /* if the join has no multiple references it can be re-written into a semijoin */
3660 0 : if (l->op == op_join && !(rel_is_ref(l)) && list_length(rel->exps) == 1) { /* other expressions may come from the other side */
3661 0 : if (l->r && rel_find_exp(l->r, pk)) {
3662 0 : sql_rel *temp = l->l;
3663 0 : l->l = l->r;
3664 0 : l->r = temp;
3665 :
3666 0 : l->op = op_semi;
3667 0 : } else if (rel_find_exp(l->l, pk)) {
3668 0 : l->op = op_semi;
3669 : }
3670 : }
3671 0 : v->changes++;
3672 0 : set_nodistinct(rel);
3673 0 : return rel;
3674 : }
3675 : }
3676 : /* rewrite distinct project [ gbe ] ( select ( groupby [ gbe ] [ gbe, e ] )[ e op val ])
3677 : * into project [ gbe ] ( select ( group etc ) */
3678 713910 : if (rel->op == op_project && rel->l && !rel->r /* no order by */ &&
3679 7157 : need_distinct(rel) && l->op == op_select){
3680 2593 : sql_rel *g = l->l;
3681 2593 : if (is_groupby(g->op)) {
3682 4 : list *used = sa_list(v->sql->sa);
3683 4 : list *gbe = g->r;
3684 4 : node *n;
3685 4 : int fnd = 1;
3686 :
3687 8 : for (n = rel->exps->h; n && fnd; n = n->next) {
3688 4 : sql_exp *e = n->data;
3689 :
3690 4 : if (e->card > CARD_ATOM) {
3691 : /* find e in gbe */
3692 4 : sql_exp *ne = list_find_exp(g->exps, e);
3693 :
3694 4 : if (ne)
3695 3 : ne = list_find_exp( gbe, ne);
3696 3 : if (ne && !list_find_exp(used, ne)) {
3697 2 : fnd++;
3698 2 : list_append(used, ne);
3699 : }
3700 3 : if (!ne)
3701 : fnd = 0;
3702 : }
3703 : }
3704 4 : if (fnd == (list_length(gbe)+1)) {
3705 0 : v->changes++;
3706 0 : set_nodistinct(rel);
3707 : }
3708 : }
3709 : }
3710 713910 : if (rel->op == op_project && rel->l &&
3711 7157 : need_distinct(rel) && exps_card(rel->exps) > CARD_ATOM) {
3712 7157 : node *n;
3713 7157 : list *exps = new_exp_list(v->sql->sa), *gbe = new_exp_list(v->sql->sa);
3714 7157 : list *obe = rel->r; /* we need to read the ordering later */
3715 :
3716 7157 : if (obe) {
3717 0 : int fnd = 0;
3718 :
3719 0 : for(n = obe->h; n && !fnd; n = n->next) {
3720 0 : sql_exp *e = n->data;
3721 :
3722 0 : if (e->type != e_column)
3723 : fnd = 1;
3724 0 : else if (exps_bind_nid(rel->exps, e->nid) == NULL)
3725 0 : fnd = 1;
3726 : }
3727 0 : if (fnd)
3728 : return rel;
3729 : }
3730 7157 : rel->l = rel_project(v->sql->sa, rel->l, rel->exps);
3731 :
3732 21905 : for (n = rel->exps->h; n; n = n->next) {
3733 14748 : sql_exp *e = n->data, *ne;
3734 :
3735 14748 : set_nodistinct(e);
3736 14748 : ne = exp_ref(v->sql, e);
3737 14748 : if (e->card > CARD_ATOM && !list_find_exp(gbe, ne)) { /* no need to group by on constants, or the same column multiple times */
3738 14373 : append(gbe, ne);
3739 14373 : ne = exp_ref(v->sql, ne);
3740 : }
3741 14748 : append(exps, ne);
3742 : }
3743 7157 : rel->op = op_groupby;
3744 7157 : rel->exps = exps;
3745 7157 : rel->r = gbe;
3746 7157 : set_nodistinct(rel);
3747 7157 : if (obe) {
3748 : /* add order again */
3749 0 : rel = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
3750 0 : rel->r = obe;
3751 : }
3752 7157 : v->changes++;
3753 : }
3754 : return rel;
3755 : }
3756 :
3757 : static sql_rel *
3758 5692 : rel_distinct_project2groupby(visitor *v, global_props *gp, sql_rel *rel)
3759 : {
3760 5692 : (void) gp;
3761 5692 : return rel_visitor_bottomup(v, rel, &rel_distinct_project2groupby_);
3762 : }
3763 :
3764 : run_optimizer
3765 638404 : bind_distinct_project2groupby(visitor *v, global_props *gp)
3766 : {
3767 638404 : int flag = v->sql->sql_optimizer;
3768 561147 : return gp->opt_cycle == 0 && gp->opt_level == 1 && gp->needs_distinct && gp->cnt[op_project] &&
3769 644096 : (flag & distinct_project2groupby)? rel_distinct_project2groupby : NULL;
3770 : }
|