Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer_private.h"
15 : #include "rel_basetable.h"
16 : #include "rel_exp.h"
17 : #include "rel_select.h"
18 : #include "rel_rewriter.h"
19 :
20 : static int
21 3669196 : exp_is_rename(sql_exp *e)
22 : {
23 3669196 : return (e->type == e_column);
24 : }
25 :
26 : static int
27 2106075 : exp_is_useless_rename(sql_exp *e)
28 : {
29 4212150 : return (e->type == e_column &&
30 2106075 : ((!e->l && !exp_relname(e)) ||
31 4211868 : (e->l && exp_relname(e) && strcmp(e->l, exp_relname(e)) == 0)) &&
32 1892194 : strcmp(e->r, exp_name(e)) == 0);
33 : }
34 :
35 : static list *
36 18694 : rel_used_projections(mvc *sql, list *exps, list *users)
37 : {
38 18694 : list *nexps = sa_list(sql->sa);
39 18694 : bool *used = SA_ZNEW_ARRAY(sql->ta, bool, list_length(exps));
40 18694 : int i = 0;
41 :
42 124736 : for(node *n = users->h; n; n = n->next) {
43 106042 : sql_exp *e = n->data, *ne = NULL;
44 106042 : if ((e->l && (ne = exps_bind_column2(exps, e->l, e->r, NULL))) || (ne = exps_bind_column(exps, e->r, NULL, NULL, 1))) {
45 106042 : used[list_position(exps, ne)] = 1;
46 : }
47 : }
48 133542 : for(node *n = exps->h; n; n = n->next, i++) {
49 114848 : sql_exp *e = n->data;
50 114848 : if (is_intern(e) || used[i])
51 114781 : append(nexps, e);
52 : }
53 18694 : return nexps;
54 : }
55 :
56 : static int
57 159378 : data_equal( sql_exp *e1, sql_exp *e2)
58 : {
59 159378 : if (e1 == e2)
60 62391 : return 0;
61 : return -1;
62 : }
63 :
64 : /* move projects down with the goal op removing them completely (ie push renames/reduced lists into basetable)
65 : * for some cases we can directly remove iff renames rename into same alias
66 : * */
67 : static sql_rel *
68 4229775 : rel_push_project_down_(visitor *v, sql_rel *rel)
69 : {
70 : /* for now only push down renames */
71 4229775 : if (v->depth > 1 && is_simple_project(rel->op) && !need_distinct(rel) && !rel_is_ref(rel) && rel->l && !rel->r &&
72 1117847 : v->parent &&
73 1107130 : !is_modify(v->parent->op) && !is_topn(v->parent->op) && !is_sample(v->parent->op) &&
74 1706546 : !is_ddl(v->parent->op) && !is_set(v->parent->op) && !is_munion(v->parent->op) &&
75 661914 : list_check_prop_all(rel->exps, (prop_check_func)&exp_is_rename)) {
76 428186 : sql_rel *l = rel->l;
77 :
78 428186 : if (rel_is_ref(l))
79 : return rel;
80 377786 : if (is_basetable(l->op)) {
81 32138 : if (list_check_prop_all(rel->exps, (prop_check_func)&exp_is_useless_rename)) {
82 : /* TODO reduce list (those in the project + internal) */
83 18694 : rel->l = NULL;
84 18694 : l->exps = rel_used_projections(v->sql, l->exps, rel->exps);
85 18694 : rel_destroy(rel);
86 18694 : v->changes++;
87 18694 : return l;
88 : }
89 : return rel;
90 345648 : } else if (list_check_prop_all(rel->exps, (prop_check_func)&exp_is_useless_rename)) {
91 145022 : if ((is_project(l->op) && list_length(l->exps) == list_length(rel->exps)) ||
92 133915 : ((v->parent && is_project(v->parent->op)) &&
93 40132 : (is_mset(l->op) || is_select(l->op) || is_join(l->op) || is_semi(l->op) || is_topn(l->op) || is_sample(l->op)))) {
94 46332 : rel->l = NULL;
95 46332 : rel_destroy(rel);
96 46332 : v->changes++;
97 46332 : return l;
98 : }
99 200626 : } else if (list_check_prop_all(rel->exps, (prop_check_func)&exp_is_rename)) {
100 : /* TODO for positional (setops), if not top relation, rename in order of the inner */
101 : /* check for selfrefs, ie if l has self refs we cannot reduce (or rename) the expressions */
102 200626 : if (is_simple_project(l->op) && !l->r) {
103 71928 : list *nexps = sa_list(v->sql->sa);
104 :
105 : /* first find all required expressions */
106 193280 : for(node *n = rel->exps->h; n; n = n->next) {
107 184614 : sql_exp *e = n->data, *ne = NULL;
108 :
109 184614 : if (e->l)
110 182800 : ne = exps_bind_column2(l->exps, e->l, e->r, NULL);
111 184614 : if (!ne && !e->l)
112 1814 : ne = exps_bind_column(l->exps, e->r, NULL, NULL, 1);
113 1814 : if (!ne)
114 : return rel;
115 184614 : if (ne && exp_has_selfref(v->sql, ne))
116 : return rel;
117 : /* make sure we don't have duplicates */
118 183743 : if (list_find(nexps, ne, (fcmp)&data_equal))
119 : return rel;
120 121352 : append(nexps, ne);
121 : }
122 : /* rename using outer expressions */
123 26680 : for(node *n = rel->exps->h, *m = nexps->h; n && m; n = n->next, m = m->next) {
124 18014 : sql_exp *e = n->data, *ne = m->data;
125 :
126 18014 : exp_setname(v->sql->sa, ne, exp_relname(e), exp_name(e));
127 18014 : exp_propagate(v->sql->sa, ne, e);
128 : }
129 : /* reset hash after renaming */
130 8666 : list_hash_clear(nexps);
131 8666 : l->exps = nexps;
132 8666 : rel->l = NULL;
133 8666 : rel_destroy(rel);
134 8666 : v->changes++;
135 8666 : return l;
136 : }
137 : }
138 : }
139 : /* ToDo handle useful renames, ie new relation name and unique set of attribute names (could reduce set of * attributes) */
140 : /* handle both useless and useful with project [ group by ] */
141 : return rel;
142 : }
143 :
144 : static sql_rel *
145 162006 : rel_push_project_down(visitor *v, global_props *gp, sql_rel *rel)
146 : {
147 162006 : (void) gp;
148 162006 : return rel_visitor_bottomup(v, rel, &rel_push_project_down_);
149 : }
150 :
151 : run_optimizer
152 601169 : bind_push_project_down(visitor *v, global_props *gp)
153 : {
154 601169 : int flag = v->sql->sql_optimizer;
155 600984 : return gp->opt_level == 1 && (flag & push_project_down) &&
156 1202149 : (gp->cnt[op_project] || gp->cnt[op_groupby]) ? rel_push_project_down : NULL;
157 : }
158 :
159 :
160 : static bool exp_shares_exps(sql_exp *e, list *shared, uint64_t *uses);
161 :
162 : static bool
163 92347 : exps_shares_exps(list *exps, list *shared, uint64_t *uses)
164 : {
165 92347 : if (!exps || !shared)
166 : return false;
167 304389 : for (node *n = exps->h; n; n = n->next) {
168 212179 : sql_exp *e = n->data;
169 :
170 212179 : if (exp_shares_exps(e, shared, uses))
171 : return true;
172 : }
173 : return false;
174 : }
175 :
176 : static bool
177 864027 : exp_shares_exps(sql_exp *e, list *shared, uint64_t *uses)
178 : {
179 1516646 : switch(e->type) {
180 35 : case e_cmp:
181 35 : if (e->flag == cmp_or || e->flag == cmp_filter)
182 23 : return exps_shares_exps(e->l, shared, uses) || exps_shares_exps(e->r, shared, uses);
183 12 : else if (e->flag == cmp_in || e->flag == cmp_notin)
184 0 : return exp_shares_exps(e->l, shared, uses) || exps_shares_exps(e->r, shared, uses);
185 : else
186 24 : return exp_shares_exps(e->l, shared, uses) || exp_shares_exps(e->r, shared, uses) || (e->f && exp_shares_exps(e->f, shared, uses));
187 80496 : case e_atom:
188 80496 : if (e->f)
189 0 : return exps_shares_exps(e->f, shared, uses);
190 : return false;
191 1292317 : case e_column:
192 : {
193 1292317 : sql_exp *ne = NULL;
194 1292317 : if (e->l)
195 1236081 : ne = exps_bind_column2(shared, e->l, e->r, NULL);
196 1292318 : if (!ne && !e->l)
197 56236 : ne = exps_bind_column(shared, e->r, NULL, NULL, 1);
198 107388 : if (!ne)
199 : return false;
200 1239838 : if (ne->type != e_column) {
201 90048 : int i = list_position(shared, ne);
202 90048 : if (i < 0)
203 : return false;
204 90048 : uint64_t used = (uint64_t) 1 << i;
205 90048 : if (used & *uses)
206 : return true;
207 89972 : *uses |= used;
208 89972 : return false;
209 : }
210 1149790 : if (ne != e && (list_position(shared, e) < 0 || list_position(shared, e) > list_position(shared, ne)))
211 : /* maybe ne refers to a local complex exp */
212 601122 : return exp_shares_exps(ne, shared, uses);
213 : return false;
214 : }
215 51497 : case e_convert:
216 51497 : return exp_shares_exps(e->l, shared, uses);
217 92301 : case e_aggr:
218 : case e_func:
219 92301 : return exps_shares_exps(e->l, shared, uses);
220 : case e_psm:
221 0 : assert(0); /* not in projection list */
222 : }
223 : return false;
224 : }
225 :
226 : static bool
227 85002 : exps_share_expensive_exp(list *exps, list *shared )
228 : {
229 85002 : uint64_t uses = 0;
230 :
231 85002 : if (list_empty(exps) || list_empty(shared))
232 0 : return false;
233 85002 : if (list_length(shared) > 64)
234 : return true;
235 736198 : for (node *n = exps->h; n; n = n->next) {
236 651812 : sql_exp *e = n->data;
237 :
238 651812 : if (exp_shares_exps(e, shared, &uses))
239 : return true;
240 : }
241 : return false;
242 : }
243 :
244 : static bool ambigious_ref( list *exps, sql_exp *e);
245 : static bool
246 75940 : ambigious_refs( list *exps, list *refs)
247 : {
248 75940 : if (list_empty(refs))
249 : return false;
250 211213 : for(node *n=refs->h; n; n = n->next) {
251 157227 : if (ambigious_ref(exps, n->data))
252 : return true;
253 : }
254 : return false;
255 : }
256 :
257 : static bool
258 1191411 : ambigious_ref( list *exps, sql_exp *e)
259 : {
260 1191411 : sql_exp *ne = NULL;
261 :
262 1191411 : if (e->type == e_column) {
263 983204 : if (e->l)
264 952961 : ne = exps_bind_column2(exps, e->l, e->r, NULL);
265 983204 : if (!ne && !e->l)
266 30243 : ne = exps_bind_column(exps, e->r, NULL, NULL, 1);
267 983204 : if (ne && e != ne)
268 : return true;
269 : }
270 1184937 : if (e->type == e_func)
271 75940 : return ambigious_refs(exps, e->l);
272 : return false;
273 : }
274 :
275 : /* merge 2 projects into the lower one */
276 : static sql_rel *
277 4405504 : rel_merge_projects_(visitor *v, sql_rel *rel)
278 : {
279 4440259 : list *exps = rel->exps;
280 4440259 : sql_rel *prj = rel->l;
281 4440259 : node *n;
282 :
283 4440259 : if (rel->op == op_project &&
284 1568625 : prj && prj->op == op_project && !(rel_is_ref(prj)) && list_empty(prj->r)) {
285 462717 : int all = 1;
286 :
287 462717 : if (project_unsafe(rel,0) || project_unsafe(prj,0) || exps_share_expensive_exp(rel->exps, prj->exps))
288 378334 : return rel;
289 :
290 : /* here we try to fix aliases */
291 84386 : list *nexps = NULL;
292 : /* for each exp check if we can rename it */
293 619811 : for (n = exps->h; n && all; n = n->next) {
294 541899 : sql_exp *e = n->data, *ne = NULL;
295 :
296 : /* We do not handle expressions pointing back in the list */
297 541899 : if (ambigious_ref(exps, e)) {
298 : all = 0;
299 : break;
300 : }
301 535442 : ne = exp_push_down_prj(v->sql, e, prj, prj->l);
302 : /* check if the refered alias name isn't used twice */
303 535442 : if (ne && ambigious_ref(nexps, ne)) {
304 : all = 0;
305 : break;
306 : }
307 492268 : if (ne) {
308 492268 : if (exp_name(e))
309 487201 : exp_prop_alias(v->sql->sa, ne, e);
310 492268 : if (!nexps)
311 74356 : nexps = new_exp_list(v->sql->sa);
312 492268 : list_append(nexps, ne);
313 : } else {
314 : all = 0;
315 : }
316 : }
317 84386 : if (all) {
318 34755 : rel->exps = nexps;
319 : /* we can now remove the intermediate project */
320 : /* push order by expressions */
321 34755 : if (!list_empty(rel->r)) {
322 0 : list *nr = new_exp_list(v->sql->sa), *res = rel->r;
323 0 : for (n = res->h; n; n = n->next) {
324 0 : sql_exp *e = n->data, *ne = NULL;
325 :
326 0 : ne = exp_push_down_prj(v->sql, e, prj, prj->l);
327 0 : if (ne) {
328 0 : if (exp_name(e))
329 0 : exp_prop_alias(v->sql->sa, ne, e);
330 0 : list_append(nr, ne);
331 : } else {
332 : all = 0;
333 : }
334 : }
335 0 : if (all) {
336 0 : rel->r = nr;
337 : } else {
338 : /* leave as is */
339 0 : rel->exps = exps;
340 0 : return rel;
341 : }
342 : }
343 34755 : rel->l = prj->l;
344 34755 : prj->l = NULL;
345 34755 : rel_destroy(prj);
346 34755 : v->changes++;
347 34755 : return rel_merge_projects_(v, rel);
348 : } else {
349 : /* leave as is */
350 49631 : rel->exps = exps;
351 : }
352 49631 : return rel;
353 : }
354 : return rel;
355 : }
356 :
357 : static sql_rel *
358 162010 : rel_merge_projects(visitor *v, global_props *gp, sql_rel *rel)
359 : {
360 162010 : (void) gp;
361 162010 : return rel_visitor_bottomup(v, rel, &rel_merge_projects_);
362 : }
363 :
364 : run_optimizer
365 601242 : bind_merge_projects(visitor *v, global_props *gp)
366 : {
367 601242 : int flag = v->sql->sql_optimizer;
368 601057 : return gp->opt_level == 1 && (flag & merge_projects) &&
369 1202295 : (gp->cnt[op_project] || gp->cnt[op_groupby]) ? rel_merge_projects : NULL;
370 : }
371 :
372 :
373 : static sql_exp *split_aggr_and_project(mvc *sql, list *aexps, sql_exp *e);
374 :
375 : static void
376 0 : list_split_aggr_and_project(mvc *sql, list *aexps, list *exps)
377 : {
378 0 : if (list_empty(exps))
379 : return ;
380 0 : for(node *n = exps->h; n; n = n->next)
381 0 : n->data = split_aggr_and_project(sql, aexps, n->data);
382 : }
383 :
384 : static sql_exp *
385 0 : split_aggr_and_project(mvc *sql, list *aexps, sql_exp *e)
386 : {
387 0 : switch(e->type) {
388 0 : case e_aggr:
389 : /* add to the aggrs */
390 0 : if (!exp_name(e))
391 0 : exp_label(sql->sa, e, ++sql->label);
392 0 : list_append(aexps, e);
393 0 : return exp_ref(sql, e);
394 : case e_cmp:
395 : /* e_cmp's shouldn't exist in an aggr expression list */
396 0 : assert(0);
397 : case e_convert:
398 0 : e->l = split_aggr_and_project(sql, aexps, e->l);
399 0 : return e;
400 0 : case e_func:
401 0 : list_split_aggr_and_project(sql, aexps, e->l);
402 0 : return e;
403 : case e_atom:
404 : case e_column: /* constants and columns shouldn't be rewriten */
405 : case e_psm:
406 : return e;
407 : }
408 : return NULL;
409 : }
410 :
411 : /* exp_rename */
412 : static sql_exp * exp_rename(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t);
413 :
414 : static list *
415 47480 : exps_rename(mvc *sql, list *l, sql_rel *f, sql_rel *t)
416 : {
417 47480 : if (list_empty(l))
418 : return l;
419 42027 : for (node *n=l->h; n; n=n->next)
420 26845 : n->data = exp_rename(sql, n->data, f, t);
421 : return l;
422 : }
423 :
424 : /* exp_rename */
425 : static sql_exp *
426 164217 : exp_rename(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t)
427 : {
428 164217 : sql_exp *ne = NULL;
429 :
430 164217 : assert(is_project(f->op));
431 :
432 164217 : switch(e->type) {
433 73761 : case e_column:
434 73761 : if (e->l) {
435 73754 : ne = exps_bind_column2(f->exps, e->l, e->r, NULL);
436 : /* if relation name matches expressions relation name, find column based on column name alone */
437 : } else {
438 7 : ne = exps_bind_column(f->exps, e->r, NULL, NULL, 1);
439 : }
440 73761 : if (!ne)
441 : return e;
442 49237 : sql_exp *oe = e;
443 49237 : e = NULL;
444 49237 : if (exp_name(ne) && ne->r && ne->l)
445 48653 : e = rel_bind_column2(sql, t, ne->l, ne->r, 0);
446 49237 : if (!e && ne->r)
447 507 : e = rel_bind_column(sql, t, ne->r, 0, 1);
448 585 : if (!e) {
449 79 : sql->session->status = 0;
450 79 : sql->errstr[0] = 0;
451 79 : if (exp_is_atom(ne))
452 : return ne;
453 : return oe;
454 : }
455 49158 : return exp_ref(sql, e);
456 51166 : case e_cmp:
457 51166 : if (e->flag == cmp_or || e->flag == cmp_filter) {
458 3717 : e->l = exps_rename(sql, e->l, f, t);
459 3717 : e->r = exps_rename(sql, e->r, f, t);
460 47449 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
461 5299 : e->l = exp_rename(sql, e->l, f, t);
462 5299 : e->r = exps_rename(sql, e->r, f, t);
463 : } else {
464 42150 : e->l = exp_rename(sql, e->l, f, t);
465 42150 : e->r = exp_rename(sql, e->r, f, t);
466 42150 : if (e->f)
467 213 : e->f = exp_rename(sql, e->f, f, t);
468 : }
469 : break;
470 4543 : case e_convert:
471 4543 : e->l = exp_rename(sql, e->l, f, t);
472 4543 : break;
473 2449 : case e_aggr:
474 : case e_func:
475 2449 : e->l = exps_rename(sql, e->l, f, t);
476 2449 : break;
477 32298 : case e_atom:
478 32298 : e->f = exps_rename(sql, e->f, f, t);
479 32298 : break;
480 : case e_psm:
481 : break;
482 : }
483 : return e;
484 : }
485 :
486 : static int
487 5616086 : exp_match_exp_cmp( sql_exp *e1, sql_exp *e2)
488 : {
489 5616086 : if (exp_match_exp(e1,e2))
490 52197 : return 0;
491 : return -1;
492 : }
493 :
494 : /* Pushing projects up the tree. Done very early in the optimizer.
495 : * Makes later steps easier.
496 : */
497 : extern void _rel_print(mvc *sql, sql_rel *rel);
498 : static sql_rel *
499 4133232 : rel_push_project_up_(visitor *v, sql_rel *rel)
500 : {
501 4133232 : if (is_simple_project(rel->op) && rel->l && !rel_is_ref(rel)) {
502 1225487 : sql_rel *l = rel->l;
503 1225487 : if (is_simple_project(l->op))
504 255069 : return rel_merge_projects_(v, rel);
505 : /* find equal column references, later cases are rewritten into references back to the first
506 : * project () [ i.i L1, i.i L2 ] -> project() [ i.i L1, L1 L2 ] */
507 970418 : if (list_length(rel->exps) > 1) {
508 817725 : list *exps = rel->exps;
509 817725 : node *n = exps->h;
510 817725 : bool needed = false;
511 3876862 : for(n = n->next; n && !needed; n = n->next) {
512 3059143 : sql_exp *e = n->data;
513 3059143 : if (e->type == e_column && !is_selfref(e)) {
514 19743155 : for(node *m = exps->h; m && m != n && !needed; m = m->next) {
515 17101928 : sql_exp *h = m->data;
516 17101928 : if (exp_match_exp(h,e))
517 3415 : needed = true;
518 : }
519 : }
520 : }
521 817719 : if (needed) {
522 3415 : rel->exps = sa_list(v->sql->sa);
523 3415 : node *n = exps->h;
524 3415 : list_append(rel->exps, n->data);
525 66390 : for(n = n->next; n; n = n->next) {
526 62975 : sql_exp *e = n->data;
527 62975 : if (e->type == e_column && !is_selfref(e)) {
528 24525 : node *m = list_find(rel->exps, e, (fcmp)&exp_match_exp_cmp);
529 24525 : if (m) {
530 3904 : sql_exp *ne = exp_ref(v->sql, m->data);
531 3904 : exp_setname(v->sql->sa, ne, exp_relname(e), exp_name(e));
532 3904 : exp_propagate(v->sql->sa, ne, e);
533 3904 : set_selfref(ne);
534 3904 : e = ne;
535 : }
536 : }
537 62975 : list_append(rel->exps, e);
538 : }
539 : }
540 : }
541 : }
542 :
543 : /* project/project cleanup is done later */
544 3878155 : if (is_join(rel->op) || is_select(rel->op)) {
545 1247184 : node *n;
546 1247184 : list *exps = NULL, *l_exps, *r_exps;
547 1247184 : sql_rel *l = rel->l;
548 1247184 : sql_rel *r = rel->r;
549 1247184 : sql_rel *t;
550 1247184 : int nlexps = 0, i = 0;
551 :
552 : /* Don't rewrite refs, non projections or constant or
553 : order by projections */
554 1247184 : if (!l || rel_is_ref(l) || is_topn(l->op) || is_sample(l->op) ||
555 1142126 : (is_join(rel->op) && !list_empty(rel->attr)) ||
556 1076820 : (is_join(rel->op) && (!r || rel_is_ref(r))) ||
557 1075083 : (is_left(rel->op) && (rel->flag&MERGE_LEFT) /* can't push projections above merge statments left joins */) ||
558 1074881 : (is_select(rel->op) && l->op != op_project) ||
559 586094 : (is_join(rel->op) && ((l->op != op_project && r->op != op_project) || is_topn(r->op) || is_sample(r->op))) ||
560 191890 : ((l->op == op_project && (!l->l || l->r || project_unsafe(l,is_select(rel->op)))) ||
561 86133 : (is_join(rel->op) && (r->op == op_project && (!r->l || r->r || project_unsafe(r,0))))))
562 1185545 : return rel;
563 :
564 61639 : if (l->op == op_project && l->l) {
565 898537 : for (n = l->exps->h; n; n = n->next) {
566 860271 : sql_exp *e = n->data;
567 860271 : if (!(is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) &&
568 859634 : !(e->type == e_column && !has_label(e)))
569 : return rel;
570 : }
571 : }
572 47517 : if (is_join(rel->op) && r->op == op_project && r->l) {
573 55108 : for (n = r->exps->h; n; n = n->next) {
574 48697 : sql_exp *e = n->data;
575 48697 : if (!(is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) &&
576 48349 : !(e->type == e_column && !has_label(e)))
577 : return rel;
578 : }
579 : }
580 :
581 41541 : if (l->op == op_project && l->l) {
582 : /* Go through the list of project expressions.
583 : Check if they can be pushed up, ie are they not
584 : changing or introducing any columns used
585 : by the upper operator. */
586 :
587 37648 : exps = new_exp_list(v->sql->sa);
588 835000 : for (n = l->exps->h; n; n = n->next) {
589 797352 : sql_exp *e = n->data;
590 :
591 : /* we cannot rewrite projection with atomic values from outer joins */
592 797352 : if (is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) {
593 591 : list_append(exps, e);
594 796761 : } else if (e->type == e_column) {
595 796761 : if (has_label(e))
596 : return rel;
597 796761 : list_append(exps, e);
598 : } else {
599 : return rel;
600 : }
601 : }
602 : } else {
603 3893 : exps = rel_projections(v->sql, l, NULL, 1, 1);
604 : }
605 41541 : nlexps = list_length(exps);
606 : /* also handle right hand of join */
607 41541 : if (is_join(rel->op) && r->op == op_project && r->l && list_empty(rel->attr)) {
608 : /* Here we also check all expressions of r like above
609 : but also we need to check for ambigious names. */
610 :
611 48158 : for (n = r->exps->h; n; n = n->next) {
612 41938 : sql_exp *e = n->data;
613 :
614 : /* we cannot rewrite projection with atomic values from outer joins */
615 41938 : if (is_column(e->type) && exp_is_atom(e) && !(is_left(rel->op) || is_full(rel->op))) {
616 149 : list_append(exps, e);
617 41789 : } else if (e->type == e_column) {
618 41598 : if (has_label(e))
619 : return rel;
620 41598 : list_append(exps, e);
621 : } else {
622 : return rel;
623 : }
624 : }
625 35130 : } else if (is_join(rel->op) && list_empty(rel->attr)) {
626 19071 : list *r_exps = rel_projections(v->sql, r, NULL, 1, 1);
627 19071 : list_merge(exps, r_exps, (fdup)NULL);
628 : }
629 41350 : if (!list_empty(rel->attr))
630 0 : append(exps, exp_ref(v->sql, rel->attr->h->data));
631 : /* Here we should check for ambigious names ? */
632 41350 : if (is_join(rel->op) && r && list_empty(rel->attr)) {
633 25291 : t = (l->op == op_project && l->l)?l->l:l;
634 25291 : l_exps = rel_projections(v->sql, t, NULL, 1, 1);
635 : /* conflict with old right expressions */
636 25291 : r_exps = rel_projections(v->sql, r, NULL, 1, 1);
637 25291 : if (rel->attr)
638 0 : append(r_exps, exp_ref(v->sql, rel->attr->h->data));
639 636980 : for(n = l_exps->h; n; n = n->next) {
640 612067 : sql_exp *e = n->data;
641 612067 : const char *rname = exp_relname(e);
642 612067 : const char *name = exp_name(e);
643 :
644 612067 : if (exp_is_atom(e))
645 0 : continue;
646 612067 : if ((rname && exps_bind_column2(r_exps, rname, name, NULL) != NULL) ||
647 207 : (!rname && exps_bind_column(r_exps, name, NULL, NULL, 1) != NULL))
648 378 : return rel;
649 : }
650 24913 : t = (r->op == op_project && r->l)?r->l:r;
651 24913 : r_exps = rel_projections(v->sql, t, NULL, 1, 1);
652 : /* conflict with new right expressions */
653 619791 : for(n = l_exps->h; n; n = n->next) {
654 596920 : sql_exp *e = n->data;
655 :
656 596920 : if (exp_is_atom(e))
657 0 : continue;
658 1191807 : if ((e->l && exps_bind_column2(r_exps, e->l, e->r, NULL) != NULL) ||
659 594896 : (exps_bind_column(r_exps, e->r, NULL, NULL, 1) != NULL && (!e->l || !e->r)))
660 2042 : return rel;
661 : }
662 : /* conflict with new left expressions */
663 164111 : for(n = r_exps->h; n; n = n->next) {
664 141240 : sql_exp *e = n->data;
665 :
666 141240 : if (exp_is_atom(e))
667 0 : continue;
668 282480 : if ((e->l && exps_bind_column2(l_exps, e->l, e->r, NULL) != NULL) ||
669 141244 : (exps_bind_column(l_exps, e->r, NULL, NULL, 1) != NULL && (!e->l || !e->r)))
670 0 : return rel;
671 : }
672 : }
673 :
674 : /* rename operator expressions */
675 38930 : if (l->op == op_project) {
676 : /* rewrite rel from rel->l into rel->l->l */
677 35256 : if (rel->exps) {
678 73099 : for (n = rel->exps->h; n; n = n->next) {
679 38836 : sql_exp *e = n->data, *ne;
680 :
681 38836 : ne = exp_rename(v->sql, e, l, l->l);
682 38836 : assert(ne);
683 38836 : if (ne != e && exp_name(e))
684 0 : exp_propagate(v->sql->sa, ne, e);
685 38836 : n->data = ne;
686 : }
687 : }
688 35256 : rel->l = l->l;
689 35256 : l->l = NULL;
690 35256 : rel_destroy(l);
691 : }
692 38930 : if (is_join(rel->op) && r->op == op_project && list_empty(rel->attr)) {
693 : /* rewrite rel from rel->r into rel->r->l */
694 4172 : if (rel->exps) {
695 7848 : for (n = rel->exps->h; n; n = n->next) {
696 4181 : sql_exp *e = n->data, *ne;
697 :
698 4181 : ne = exp_rename(v->sql, e, r, r->l);
699 4181 : assert(ne);
700 4181 : if (ne != e && exp_name(e))
701 0 : exp_propagate(v->sql->sa, ne, e);
702 4181 : n->data = ne;
703 : }
704 : }
705 4172 : rel->r = r->l;
706 4172 : r->l = NULL;
707 4172 : rel_destroy(r);
708 : }
709 : /* Done, ie introduce new project */
710 38930 : exps_fix_card(exps, rel->card);
711 : /* Fix nil flag */
712 38930 : if (!list_empty(exps)) {
713 839088 : for (n = exps->h ; n && i < nlexps ; n = n->next, i++) {
714 800158 : sql_exp *e = n->data;
715 :
716 800158 : if (is_right(rel->op) || is_full(rel->op))
717 353 : set_has_nil(e);
718 800158 : set_not_unique(e);
719 : }
720 169734 : for (; n ; n = n->next) {
721 130804 : sql_exp *e = n->data;
722 :
723 130804 : if (is_left(rel->op) || is_full(rel->op))
724 40987 : set_has_nil(e);
725 130804 : set_not_unique(e);
726 : }
727 : }
728 38930 : v->changes++;
729 38930 : return rel_inplace_project(v->sql->sa, rel, NULL, exps);
730 : }
731 2630971 : if (is_groupby(rel->op) && !rel_is_ref(rel) && rel->exps && list_length(rel->exps) > 1) {
732 43016 : node *n;
733 43016 : int fnd = 0;
734 43016 : list *aexps, *pexps;
735 :
736 : /* check if some are expressions aren't e_aggr */
737 159731 : for (n = rel->exps->h; n && !fnd; n = n->next) {
738 116715 : sql_exp *e = n->data;
739 :
740 116715 : if (e->type != e_aggr && e->type != e_column && e->type != e_atom && e->card > CARD_ATOM) {
741 116715 : fnd = 1;
742 : }
743 : }
744 : /* only aggr, no rewrite needed */
745 43016 : if (!fnd)
746 : return rel;
747 :
748 0 : aexps = sa_list(v->sql->sa);
749 0 : pexps = sa_list(v->sql->sa);
750 0 : for (n = rel->exps->h; n; n = n->next) {
751 0 : sql_exp *e = n->data, *ne = NULL;
752 :
753 0 : switch (e->type) {
754 0 : case e_atom: /* move over to the projection */
755 0 : list_append(pexps, e);
756 0 : break;
757 0 : case e_func:
758 0 : list_append(pexps, e);
759 0 : list_split_aggr_and_project(v->sql, aexps, e->l);
760 0 : break;
761 0 : case e_convert:
762 0 : list_append(pexps, e);
763 0 : e->l = split_aggr_and_project(v->sql, aexps, e->l);
764 0 : break;
765 0 : default: /* simple alias */
766 0 : list_append(aexps, e);
767 0 : ne = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
768 0 : list_append(pexps, ne);
769 0 : break;
770 : }
771 : }
772 0 : v->changes++;
773 0 : rel->exps = aexps;
774 0 : return rel_inplace_project( v->sql->sa, rel, NULL, pexps);
775 : }
776 : return rel;
777 : }
778 :
779 : static sql_rel *
780 161349 : rel_push_project_up(visitor *v, global_props *gp, sql_rel *rel)
781 : {
782 161349 : (void) gp;
783 161349 : return rel_visitor_bottomup(v, rel, &rel_push_project_up_);
784 : }
785 :
786 : run_optimizer
787 601185 : bind_push_project_up(visitor *v, global_props *gp)
788 : {
789 601185 : int flag = v->sql->sql_optimizer;
790 601000 : return gp->opt_level == 1 && (flag & push_project_up) &&
791 1202181 : gp->cnt[op_project] ? rel_push_project_up : NULL;
792 : }
793 :
794 :
795 : static void split_exps(mvc *sql, list *exps, sql_rel *rel);
796 :
797 : static int
798 3143099 : exp_refers_cmp( sql_exp *e1, sql_exp *e2)
799 : {
800 3143099 : if (exp_refers(e1,e2))
801 0 : return 0;
802 : return -1;
803 : }
804 :
805 : sql_exp *
806 131807 : add_exp_too_project(mvc *sql, sql_exp *e, sql_rel *rel)
807 : {
808 131807 : node *n = list_find(rel->exps, e, (fcmp)&exp_match_exp_cmp);
809 :
810 : /* if not matching we may refer to an older expression */
811 131807 : if (!n)
812 83514 : n = list_find(rel->exps, e, (fcmp)&exp_refers_cmp);
813 83514 : if (!n) {
814 83514 : exp_label(sql->sa, e, ++sql->label);
815 83514 : append(rel->exps, e);
816 : } else {
817 48293 : sql_exp *ne = n->data;
818 :
819 48293 : if (rel && rel->l) {
820 96586 : if ((exp_relname(ne) && exp_name(ne) && rel_bind_column2(sql, rel->l, exp_relname(ne), exp_name(ne), 0)) ||
821 48293 : (!exp_relname(ne) && exp_name(ne) && rel_bind_column(sql, rel->l, exp_name(ne), 0, 1))) {
822 0 : exp_label(sql->sa, e, ++sql->label);
823 0 : append(rel->exps, e);
824 0 : ne = e;
825 : }
826 : }
827 : e = ne;
828 : }
829 131807 : e = exp_ref(sql, e);
830 131807 : return e;
831 : }
832 :
833 : static void
834 152503 : add_exps_too_project(mvc *sql, list *exps, sql_rel *rel)
835 : {
836 152503 : node *n;
837 :
838 152503 : if (!exps)
839 : return;
840 472401 : for(n=exps->h; n; n = n->next) {
841 319899 : sql_exp *e = n->data;
842 :
843 319899 : if (e->type != e_column && !exp_is_atom(e))
844 131537 : n->data = add_exp_too_project(sql, e, rel);
845 : }
846 : }
847 :
848 : static sql_exp *
849 543808 : split_exp(mvc *sql, sql_exp *e, sql_rel *rel)
850 : {
851 543808 : if (exp_is_atom(e))
852 : return e;
853 417170 : switch(e->type) {
854 : case e_column:
855 : return e;
856 15917 : case e_convert:
857 15917 : e->l = split_exp(sql, e->l, rel);
858 15917 : return e;
859 163743 : case e_aggr:
860 : case e_func:
861 163743 : if (!is_analytic(e) && !exp_has_sideeffect(e)) {
862 160165 : sql_subfunc *f = e->f;
863 160165 : if (e->type == e_func && !f->func->s && is_caselike_func(f) /*is_ifthenelse_func(f)*/) {
864 : return e;
865 : } else {
866 152503 : split_exps(sql, e->l, rel);
867 152502 : add_exps_too_project(sql, e->l, rel);
868 : }
869 : }
870 : return e;
871 487 : case e_cmp:
872 487 : if (e->flag == cmp_or || e->flag == cmp_filter) {
873 467 : split_exps(sql, e->l, rel);
874 467 : split_exps(sql, e->r, rel);
875 20 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
876 0 : e->l = split_exp(sql, e->l, rel);
877 0 : split_exps(sql, e->r, rel);
878 : } else {
879 20 : e->l = split_exp(sql, e->l, rel);
880 20 : e->r = split_exp(sql, e->r, rel);
881 20 : if (e->f)
882 19 : e->f = split_exp(sql, e->f, rel);
883 : }
884 : return e;
885 : case e_atom:
886 : case e_psm:
887 : return e;
888 : }
889 : return e;
890 : }
891 :
892 : static void
893 153437 : split_exps(mvc *sql, list *exps, sql_rel *rel)
894 : {
895 153437 : if (list_empty(exps))
896 : return;
897 474275 : for(node *n=exps->h; n; n = n->next){
898 320840 : sql_exp *e = n->data;
899 :
900 320840 : e = split_exp(sql, e, rel);
901 320839 : n->data = e;
902 : }
903 : }
904 :
905 : sql_rel *
906 575357 : rel_split_project_(visitor *v, sql_rel *rel, int top)
907 : {
908 1150713 : if (mvc_highwater(v->sql))
909 : return rel;
910 :
911 575371 : if (!rel)
912 : return NULL;
913 575371 : if (is_project(rel->op) && list_length(rel->exps) && (is_groupby(rel->op) || rel->l) && !need_distinct(rel) && !is_single(rel)) {
914 158397 : list *exps = rel->exps;
915 158397 : node *n;
916 158397 : int funcs = 0;
917 158397 : sql_rel *nrel;
918 :
919 : /* are there functions */
920 1373063 : for (n=exps->h; n && !funcs; n = n->next) {
921 1214663 : sql_exp *e = n->data;
922 :
923 1214663 : funcs = exp_has_func(e);
924 : }
925 : /* introduce extra project */
926 158400 : if (funcs && rel->op != op_project) {
927 0 : nrel = rel_project(v->sql->sa, rel->l,
928 0 : rel_projections(v->sql, rel->l, NULL, 1, 1));
929 0 : rel->l = nrel;
930 : /* recursively split all functions and add those to the projection list */
931 0 : split_exps(v->sql, rel->exps, nrel);
932 0 : if (nrel->l && !(nrel->l = rel_split_project_(v, nrel->l, (is_topn(rel->op)||is_sample(rel->op))?top:0)))
933 : return NULL;
934 0 : return rel;
935 158400 : } else if (funcs && !top && list_empty(rel->r)) {
936 : /* projects can have columns point back into the expression list, ie
937 : * create a new list including the split expressions */
938 16734 : node *n;
939 16734 : list *exps = rel->exps;
940 :
941 16734 : rel->exps = sa_list(v->sql->sa);
942 223725 : for (n=exps->h; n; n = n->next)
943 206993 : append(rel->exps, split_exp(v->sql, n->data, rel));
944 141666 : } else if (funcs && top && rel_is_ref(rel) && list_empty(rel->r)) {
945 : /* inplace */
946 7 : list *exps = rel_projections(v->sql, rel, NULL, 1, 1);
947 7 : sql_rel *l = rel_project(v->sql->sa, rel->l, NULL);
948 7 : rel->l = l;
949 7 : l->exps = rel->exps;
950 7 : set_processed(l);
951 7 : rel->exps = exps;
952 : }
953 : }
954 575365 : if (is_mset(rel->op) || is_set(rel->op) || is_basetable(rel->op))
955 : return rel;
956 379933 : if (rel->l) {
957 387783 : rel->l = rel_split_project_(v, rel->l, (is_topn(rel->op)||is_sample(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0);
958 363305 : if (!rel->l)
959 : return NULL;
960 : }
961 379936 : if ((is_join(rel->op) || is_semi(rel->op)) && rel->r) {
962 112282 : rel->r = rel_split_project_(v, rel->r, (is_topn(rel->op)||is_sample(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0);
963 112282 : if (!rel->r)
964 : return NULL;
965 : }
966 : return rel;
967 : }
968 :
969 : static sql_rel *
970 99776 : rel_split_project(visitor *v, global_props *gp, sql_rel *rel)
971 : {
972 99776 : (void) gp;
973 99776 : return rel_split_project_(v, rel, 1);
974 : }
975 :
976 : run_optimizer
977 601127 : bind_split_project(visitor *v, global_props *gp)
978 : {
979 601127 : int flag = v->sql->sql_optimizer;
980 538693 : return gp->opt_cycle == 0 && gp->opt_level == 1 && (flag & split_project) &&
981 1139816 : (gp->cnt[op_project] || gp->cnt[op_groupby]) ? rel_split_project : NULL;
982 : }
983 :
984 :
985 : static sql_rel *
986 161525 : rel_project_reduce_casts(visitor *v, global_props *gp, sql_rel *rel)
987 : {
988 161525 : if (!rel)
989 : return NULL;
990 161525 : (void) gp;
991 161525 : if (gp->opt_level == 1 && v->value_based_opt && is_simple_project(rel->op) && list_length(rel->exps)) {
992 87063 : list *exps = rel->exps;
993 87063 : node *n;
994 :
995 437550 : for (n=exps->h; n; n = n->next) {
996 350487 : sql_exp *e = n->data;
997 :
998 350487 : if (e && e->type == e_func) {
999 27655 : sql_subfunc *f = e->f;
1000 27655 : sql_subtype *res = f->res->h->data;
1001 :
1002 27655 : if (!f->func->s && !strcmp(f->func->base.name, "sql_mul") && res->scale > 0) {
1003 7 : list *args = e->l;
1004 7 : sql_exp *h = args->h->data;
1005 7 : sql_exp *t = args->t->data;
1006 7 : atom *ha = exp_value(v->sql, h), *ta = exp_value(v->sql, t);
1007 :
1008 7 : if (ha || ta) {
1009 6 : atom *a = ha ? ha : ta;
1010 6 : atom *na = reduce_scale(v->sql, a);
1011 :
1012 6 : if (na != a) {
1013 4 : int rs = a->tpe.scale - na->tpe.scale;
1014 4 : res->scale -= rs;
1015 4 : if (ha) {
1016 0 : h->r = NULL;
1017 0 : h->l = na;
1018 : } else {
1019 4 : t->r = NULL;
1020 4 : t->l = na;
1021 : }
1022 4 : v->changes++;
1023 : }
1024 : }
1025 : }
1026 : }
1027 : }
1028 : }
1029 : return rel;
1030 : }
1031 :
1032 : run_optimizer
1033 601183 : bind_project_reduce_casts(visitor *v, global_props *gp)
1034 : {
1035 601183 : int flag = v->sql->sql_optimizer;
1036 601183 : return gp->cnt[op_project] && (flag & project_reduce_casts) ? rel_project_reduce_casts : NULL;
1037 : }
1038 :
1039 :
1040 : static sql_rel *
1041 : exp_skip_output_parts(sql_rel *rel)
1042 : {
1043 : while ((is_topn(rel->op) || is_project(rel->op) || is_sample(rel->op)) && rel->l) {
1044 : if (is_union(rel->op) || is_munion(rel->op) || (is_groupby(rel->op) && list_empty(rel->r)))
1045 : return rel; /* a group-by with no columns is a plain aggregate and hence always returns one row */
1046 : rel = rel->l;
1047 : }
1048 : return rel;
1049 : }
1050 :
1051 : static sql_column *
1052 80191 : exp_find_column_( sql_rel *rel, sql_exp *exp, int pnr, sql_rel **bt )
1053 : {
1054 80191 : if (exp->type == e_column)
1055 80136 : return name_find_column(rel, exp->l, exp->r, pnr, bt);
1056 : return NULL;
1057 : }
1058 :
1059 : static int
1060 0 : rel_part_nr( sql_rel *rel, sql_exp *e )
1061 : {
1062 0 : sql_column *c = NULL;
1063 0 : sql_rel *bt = NULL;
1064 0 : assert(e->type == e_cmp);
1065 :
1066 0 : c = exp_find_column_(rel, e->l, -1, &bt);
1067 0 : if (!c)
1068 0 : c = exp_find_column_(rel, e->r, -1, &bt);
1069 0 : if (!c && e->f)
1070 0 : c = exp_find_column_(rel, e->f, -1, &bt);
1071 0 : if (!c || !bt || !rel_base_get_mergetable(bt))
1072 0 : return -1;
1073 0 : sql_table *pp = c->t;
1074 0 : sql_table *mt = rel_base_get_mergetable(bt);
1075 0 : return find_member_pos(mt->members, pp);
1076 : }
1077 :
1078 : static int
1079 0 : rel_uses_part_nr( sql_rel *rel, sql_exp *e, int pnr )
1080 : {
1081 0 : sql_column *c = NULL;
1082 0 : sql_rel *bt = NULL;
1083 0 : assert(e->type == e_cmp);
1084 :
1085 : /*
1086 : * following case fails.
1087 : *
1088 : * semijoin( A1, union [A1, A2] )
1089 : * The union will never return proper column (from A2).
1090 : * ie need different solution (probaly pass pnr).
1091 : */
1092 0 : c = exp_find_column_(rel, e->l, pnr, &bt);
1093 0 : if (!c)
1094 0 : c = exp_find_column_(rel, e->r, pnr, &bt);
1095 0 : if (c && bt && rel_base_get_mergetable(bt)) {
1096 0 : sql_table *pp = c->t;
1097 0 : sql_table *mt = rel_base_get_mergetable(bt);
1098 0 : if (find_member_pos(mt->members, pp) == pnr)
1099 : return 1;
1100 : }
1101 : /* for projects we may need to do a rename! */
1102 0 : if (is_project(rel->op) || is_topn(rel->op) || is_sample(rel->op))
1103 0 : return rel_uses_part_nr( rel->l, e, pnr);
1104 :
1105 0 : if (is_munion(rel->op)) {
1106 : list *l = rel->l;
1107 : if (rel_uses_part_nr( l->h->data, e, pnr))
1108 : return 1;
1109 0 : } else if (is_union(rel->op) || is_join(rel->op) || is_semi(rel->op)) {
1110 0 : if (rel_uses_part_nr( rel->l, e, pnr))
1111 : return 1;
1112 0 : if (!is_semi(rel->op) && rel_uses_part_nr( rel->r, e, pnr))
1113 : return 1;
1114 : }
1115 : return 0;
1116 : }
1117 :
1118 : static sql_column *
1119 3045 : exp_is_pkey(sql_rel *rel, sql_exp *e)
1120 : {
1121 3045 : if (find_prop(e->p, PROP_HASHCOL)) { /* aligned PKEY JOIN */
1122 248 : fcmp cmp = (fcmp)&kc_column_cmp;
1123 248 : sql_column *c = exp_find_column(rel, e, -2);
1124 :
1125 248 : if (c && c->t->pkey && list_find(c->t->pkey->k.columns, c, cmp) != NULL)
1126 : return c;
1127 : }
1128 : return NULL;
1129 : }
1130 :
1131 : static sql_exp *
1132 3456 : rel_is_join_on_pkey(sql_rel *rel, bool pk_fk) /* pk_fk is used to verify is a join on pk-fk */
1133 : {
1134 3456 : if (!rel || !rel->exps)
1135 : return NULL;
1136 4974 : for (node *n = rel->exps->h; n; n = n->next) {
1137 2759 : sql_exp *je = n->data;
1138 :
1139 4258 : if (je->type == e_cmp && je->flag == cmp_equal &&
1140 2998 : (exp_is_pkey(rel, je->l) || exp_is_pkey(rel, je->r)) &&
1141 248 : (!pk_fk || find_prop(je->p, PROP_JOINIDX)))
1142 0 : return je;
1143 : }
1144 : return NULL;
1145 : }
1146 :
1147 : /* return true if the given expression is guaranteed to have no rows */
1148 : static int
1149 : exp_is_zero_rows(visitor *v, sql_rel *rel, sql_rel *sel)
1150 : {
1151 : if (!rel || mvc_highwater(v->sql))
1152 : return 0;
1153 : rel = exp_skip_output_parts(rel);
1154 : if (is_select(rel->op) && rel->l) {
1155 : sel = rel;
1156 : rel = exp_skip_output_parts(rel->l);
1157 : }
1158 :
1159 : sql_table *t = is_basetable(rel->op) && rel->l ? rel->l : NULL;
1160 : bool table_readonly = t && isTable(t) && t->access == TABLE_READONLY;
1161 :
1162 : if (sel && !list_empty(sel->exps) && (v->value_based_opt || table_readonly)) {
1163 : for (node *n = sel->exps->h; n; n = n->next) {
1164 : sql_exp *e = n->data;
1165 :
1166 : /* if the expression is false, then the select is empty */
1167 : if (v->value_based_opt && (exp_is_false(e) || exp_is_null(e)))
1168 : return 1;
1169 : if (table_readonly && e->type == e_cmp && (e->flag == cmp_equal || e->f)) {
1170 : /* half-ranges are theoretically optimizable here, but not implemented */
1171 : sql_exp *c = e->l;
1172 : if (c->type == e_column) {
1173 : sql_exp *l = e->r;
1174 : sql_exp *h = e->f;
1175 :
1176 : atom *lval = exp_flatten(v->sql, v->value_based_opt, l);
1177 : atom *hval = h ? exp_flatten(v->sql, v->value_based_opt, h) : lval;
1178 : if (lval && hval) {
1179 : sql_rel *bt;
1180 : sql_column *col = name_find_column(sel, exp_relname(c), exp_name(c), -2, &bt);
1181 : void *min = NULL, *max = NULL;
1182 : atom *amin, *amax;
1183 : sql_subtype *ct = exp_subtype(c);
1184 :
1185 : if (col
1186 : && col->t == t
1187 : && sql_trans_ranges(v->sql->session->tr, col, &min, &max)
1188 : && min && max
1189 : && (amin = atom_general_ptr(v->sql->sa, ct, min)) && (amax = atom_general_ptr(v->sql->sa, ct, max))
1190 : && !exp_range_overlap(amin, amax, lval, hval, false, false)) {
1191 : return 1;
1192 : }
1193 : }
1194 : }
1195 : }
1196 : }
1197 : }
1198 : if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) || is_semi(rel->op)) && !list_empty(rel->exps)) {
1199 : sql_exp *je;
1200 :
1201 : /* check non overlaping pk-fk join */
1202 : if ((je = rel_is_join_on_pkey(rel, true))) {
1203 : int lpnr = rel_part_nr(rel->l, je);
1204 :
1205 : if (lpnr >= 0 && !rel_uses_part_nr(rel->r, je, lpnr))
1206 : return 1;
1207 : }
1208 : return (((is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op)) && exp_is_zero_rows(v, rel->l, sel)) ||
1209 : ((is_innerjoin(rel->op) || is_right(rel->op)) && exp_is_zero_rows(v, rel->r, sel)));
1210 : }
1211 : /* global aggregates always return 1 row */
1212 : if (is_simple_project(rel->op) || (is_groupby(rel->op) && !list_empty(rel->r)) || is_select(rel->op) ||
1213 : is_topn(rel->op) || is_sample(rel->op) || is_inter(rel->op) || is_except(rel->op)) {
1214 : if (rel->l)
1215 : return exp_is_zero_rows(v, rel->l, sel);
1216 : } else if (is_innerjoin(rel->op) && list_empty(rel->exps)) { /* cartesian product */
1217 : return exp_is_zero_rows(v, rel->l, sel) || exp_is_zero_rows(v, rel->r, sel);
1218 : }
1219 : return 0;
1220 : }
1221 :
1222 : /* discard sides of UNION or UNION ALL which cannot produce any rows, as per
1223 : statistics, similarly to the merge table optimizer, e.g.
1224 : select * from a where x between 1 and 2 union all select * from b where x between 1 and 2
1225 : -> select * from b where x between 1 and 2 [assuming a has no rows with 1<=x<=2]
1226 : */
1227 : static inline sql_rel *
1228 : rel_remove_union_partitions(visitor *v, sql_rel *rel)
1229 : {
1230 : if (!is_union(rel->op) || rel_is_ref(rel))
1231 : return rel;
1232 : int left_zero_rows = !rel_is_ref(rel->l) && exp_is_zero_rows(v, rel->l, NULL);
1233 : int right_zero_rows = !rel_is_ref(rel->r) && exp_is_zero_rows(v, rel->r, NULL);
1234 :
1235 : if (left_zero_rows && right_zero_rows) {
1236 : /* generate dummy relation */
1237 : list *converted = sa_list(v->sql->sa);
1238 : sql_rel *nrel = rel_project_exp(v->sql, exp_atom_bool(v->sql->sa, 1));
1239 : nrel = rel_select(v->sql->sa, nrel, exp_atom_bool(v->sql->sa, 0));
1240 : set_processed(nrel);
1241 :
1242 : for (node *n = rel->exps->h ; n ; n = n->next) {
1243 : sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
1244 : exp_prop_alias(v->sql->sa, a, e);
1245 : list_append(converted, a);
1246 : }
1247 : rel_destroy(rel);
1248 : v->changes++;
1249 : return rel_project(v->sql->sa, nrel, converted);
1250 : } else if (left_zero_rows) {
1251 : sql_rel *r = rel->r;
1252 : if (!is_project(r->op))
1253 : r = rel_project(v->sql->sa, r, rel_projections(v->sql, r, NULL, 1, 1));
1254 : rel_rename_exps(v->sql, rel->exps, r->exps);
1255 : rel->r = NULL;
1256 : rel_destroy(rel);
1257 : v->changes++;
1258 : return r;
1259 : } else if (right_zero_rows) {
1260 : sql_rel *l = rel->l;
1261 : if (!is_project(l->op))
1262 : l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
1263 : rel_rename_exps(v->sql, rel->exps, l->exps);
1264 : rel->l = NULL;
1265 : rel_destroy(rel);
1266 : v->changes++;
1267 : return l;
1268 : }
1269 : return rel;
1270 : }
1271 :
1272 : static int
1273 : rel_match_projections(sql_rel *l, sql_rel *r)
1274 : {
1275 : node *n, *m;
1276 : list *le = l->exps;
1277 : list *re = r->exps;
1278 :
1279 : if (!le || !re)
1280 : return 0;
1281 : if (list_length(le) != list_length(re))
1282 : return 0;
1283 :
1284 : for (n = le->h, m = re->h; n && m; n = n->next, m = m->next)
1285 : if (!exp_match(n->data, m->data))
1286 : return 0;
1287 : return 1;
1288 : }
1289 :
1290 : static int
1291 0 : exps_has_predicate( list *l )
1292 : {
1293 0 : node *n;
1294 :
1295 0 : for( n = l->h; n; n = n->next){
1296 0 : sql_exp *e = n->data;
1297 :
1298 0 : if (e->card <= CARD_ATOM)
1299 : return 1;
1300 : }
1301 : return 0;
1302 : }
1303 :
1304 : static sql_rel *
1305 33 : rel_find_select( sql_rel *r)
1306 : {
1307 187 : while (!is_select(r->op) && r->l && is_project(r->op))
1308 : r = r->l;
1309 33 : if (is_select(r->op))
1310 3 : return r;
1311 : return NULL;
1312 : }
1313 :
1314 : static inline sql_rel *
1315 : rel_merge_union(visitor *v, sql_rel *rel)
1316 : {
1317 : sql_rel *l = rel->l;
1318 : sql_rel *r = rel->r;
1319 : sql_rel *ref = NULL;
1320 :
1321 : if (is_union(rel->op) &&
1322 : l && is_project(l->op) && !project_unsafe(l,0) &&
1323 : r && is_project(r->op) && !project_unsafe(r,0) &&
1324 : (ref = rel_find_ref(l)) != NULL && ref == rel_find_ref(r)) {
1325 : /* Find selects and try to merge */
1326 : sql_rel *ls = rel_find_select(l);
1327 : sql_rel *rs = rel_find_select(r);
1328 :
1329 : /* can we merge ? */
1330 : if (!ls || !rs)
1331 : return rel;
1332 :
1333 : /* merge any extra projects */
1334 : if (l->l != ls)
1335 : rel->l = l = rel_merge_projects_(v, l);
1336 : if (r->l != rs)
1337 : rel->r = r = rel_merge_projects_(v, r);
1338 :
1339 : if (!rel_match_projections(l,r))
1340 : return rel;
1341 :
1342 : /* for now only union(project*(select(R),project*(select(R))) */
1343 : if (ls != l->l || rs != r->l ||
1344 : ls->l != rs->l || !rel_is_ref(ls->l))
1345 : return rel;
1346 :
1347 : if (!ls->exps || !rs->exps ||
1348 : exps_has_predicate(ls->exps) ||
1349 : exps_has_predicate(rs->exps))
1350 : return rel;
1351 :
1352 : /* merge, ie. add 'or exp' */
1353 : v->changes++;
1354 : ls->exps = append(new_exp_list(v->sql->sa), exp_or(v->sql->sa, ls->exps, rs->exps, 0));
1355 : rs->exps = NULL;
1356 : rel = rel_inplace_project(v->sql->sa, rel, rel_dup(rel->l), rel->exps);
1357 : set_processed(rel);
1358 : return rel;
1359 : }
1360 : return rel;
1361 : }
1362 :
1363 : static bool
1364 150506 : rels_share_rel(list *l)
1365 : {
1366 150506 : sql_rel *ref = NULL;
1367 170618 : for (node *n = l->h; n; n = n->next) {
1368 170585 : sql_rel *r = n->data;
1369 170585 : if(!ref) {
1370 150506 : if ((ref = rel_find_ref(r)) == NULL)
1371 : return false;
1372 20079 : } else if (ref != rel_find_ref(r)) {
1373 : return false;
1374 : }
1375 : }
1376 : return true;
1377 : }
1378 :
1379 : static inline sql_rel *
1380 1240023 : rel_merge_munion(visitor *v, sql_rel *rel)
1381 : {
1382 1240023 : if (is_munion(rel->op) && rels_share_rel(rel->l)) {
1383 33 : list *rels = rel->l, *nrels = NULL;
1384 33 : sql_rel *cur = NULL, *curs = NULL;
1385 : /*
1386 : l && is_project(l->op) && !project_unsafe(l,0) &&
1387 : r && is_project(r->op) && !project_unsafe(r,0) &&
1388 : */
1389 :
1390 : /* Find selects and try to merge */
1391 33 : for(node *n = rels->h; n; n = n->next) {
1392 33 : sql_rel *r = n->data;
1393 33 : sql_rel *s = rel_find_select(r);
1394 :
1395 33 : if (!s)
1396 : return rel;
1397 3 : if (cur) {
1398 0 : if (s != r->l || curs->l != s->l || !rel_is_ref(s->l) ||
1399 : /* for now only union(project*(select(R),project*(select(R))) */
1400 0 : !s->exps || !curs->exps || exps_has_predicate(s->exps) || exps_has_predicate(curs->exps)) {
1401 0 : if (nrels)
1402 0 : append(nrels, r);
1403 : else
1404 : return rel;
1405 : }
1406 :
1407 : /* merge, ie. add 'or exp' */
1408 0 : curs->exps = append(new_exp_list(v->sql->sa), exp_or(v->sql->sa, cur->exps, s->exps, 0));
1409 0 : if (!nrels) {
1410 0 : nrels = sa_list(v->sql->sa);
1411 0 : append(nrels, cur);
1412 : }
1413 : } else {
1414 3 : if (s != r->l || !rel_is_ref(s->l)) {
1415 3 : if (nrels) {
1416 0 : append(nrels, r);
1417 : } else {
1418 : return rel;
1419 : }
1420 : }
1421 : cur = r;
1422 : curs = s;
1423 : }
1424 : }
1425 0 : if (nrels) {
1426 0 : v->changes++;
1427 0 : rel->l = nrels;
1428 : }
1429 0 : return rel;
1430 : }
1431 : return rel;
1432 : }
1433 :
1434 :
1435 : static sql_rel *
1436 : rel_optimize_unions_bottomup_(visitor *v, sql_rel *rel)
1437 : {
1438 : rel = rel_remove_union_partitions(v, rel);
1439 : rel = rel_merge_union(v, rel);
1440 : return rel;
1441 : }
1442 :
1443 : static sql_rel *
1444 : rel_optimize_unions_bottomup(visitor *v, global_props *gp, sql_rel *rel)
1445 : {
1446 : (void) gp;
1447 : return rel_visitor_bottomup(v, rel, &rel_optimize_unions_bottomup_);
1448 : }
1449 :
1450 : static sql_rel *
1451 1240023 : rel_optimize_munions_bottomup_(visitor *v, sql_rel *rel)
1452 : {
1453 1240023 : rel = rel_merge_munion(v, rel);
1454 1240023 : return rel;
1455 : }
1456 :
1457 : static sql_rel *
1458 17082 : rel_optimize_munions_bottomup(visitor *v, global_props *gp, sql_rel *rel)
1459 : {
1460 17082 : (void) gp;
1461 17082 : return rel_visitor_bottomup(v, rel, &rel_optimize_munions_bottomup_);
1462 : }
1463 :
1464 : run_optimizer
1465 601026 : bind_optimize_unions_bottomup(visitor *v, global_props *gp)
1466 : {
1467 601026 : int flag = v->sql->sql_optimizer;
1468 600841 : return gp->opt_level == 1 && gp->cnt[op_munion] && (flag & optimize_unions_bottomup)
1469 601026 : ? rel_optimize_munions_bottomup : NULL;
1470 : return gp->opt_level == 1 && gp->cnt[op_union] && (flag & optimize_unions_bottomup)
1471 : ? rel_optimize_unions_bottomup : NULL;
1472 : }
1473 :
1474 :
1475 : static inline sql_rel *
1476 4112854 : rel_project_cse(visitor *v, sql_rel *rel)
1477 : {
1478 4112854 : if (is_project(rel->op) && rel->exps) {
1479 1644922 : node *n, *m;
1480 1644922 : list *nexps;
1481 1644922 : int needed = 0;
1482 :
1483 8646451 : for (n=rel->exps->h; n && !needed; n = n->next) {
1484 7001524 : sql_exp *e1 = n->data;
1485 :
1486 7001524 : if (e1->type != e_column && !exp_is_atom(e1) && exp_name(e1)) {
1487 16475854 : for (m=n->next; m && !needed; m = m->next){
1488 15694596 : sql_exp *e2 = m->data;
1489 :
1490 15694596 : if (exp_name(e2) && exp_match_exp(e1, e2))
1491 15694595 : needed = 1;
1492 : }
1493 : }
1494 : }
1495 :
1496 1644927 : if (!needed)
1497 : return rel;
1498 :
1499 534 : nexps = new_exp_list(v->sql->sa);
1500 6407 : for (n=rel->exps->h; n; n = n->next) {
1501 5873 : sql_exp *e1 = n->data;
1502 :
1503 5873 : if (e1->type != e_column && !exp_is_atom(e1) && exp_name(e1)) {
1504 29811 : for (m=nexps->h; m; m = m->next){
1505 26146 : sql_exp *e2 = m->data;
1506 :
1507 26146 : if (exp_name(e2) && exp_match_exp(e1, e2) && (e1->type != e_column || exps_bind_column2(nexps, exp_relname(e1), exp_name(e1), NULL) == e1)) {
1508 581 : sql_exp *ne = exp_alias(v->sql->sa, exp_relname(e1), exp_name(e1), exp_relname(e2), exp_name(e2), exp_subtype(e2), e2->card, has_nil(e2), is_unique(e2), is_intern(e1));
1509 :
1510 581 : ne = exp_propagate(v->sql->sa, ne, e1);
1511 581 : set_selfref(ne);
1512 581 : exp_prop_alias(v->sql->sa, ne, e1);
1513 581 : e1 = ne;
1514 581 : break;
1515 : }
1516 : }
1517 : }
1518 5873 : append(nexps, e1);
1519 : }
1520 534 : rel->exps = nexps;
1521 : }
1522 : return rel;
1523 : }
1524 :
1525 : static int exp_is_const_op(sql_exp *exp, sql_exp *tope, sql_rel *expr);
1526 :
1527 : static int
1528 76 : exps_are_const_op(list *exps, sql_exp *tope, sql_rel *expr)
1529 : {
1530 76 : int ok = 1;
1531 :
1532 76 : if (list_empty(exps))
1533 : return 1;
1534 152 : for (node *n = exps->h; n && ok; n = n->next)
1535 76 : ok &= exp_is_const_op(n->data, tope, expr);
1536 : return ok;
1537 : }
1538 :
1539 : static int
1540 548 : exp_is_const_op(sql_exp *exp, sql_exp *tope, sql_rel *expr)
1541 : {
1542 687 : switch (exp->type) {
1543 28 : case e_atom:
1544 28 : return exp->f ? 0 : 1;
1545 2 : case e_convert:
1546 2 : return exp_is_const_op(exp->l, tope, expr);
1547 76 : case e_func:
1548 : case e_aggr: {
1549 76 : sql_subfunc *f = exp->f;
1550 76 : if (f->func->side_effect || IS_ANALYTIC(f->func))
1551 : return 0;
1552 76 : return exps_are_const_op(exp->l, tope, expr);
1553 : }
1554 0 : case e_cmp:
1555 0 : if (exp->flag == cmp_or || exp->flag == cmp_filter)
1556 0 : return exps_are_const_op(exp->l, tope, expr) && exps_are_const_op(exp->r, tope, expr);
1557 0 : if (exp->flag == cmp_in || exp->flag == cmp_notin)
1558 0 : return exp_is_const_op(exp->l, tope, expr) && exps_are_const_op(exp->r, tope, expr);
1559 0 : return exp_is_const_op(exp->l, tope, expr) && exp_is_const_op(exp->r, tope, expr) && (!exp->f || exp_is_const_op(exp->f, tope, expr));
1560 581 : case e_column: {
1561 581 : if (is_simple_project(expr->op) || is_groupby(expr->op)) {
1562 : /* in a simple projection, self-references may occur */
1563 581 : sql_exp *nexp = (exp->l ? exps_bind_column2(expr->exps, exp->l, exp->r, NULL) : exps_bind_column(expr->exps, exp->r, NULL, NULL, 0));
1564 581 : if (nexp && list_position(expr->exps, nexp) < list_position(expr->exps, tope))
1565 : return exp_is_const_op(nexp, exp, expr);
1566 : }
1567 : return 0;
1568 : }
1569 : default:
1570 : return 0;
1571 : }
1572 : }
1573 :
1574 : static sql_exp *
1575 23 : rel_groupby_add_count_star(mvc *sql, sql_rel *rel, sql_exp *count_star_exp, bool *count_added)
1576 : {
1577 23 : if (count_star_exp)
1578 : return count_star_exp;
1579 16 : if (!list_empty(rel->exps)) {
1580 44 : for (node *n=rel->exps->h; n ; n = n->next) {
1581 32 : sql_exp *e = n->data;
1582 :
1583 32 : if (exp_aggr_is_count(e) && !need_distinct(e) && list_length(e->l) == 0)
1584 4 : return e;
1585 : }
1586 : }
1587 12 : sql_subfunc *cf = sql_bind_func(sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
1588 12 : *count_added = true;
1589 12 : return rel_groupby_add_aggr(sql, rel, exp_aggr(sql->sa, NULL, cf, 0, 0, rel->card, 0));
1590 : }
1591 :
1592 : /* optimize sum(x + 12) into sum(x) + 12*count(*) */
1593 : static inline sql_rel *
1594 41627 : rel_simplify_sum(visitor *v, sql_rel *rel)
1595 : {
1596 41627 : if (is_groupby(rel->op) && !list_empty(rel->exps)) {
1597 41561 : sql_rel *upper = NULL, *groupby = rel, *l = groupby->l;
1598 41561 : sql_exp *count_star_exp = NULL;
1599 41561 : bool count_added = false;
1600 :
1601 128609 : for (node *n=groupby->exps->h; n ; n = n->next) {
1602 87048 : sql_exp *e = n->data;
1603 87048 : list *el = e->l;
1604 87048 : sql_subfunc *sf = e->f;
1605 :
1606 87048 : if (e->type == e_aggr && !need_distinct(e) && sf->func->type == F_AGGR && !sf->func->s && !strcmp(sf->func->base.name, "sum")) {
1607 8050 : sql_rel *expr = groupby;
1608 8050 : sql_exp *exp = (sql_exp*) el->h->data, *oexp = exp;
1609 :
1610 8050 : while (is_numeric_upcast(exp))
1611 0 : exp = exp->l;
1612 : /* we want to find a +/- call, so expect them to appear only on simple projections */
1613 17135 : while (exp && exp->type == e_column && (is_simple_project(expr->op) || is_groupby(expr->op)) && expr->l) {
1614 10242 : sql_rel *nexpr = NULL;
1615 10242 : sql_exp *nexp = rel_find_exp_and_corresponding_rel(l, exp, false, &nexpr, NULL);
1616 :
1617 : /* break when it loops on the same relation */
1618 10242 : if (nexpr == expr && list_position(expr->exps, nexp) >= list_position(expr->exps, exp))
1619 : break;
1620 9085 : expr = nexpr;
1621 9085 : exp = oexp = nexp;
1622 9089 : while (exp && is_numeric_upcast(exp))
1623 4 : exp = exp->l;
1624 : }
1625 :
1626 8050 : list *expl = exp ? exp->l : NULL;
1627 87048 : sql_subfunc *expf = exp ? exp->f : NULL;
1628 : /* found a candidate function */
1629 6978 : if (exp && exp->type == e_func && expf->func->type == F_FUNC && !expf->func->s &&
1630 1235 : (!strcmp(expf->func->base.name, "sql_sub") || !strcmp(expf->func->base.name, "sql_add"))) {
1631 236 : sql_exp *e1 = (sql_exp*) expl->h->data, *e2 = (sql_exp*) expl->h->next->data;
1632 236 : int e1ok = exp_is_const_op(e1, oexp, expr), e2ok = exp_is_const_op(e2, oexp, expr);
1633 :
1634 236 : if ((!e1ok && e2ok) || (e1ok && !e2ok)) {
1635 24 : sql_exp *ocol = e1ok ? e2 : e1, *constant = e1ok ? e1 : e2, *mul, *colref, *naggr, *newop, *col = ocol, *match;
1636 24 : bool add_col = true, prepend = false;
1637 :
1638 : /* if 'col' is a projection from the under relation, then use it */
1639 24 : while (is_numeric_upcast(col))
1640 0 : col = col->l;
1641 24 : if (col->type == e_column) {
1642 22 : sql_exp *colf = exps_find_exp(l->exps, col);
1643 :
1644 : /* col is already found in the inner relation. Also look for a new reference for col, eg sql_add(col, 1), 1 as col */
1645 22 : if (colf && list_position(l->exps, colf) < list_position(l->exps, oexp)) {
1646 : add_col = false;
1647 10 : } else if (!colf && is_simple_project(l->op) && list_empty(l->r) && !rel_is_ref(l) && !need_distinct(l)) {
1648 : prepend = true;
1649 : add_col = false;
1650 1 : } else if (!colf && (is_simple_project(l->op) || is_groupby(l->op))) {
1651 : /* on these scenarios the new column expression will be ordered/(grouped for distinct) or create potential ambiguity (multiple ref), so skip */
1652 1 : continue;
1653 : }
1654 2 : } else if ((is_simple_project(l->op) && (!list_empty(l->r) || rel_is_ref(l) || need_distinct(l))) || is_groupby(l->op)) {
1655 0 : continue;
1656 : }
1657 :
1658 : /* add count star */
1659 23 : count_star_exp = rel_groupby_add_count_star(v->sql, groupby, count_star_exp, &count_added);
1660 : /* multiply constant by count star */
1661 23 : if (!(mul = rel_binop_(v->sql, NULL, constant, exp_ref(v->sql, count_star_exp), "sys", "sql_mul", card_value, true))) {
1662 0 : v->sql->session->status = 0;
1663 0 : v->sql->errstr[0] = '\0';
1664 0 : continue;
1665 : }
1666 23 : if (!has_label(mul))
1667 23 : exp_label(v->sql->sa, mul, ++v->sql->label);
1668 :
1669 23 : colref = exp_ref(v->sql, ocol);
1670 23 : if (add_col) /* if 'col' will be added, then make sure it has an unique label */
1671 3 : exp_label(v->sql->sa, colref, ++v->sql->label);
1672 :
1673 : /* 'oexp' contains the type for the input for the 'sum' aggregate */
1674 23 : if (!(colref = exp_check_type(v->sql, exp_subtype(oexp), groupby, colref, type_equal))) {
1675 0 : v->sql->session->status = 0;
1676 0 : v->sql->errstr[0] = '\0';
1677 0 : continue;
1678 : }
1679 : /* update sum to use the column side only */
1680 23 : sql_subfunc *a = sql_bind_func(v->sql, "sys", "sum", exp_subtype(colref), NULL, F_AGGR, true, true);
1681 23 : if (!a)
1682 0 : continue;
1683 23 : naggr = exp_aggr(v->sql->sa, list_append(sa_list(v->sql->sa), colref), a, need_distinct(e), need_no_nil(e), groupby->card, has_nil(e));
1684 23 : if ((match = exps_any_match(groupby->exps, naggr)) && list_position(groupby->exps, match) < list_position(groupby->exps, e)) { /* found a matching aggregate, use it */
1685 9 : naggr = exp_ref(v->sql, match);
1686 9 : exp_label(v->sql->sa, naggr, ++v->sql->label);
1687 14 : } else if (!has_label(naggr)) { /* otherwise use the new one */
1688 14 : exp_label(v->sql->sa, naggr, ++v->sql->label);
1689 : }
1690 :
1691 : /* generate addition/subtraction. subtraction is not commutative, so keep original order! */
1692 23 : if (!(newop = rel_binop_(v->sql, NULL, e1 == constant ? mul : exp_ref(v->sql, naggr), e1 == constant ? exp_ref(v->sql, naggr) : mul, "sys", expf->func->base.name, card_value, true))) {
1693 0 : v->sql->session->status = 0;
1694 0 : v->sql->errstr[0] = '\0';
1695 0 : continue;
1696 : }
1697 23 : if (!(newop = exp_check_type(v->sql, exp_subtype(e), groupby, newop, type_equal))) {
1698 0 : v->sql->session->status = 0;
1699 0 : v->sql->errstr[0] = '\0';
1700 0 : continue;
1701 : }
1702 :
1703 : /* a column reference can be prepended to the inner relation, add it after all the check type calls succeed */
1704 23 : if (prepend)
1705 9 : list_prepend(l->exps, exp_ref(v->sql, col));
1706 :
1707 : /* the new generate function calls are valid, update relations */
1708 : /* we need a new relation for the multiplication and addition/subtraction */
1709 23 : if (!upper) {
1710 : /* be careful with relations with more than 1 reference, so do in-place replacement */
1711 16 : list *projs = rel_projections(v->sql, rel, NULL, 1, 1);
1712 16 : sql_rel *nrel = rel_groupby(v->sql, rel->l, NULL);
1713 16 : nrel->exps = rel->exps;
1714 16 : nrel->r = rel->r;
1715 16 : nrel->card = rel->card;
1716 16 : nrel->nrcols = list_length(rel->exps);
1717 16 : set_processed(nrel);
1718 16 : rel_dup(rel->l);
1719 16 : upper = rel = rel_inplace_project(v->sql->sa, rel, nrel, projs);
1720 16 : rel->card = exps_card(projs);
1721 16 : groupby = nrel; /* update pointers :) */
1722 16 : l = groupby->l;
1723 : }
1724 91 : for (node *n = upper->exps->h ; n ; ) {
1725 68 : node *next = n->next;
1726 68 : sql_exp *re = n->data;
1727 :
1728 : /* remove the old reference to the aggregate because we will use the addition/subtraction instead,
1729 : as well as the count star if count_added */
1730 68 : if (exp_refers(e, re) || (count_added && exp_refers(count_star_exp, re)))
1731 35 : list_remove_node(upper->exps, NULL, n);
1732 : n = next;
1733 : }
1734 :
1735 : /* update sum aggregate with new aggregate or reference to an existing one */
1736 23 : n->data = naggr;
1737 23 : list_hash_clear(groupby->exps);
1738 :
1739 : /* add column reference with new label, if 'col' was not found */
1740 23 : if (add_col) {
1741 3 : if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l))
1742 0 : groupby->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
1743 3 : list_append(l->exps, ocol);
1744 : }
1745 :
1746 : /* propagate alias and add new expression */
1747 23 : exp_prop_alias(v->sql->sa, newop, e);
1748 23 : list_append(upper->exps, newop);
1749 23 : v->changes++;
1750 : }
1751 : }
1752 : }
1753 : }
1754 : }
1755 41627 : return rel;
1756 : }
1757 :
1758 : /* optimize group by x+1,(y-2)*3,2-z into group by x,y,z */
1759 : static inline sql_rel *
1760 41627 : rel_simplify_groupby_columns(visitor *v, sql_rel *rel)
1761 : {
1762 41627 : if (is_groupby(rel->op) && !list_empty(rel->r)) {
1763 31088 : sql_rel *l = rel->l;
1764 :
1765 78731 : for (node *n=((list*)rel->r)->h; n ; n = n->next) {
1766 47643 : sql_exp *e = n->data;
1767 47643 : e->used = 0; /* we need to use this flag, clean it first */
1768 : }
1769 78731 : for (node *n=((list*)rel->r)->h; n ; n = n->next) {
1770 47643 : sql_exp *e = n->data;
1771 :
1772 47643 : if (e->type == e_column) {
1773 47594 : bool searching = true;
1774 47594 : sql_rel *efrel = NULL;
1775 47594 : sql_exp *exp = rel_find_exp_and_corresponding_rel(l, e, false, &efrel, NULL), *col = NULL, *tope = exp;
1776 :
1777 95207 : while (searching && !col) {
1778 47613 : sql_exp *exp_col = exp;
1779 :
1780 47613 : if (exp && is_numeric_upcast(exp))
1781 63 : exp = exp->l;
1782 47613 : if (exp && exp->type == e_func) {
1783 516 : list *el = exp->l;
1784 516 : sql_subfunc *sf = exp->f;
1785 : /* At the moment look only at injective math functions */
1786 516 : if (sf->func->type == F_FUNC && !sf->func->s &&
1787 503 : (!strcmp(sf->func->base.name, "sql_sub") || !strcmp(sf->func->base.name, "sql_add") || !strcmp(sf->func->base.name, "sql_mul"))) {
1788 105 : sql_exp *e1 = (sql_exp*) el->h->data, *e2 = (sql_exp*) el->h->next->data;
1789 : /* the optimization cannot be done if side-effect calls (e.g. rand()) are present */
1790 202 : int e1ok = exp_is_atom(e1) && !exp_unsafe(e1, 1) && !exp_has_sideeffect(e1), e2ok = exp_is_atom(e2) && !exp_unsafe(e2, 1) && !exp_has_sideeffect(e2);
1791 :
1792 105 : if ((!e1ok && e2ok) || (e1ok && !e2ok)) {
1793 51 : sql_exp *c = e1ok ? e2 : e1;
1794 51 : bool done = false;
1795 51 : exp_col = exps_find_exp(efrel->exps, c);
1796 51 : if (exp_col)
1797 25 : c = exp_col;
1798 :
1799 104 : while (!done) {
1800 53 : if (is_numeric_upcast(c))
1801 0 : c = c->l;
1802 53 : if (c->type == e_column) {
1803 34 : if (is_simple_project(efrel->op) || is_groupby(efrel->op)) {
1804 : /* in a simple projection, self-references may occur */
1805 34 : sql_exp *nc = exps_find_exp(efrel->exps, c);
1806 34 : if (nc && list_position(efrel->exps, nc) < list_position(efrel->exps, exp_col)) {
1807 2 : exp_col = c;
1808 2 : c = nc;
1809 2 : continue;
1810 : }
1811 : }
1812 : col = c; /* 'c' is a column reference from the left relation */
1813 : done = true;
1814 : } else {
1815 : exp = c; /* maybe a nested function call, let's continue searching */
1816 : done = true;
1817 : }
1818 : }
1819 : } else {
1820 : searching = false;
1821 : }
1822 : } else {
1823 : searching = false;
1824 : }
1825 : } else {
1826 : searching = false;
1827 : }
1828 : }
1829 47594 : if (col) { /* a column reference was found */
1830 32 : const char *rname = exp_relname(e), *name = exp_name(e);
1831 :
1832 : /* the grouping column has an alias, we have to keep it */
1833 32 : if ((rname && name && (strcmp(rname, e->l) != 0 || strcmp(name, e->r) != 0)) || (!rname && name && strcmp(name, e->r) != 0)) {
1834 2 : if (!has_label(e)) /* dangerous to merge, skip it */
1835 0 : continue;
1836 2 : if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l))
1837 0 : rel->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
1838 2 : list_append(l->exps, e);
1839 2 : n->data = e = exp_ref(v->sql, e);
1840 2 : list_hash_clear(rel->r);
1841 : }
1842 :
1843 32 : sql_exp *f = exps_find_exp(rel->r, col);
1844 :
1845 32 : if (f && list_position(rel->r, f) < list_position(rel->r, e)) { /* if already present, remove it */
1846 7 : e->used = 1;
1847 : } else {
1848 : /* Use an unique reference to the column found. If there's another grouping column label pointing into it,
1849 : rel_groupby_cse will hopefully remove it */
1850 25 : sql_exp *colf = exps_find_exp(l->exps, col);
1851 :
1852 : /* col is already found in the inner relation. Also look for a new reference for col, eg sql_add(col, 1), 1 as col */
1853 25 : if (colf && list_position(l->exps, colf) < list_position(l->exps, tope)) {
1854 5 : n->data = exp_ref(v->sql, col);
1855 20 : } else if (!colf && is_simple_project(l->op) && list_empty(l->r) && !rel_is_ref(l) && !need_distinct(l)) { /* trivial case, it can be added */
1856 20 : sql_exp *ne = exp_ref(v->sql, col);
1857 20 : list_prepend(l->exps, ne);
1858 20 : n->data = exp_ref(v->sql, ne);
1859 0 : } else if (!colf && (is_simple_project(l->op) || is_groupby(l->op))) {
1860 : /* on these scenarios the new column expression will be ordered/(grouped for distinct) or create potential ambiguity (multiple ref), so skip */
1861 0 : continue;
1862 : } else {
1863 0 : sql_exp *ne = exp_ref(v->sql, col);
1864 :
1865 0 : if (colf) /* a col reference is already there, add a new label */
1866 0 : exp_label(v->sql->sa, ne, ++v->sql->label);
1867 0 : if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l))
1868 0 : rel->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
1869 0 : list_append(l->exps, ne);
1870 0 : n->data = exp_ref(v->sql, ne);
1871 : }
1872 25 : list_hash_clear(rel->r);
1873 : }
1874 32 : v->changes++;
1875 : }
1876 : }
1877 : }
1878 78731 : for (node *n=((list*)rel->r)->h; n ; ) {
1879 47643 : node *next = n->next;
1880 47643 : sql_exp *e = n->data;
1881 :
1882 47643 : if (e->used) /* remove unecessary grouping columns */
1883 7 : list_remove_node(rel->r, NULL, n);
1884 : n = next;
1885 : }
1886 : }
1887 41627 : return rel;
1888 : }
1889 :
1890 : /* remove identical grouping columns */
1891 : static inline sql_rel *
1892 133956 : rel_groupby_cse(visitor *v, sql_rel *rel)
1893 : {
1894 133956 : if (is_groupby(rel->op) && !list_empty(rel->r)) {
1895 92443 : sql_rel *l = rel->l;
1896 :
1897 : /* for every group expression e1 */
1898 254017 : for (node *n=((list*)rel->r)->h; n ; n = n->next) {
1899 161574 : sql_exp *e1 = n->data;
1900 : /* it's good to examine the same expression in the subrelation e.g. in case it's an alias */
1901 : /* TODO maybe cover more cases? Here I only look at the left relation */
1902 161574 : sql_exp *e1_sub = e1->type == e_column ? exps_find_exp(l->exps, e1) : NULL;
1903 :
1904 : /* for every other group expression */
1905 261676 : for (node *m=n->next; m; m = m->next) {
1906 100102 : sql_exp *e2 = m->data;
1907 100102 : sql_exp *e2_sub = e2->type == e_column ? exps_find_exp(l->exps, e2) : NULL;
1908 :
1909 : /* check if the expression are the same */
1910 100102 : if (exp_match_exp(e1, e2) || exp_refers(e1, e2) || (e1_sub && e2_sub && (exp_match_exp(e1_sub, e2_sub) || exp_refers(e1_sub, e2_sub)))) {
1911 :
1912 : /* use e2 from rel->exps instead of e2 from the rel->r as it can have an alias from the higher rel */
1913 23 : sql_exp *e2_in_exps = (e2->l && e2->alias.rname == e2->l && e2->alias.name == e2->r) ?
1914 36 : exps_bind_column2(rel->exps, e2->l, e2->r, NULL) :
1915 16 : exps_bind_column(rel->exps, e2->alias.name, NULL, NULL, 0);
1916 26 : assert(e2_in_exps);
1917 :
1918 : /* same as e2 */
1919 22 : sql_exp *e1_in_exps = (e1->l && e1->alias.rname == e1->l && e1->alias.name == e1->r) ?
1920 38 : exps_bind_column2(rel->exps, e1->l, e1->r, NULL) :
1921 14 : exps_bind_column(rel->exps, e1->alias.name, NULL, NULL, 0);
1922 26 : assert(e1_in_exps);
1923 :
1924 : /* write e2 as an e1 alias since the expressions are the same */
1925 26 : sql_exp* e2_as_e1_alias = exp_copy(v->sql, e1_in_exps);
1926 : /* NOTE: it is important to get the rname (exp->l) and name (exp->r) from e2 IN the exps
1927 : * (e2_in_exps), and not from e2, since it could carry an alias from the higher rel */
1928 26 : exp_setalias(e2_as_e1_alias, e2_in_exps->l, e2_in_exps->r);
1929 :
1930 : /* replace e2 with e2_as_e1_alias in expressions list */
1931 26 : node *e2_exps_node = list_find(rel->exps, e2_in_exps, NULL);
1932 26 : list_append_before(rel->exps, e2_exps_node, e2_as_e1_alias);
1933 26 : list_remove_node(rel->exps, NULL, e2_exps_node);
1934 :
1935 : /* finally remove e2 from the groups' list (->r) since it's redundant */
1936 26 : node *e2_r_node = list_find(rel->r, e2, NULL);
1937 26 : list_remove_node(rel->r, NULL, e2_r_node);
1938 :
1939 26 : v->changes++;
1940 : }
1941 : }
1942 : }
1943 : }
1944 133956 : return rel;
1945 : }
1946 :
1947 : sql_exp *list_exps_uses_exp(list *exps, const char *rname, const char *name);
1948 :
1949 : static sql_exp*
1950 18351 : exp_uses_exp(sql_exp *e, const char *rname, const char *name)
1951 : {
1952 18634 : sql_exp *res = NULL;
1953 :
1954 18634 : switch (e->type) {
1955 : case e_psm:
1956 : break;
1957 1902 : case e_atom: {
1958 1902 : if (e->f)
1959 0 : return list_exps_uses_exp(e->f, rname, name);
1960 : } break;
1961 282 : case e_convert:
1962 282 : return exp_uses_exp(e->l, rname, name);
1963 9882 : case e_column: {
1964 9882 : if (e->l && rname && strcmp(e->l, rname) == 0 &&
1965 2221 : e->r && name && strcmp(e->r, name) == 0)
1966 : return e;
1967 7816 : if (!e->l && !rname &&
1968 26 : e->r && name && strcmp(e->r, name) == 0)
1969 : return e;
1970 : } break;
1971 6020 : case e_func:
1972 : case e_aggr: {
1973 6020 : if (e->l)
1974 4812 : return list_exps_uses_exp(e->l, rname, name);
1975 : } break;
1976 548 : case e_cmp: {
1977 548 : if (e->flag == cmp_in || e->flag == cmp_notin) {
1978 0 : if ((res = exp_uses_exp(e->l, rname, name)))
1979 : return res;
1980 0 : return list_exps_uses_exp(e->r, rname, name);
1981 548 : } else if (e->flag == cmp_or || e->flag == cmp_filter) {
1982 2 : if ((res = list_exps_uses_exp(e->l, rname, name)))
1983 : return res;
1984 2 : return list_exps_uses_exp(e->r, rname, name);
1985 : } else {
1986 546 : if ((res = exp_uses_exp(e->l, rname, name)))
1987 : return res;
1988 545 : if ((res = exp_uses_exp(e->r, rname, name)))
1989 : return res;
1990 543 : if (e->f)
1991 : return exp_uses_exp(e->f, rname, name);
1992 : }
1993 : } break;
1994 : }
1995 : return NULL;
1996 : }
1997 :
1998 : sql_exp *
1999 9754 : list_exps_uses_exp(list *exps, const char *rname, const char *name)
2000 : {
2001 9754 : sql_exp *res = NULL;
2002 :
2003 9754 : if (!exps)
2004 : return NULL;
2005 27014 : for (node *n = exps->h; n && !res; n = n->next) {
2006 17260 : sql_exp *e = n->data;
2007 17260 : res = exp_uses_exp(e, rname, name);
2008 : }
2009 : return res;
2010 : }
2011 :
2012 : /* find in the list of expression an expression which uses e */
2013 : sql_exp *
2014 641 : exps_uses_exp(list *exps, sql_exp *e)
2015 : {
2016 641 : return list_exps_uses_exp(exps, exp_relname(e), exp_name(e));
2017 : }
2018 : /*
2019 : * Rewrite aggregations over munion all.
2020 : * groupby ([ union all (a, b, c) ], [gbe], [ count, sum ] )
2021 : *
2022 : * into
2023 : * groupby ([ union all( groupby( a, [gbe], [ count, sum] ),
2024 : * groupby( b, [gbe], [ count, sum] ),
2025 : * groupby( c, [gbe], [ count, sum] ) ],
2026 : * [gbe], [sum, sum] )
2027 : */
2028 : static inline sql_rel *
2029 1022 : rel_push_aggr_down_n_arry(visitor *v, sql_rel *rel)
2030 : {
2031 1022 : sql_rel *g = rel;
2032 1022 : sql_rel *u = rel->l, *ou = u;
2033 1022 : sql_rel *r = NULL;
2034 1022 : list *rgbe = NULL, *gbe = NULL, *exps = NULL;
2035 1022 : node *n, *m;
2036 :
2037 : // TODO why?
2038 1022 : if (u->op == op_project && !need_distinct(u))
2039 222 : u = u->l;
2040 :
2041 : /* make sure we don't create group by on group by's */
2042 1672 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
2043 1347 : r = n->data;
2044 1347 : if (r->op == op_groupby)
2045 : return rel;
2046 : }
2047 :
2048 : /* distinct should be done over the full result */
2049 826 : for (n = g->exps->h; n; n = n->next) {
2050 610 : sql_exp *e = n->data;
2051 610 : sql_subfunc *af = e->f;
2052 :
2053 610 : if (e->type == e_atom ||
2054 610 : e->type == e_func ||
2055 370 : (e->type == e_aggr &&
2056 370 : ((strcmp(af->func->base.name, "sum") &&
2057 290 : strcmp(af->func->base.name, "count") &&
2058 121 : strcmp(af->func->base.name, "min") &&
2059 370 : strcmp(af->func->base.name, "max")) ||
2060 : need_distinct(e))))
2061 : return rel;
2062 : }
2063 :
2064 216 : list *nl = sa_list(v->sql->sa);
2065 648 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
2066 432 : r = n->data;
2067 432 : n->data = NULL; /* clean list as we steal the relation r, stealing is needed else (with multiple references) double project cleanup fails */
2068 432 : if (!is_project(r->op))
2069 0 : r = rel_project(v->sql->sa, r,
2070 : rel_projections(v->sql, r, NULL, 1, 1));
2071 432 : rel_rename_exps(v->sql, u->exps, r->exps);
2072 432 : if (u != ou) {
2073 304 : bool isproject = is_project(r->op);
2074 304 : r = rel_project(v->sql->sa, r, NULL);
2075 304 : r->exps = exps_copy(v->sql, ou->exps);
2076 304 : rel_rename_exps(v->sql, ou->exps, r->exps);
2077 304 : set_processed(r);
2078 304 : if (isproject)
2079 304 : r = rel_push_project_down_(v, r); /* cleanup any double projects */
2080 : }
2081 432 : if (g->r && list_length(g->r) > 0) {
2082 350 : list *gbe = g->r;
2083 350 : rgbe = exps_copy(v->sql, gbe);
2084 : }
2085 432 : r = rel_groupby(v->sql, r, NULL);
2086 432 : r->r = rgbe;
2087 432 : r->nrcols = g->nrcols;
2088 432 : r->card = g->card;
2089 432 : r->exps = exps_copy(v->sql, g->exps);
2090 432 : r->nrcols = list_length(r->exps);
2091 432 : set_processed(r);
2092 :
2093 432 : append(nl, r);
2094 : }
2095 :
2096 : /* group by on primary keys which define the partioning scheme
2097 : * don't need a finalizing group by */
2098 : /* how to check if a partition is based on some primary key ?
2099 : * */
2100 216 : if (!list_empty(rel->r)) {
2101 413 : for (node *n = ((list*)rel->r)->h; n; n = n->next) {
2102 238 : sql_exp *e = n->data;
2103 238 : sql_column *c = NULL;
2104 :
2105 238 : if ((c = exp_is_pkey(rel, e)) && partition_find_part(v->sql->session->tr, c->t, NULL)) {
2106 : /* check if key is partition key */
2107 0 : v->changes++;
2108 0 : return rel_inplace_setop_n_ary(v->sql, rel, nl, op_munion,
2109 : rel_projections(v->sql, rel, NULL, 1, 1));
2110 : }
2111 : }
2112 : }
2113 :
2114 216 : if (!list_empty(rel->r)) {
2115 175 : list *ogbe = rel->r;
2116 :
2117 175 : gbe = new_exp_list(v->sql->sa);
2118 413 : for (n = ogbe->h; n; n = n->next) {
2119 238 : sql_exp *e = n->data, *ne;
2120 :
2121 : /* group by in aggregation list */
2122 238 : ne = exps_uses_exp( rel->exps, e);
2123 238 : if (ne) {
2124 238 : sql_rel *first_munion_rel = nl->h->data;
2125 238 : ne = list_find_exp(first_munion_rel->exps, ne);
2126 : }
2127 238 : if (!ne) {
2128 : /* e only in the u1,u2,...un->r (group by list) */
2129 0 : for (node *n = nl->h; n; n = n->next) {
2130 0 : ne = exp_ref(v->sql, e);
2131 0 : list_append(((sql_rel*)n->data)->exps, ne);
2132 : }
2133 : }
2134 0 : assert(ne);
2135 238 : ne = exp_ref(v->sql, ne);
2136 238 : append(gbe, ne);
2137 : }
2138 : }
2139 :
2140 216 : u = rel_setop_n_ary(v->sql->sa, nl, op_munion);
2141 216 : rel_setop_n_ary_set_exps(v->sql, u,
2142 216 : rel_projections(v->sql, nl->h->data, NULL, 1, 1), false);
2143 216 : set_processed(u);
2144 :
2145 216 : exps = new_exp_list(v->sql->sa);
2146 715 : for (n = u->exps->h, m = rel->exps->h; n && m; n = n->next, m = m->next) {
2147 499 : sql_exp *ne, *e = n->data, *oa = m->data;
2148 :
2149 499 : if (oa->type == e_aggr) {
2150 261 : sql_subfunc *f = oa->f;
2151 261 : int cnt = exp_aggr_is_count(oa);
2152 261 : sql_subfunc *a = sql_bind_func(v->sql, "sys", (cnt)?"sum":f->func->base.name, exp_subtype(e), NULL, F_AGGR, true, true);
2153 :
2154 261 : assert(a);
2155 : /* munion of aggr result may have nils
2156 : * because sum/count of empty set */
2157 261 : set_has_nil(e);
2158 261 : e = exp_ref(v->sql, e);
2159 261 : ne = exp_aggr1(v->sql->sa, e, a, need_distinct(e), 1, e->card, 1);
2160 : } else {
2161 238 : ne = exp_copy(v->sql, oa);
2162 : }
2163 499 : exp_setname(v->sql->sa, ne, exp_find_rel_name(oa), exp_name(oa));
2164 499 : append(exps, ne);
2165 : }
2166 :
2167 216 : v->changes++;
2168 :
2169 216 : return rel_inplace_groupby(rel, u, gbe, exps);
2170 : }
2171 :
2172 : /*
2173 : * Rewrite aggregations over union all.
2174 : * groupby ([ union all (a, b) ], [gbe], [ count, sum ] )
2175 : *
2176 : * into
2177 : * groupby ( [ union all( groupby( a, [gbe], [ count, sum] ), [ groupby( b, [gbe], [ count, sum] )) , [gbe], [sum, sum] )
2178 : */
2179 : static inline sql_rel *
2180 133956 : rel_push_aggr_down(visitor *v, sql_rel *rel)
2181 : {
2182 133956 : if (rel->op == op_groupby && rel->l) {
2183 133876 : sql_rel *u = rel->l, *ou = u;
2184 133876 : sql_rel *g = rel;
2185 133876 : sql_rel *ul = u->l;
2186 133876 : sql_rel *ur = u->r;
2187 133876 : node *n, *m;
2188 133876 : list *lgbe = NULL, *rgbe = NULL, *gbe = NULL, *exps = NULL;
2189 :
2190 133876 : if (u->op == op_project && !need_distinct(u))
2191 59770 : u = u->l;
2192 :
2193 133876 : if (!u || !(is_union(u->op) || is_munion(u->op)) || need_distinct(u) || is_single(u) || !u->exps || rel_is_ref(u))
2194 : return rel;
2195 :
2196 1236 : if (is_munion(u->op))
2197 1022 : return rel_push_aggr_down_n_arry(v, rel);
2198 :
2199 214 : ul = u->l;
2200 214 : ur = u->r;
2201 :
2202 : /* make sure we don't create group by on group by's */
2203 214 : if (ul->op == op_groupby || ur->op == op_groupby)
2204 : return rel;
2205 :
2206 : /* distinct should be done over the full result */
2207 186 : for (n = g->exps->h; n; n = n->next) {
2208 112 : sql_exp *e = n->data;
2209 112 : sql_subfunc *af = e->f;
2210 :
2211 112 : if (e->type == e_atom ||
2212 112 : e->type == e_func ||
2213 68 : (e->type == e_aggr &&
2214 68 : ((strcmp(af->func->base.name, "sum") &&
2215 54 : strcmp(af->func->base.name, "count") &&
2216 8 : strcmp(af->func->base.name, "min") &&
2217 68 : strcmp(af->func->base.name, "max")) ||
2218 : need_distinct(e))))
2219 : return rel;
2220 : }
2221 :
2222 74 : ul = rel_dup(ul);
2223 74 : ur = rel_dup(ur);
2224 74 : if (!is_project(ul->op))
2225 18 : ul = rel_project(v->sql->sa, ul,
2226 : rel_projections(v->sql, ul, NULL, 1, 1));
2227 74 : if (!is_project(ur->op))
2228 21 : ur = rel_project(v->sql->sa, ur,
2229 : rel_projections(v->sql, ur, NULL, 1, 1));
2230 74 : rel_rename_exps(v->sql, u->exps, ul->exps);
2231 74 : rel_rename_exps(v->sql, u->exps, ur->exps);
2232 74 : if (u != ou) {
2233 39 : ul = rel_project(v->sql->sa, ul, NULL);
2234 39 : ul->exps = exps_copy(v->sql, ou->exps);
2235 39 : rel_rename_exps(v->sql, ou->exps, ul->exps);
2236 39 : set_processed(ul);
2237 39 : ur = rel_project(v->sql->sa, ur, NULL);
2238 39 : ur->exps = exps_copy(v->sql, ou->exps);
2239 39 : rel_rename_exps(v->sql, ou->exps, ur->exps);
2240 39 : set_processed(ur);
2241 : }
2242 :
2243 74 : if (g->r && list_length(g->r) > 0) {
2244 42 : list *gbe = g->r;
2245 :
2246 42 : lgbe = exps_copy(v->sql, gbe);
2247 42 : rgbe = exps_copy(v->sql, gbe);
2248 : }
2249 74 : ul = rel_groupby(v->sql, ul, NULL);
2250 74 : ul->r = lgbe;
2251 74 : ul->nrcols = g->nrcols;
2252 74 : ul->card = g->card;
2253 74 : ul->exps = exps_copy(v->sql, g->exps);
2254 74 : ul->nrcols = list_length(ul->exps);
2255 74 : set_processed(ul);
2256 :
2257 74 : ur = rel_groupby(v->sql, ur, NULL);
2258 74 : ur->r = rgbe;
2259 74 : ur->nrcols = g->nrcols;
2260 74 : ur->card = g->card;
2261 74 : ur->exps = exps_copy(v->sql, g->exps);
2262 74 : ur->nrcols = list_length(ur->exps);
2263 74 : set_processed(ur);
2264 :
2265 : /* group by on primary keys which define the partioning scheme
2266 : * don't need a finalizing group by */
2267 : /* how to check if a partition is based on some primary key ?
2268 : * */
2269 74 : if (!list_empty(rel->r)) {
2270 93 : for (node *n = ((list*)rel->r)->h; n; n = n->next) {
2271 51 : sql_exp *e = n->data;
2272 51 : sql_column *c = NULL;
2273 :
2274 51 : if ((c = exp_is_pkey(rel, e)) && partition_find_part(v->sql->session->tr, c->t, NULL)) {
2275 : /* check if key is partition key */
2276 0 : v->changes++;
2277 0 : return rel_inplace_setop(v->sql, rel, ul, ur, op_union,
2278 : rel_projections(v->sql, rel, NULL, 1, 1));
2279 : }
2280 : }
2281 : }
2282 :
2283 74 : if (!list_empty(rel->r)) {
2284 42 : list *ogbe = rel->r;
2285 :
2286 42 : gbe = new_exp_list(v->sql->sa);
2287 93 : for (n = ogbe->h; n; n = n->next) {
2288 51 : sql_exp *e = n->data, *ne;
2289 :
2290 : /* group by in aggreation list */
2291 51 : ne = exps_uses_exp( rel->exps, e);
2292 51 : if (ne)
2293 45 : ne = list_find_exp( ul->exps, ne);
2294 45 : if (!ne) {
2295 : /* e only in the ul/ur->r (group by list) */
2296 7 : ne = exp_ref(v->sql, e);
2297 7 : list_append(ul->exps, ne);
2298 7 : ne = exp_ref(v->sql, e);
2299 7 : list_append(ur->exps, ne);
2300 : }
2301 7 : assert(ne);
2302 51 : ne = exp_ref(v->sql, ne);
2303 51 : append(gbe, ne);
2304 : }
2305 : }
2306 :
2307 74 : u = rel_setop(v->sql->sa, ul, ur, op_union);
2308 74 : rel_setop_set_exps(v->sql, u, rel_projections(v->sql, ul, NULL, 1, 1), false);
2309 74 : set_processed(u);
2310 :
2311 74 : exps = new_exp_list(v->sql->sa);
2312 178 : for (n = u->exps->h, m = rel->exps->h; n && m; n = n->next, m = m->next) {
2313 104 : sql_exp *ne, *e = n->data, *oa = m->data;
2314 :
2315 104 : if (oa->type == e_aggr) {
2316 60 : sql_subfunc *f = oa->f;
2317 60 : int cnt = exp_aggr_is_count(oa);
2318 60 : sql_subfunc *a = sql_bind_func(v->sql, "sys", (cnt)?"sum":f->func->base.name, exp_subtype(e), NULL, F_AGGR, true, true);
2319 :
2320 60 : assert(a);
2321 : /* union of aggr result may have nils
2322 : * because sum/count of empty set */
2323 60 : set_has_nil(e);
2324 60 : e = exp_ref(v->sql, e);
2325 60 : ne = exp_aggr1(v->sql->sa, e, a, need_distinct(e), 1, e->card, 1);
2326 : } else {
2327 44 : ne = exp_copy(v->sql, oa);
2328 : }
2329 104 : exp_setname(v->sql->sa, ne, exp_find_rel_name(oa), exp_name(oa));
2330 104 : append(exps, ne);
2331 : }
2332 74 : v->changes++;
2333 74 : return rel_inplace_groupby( rel, u, gbe, exps);
2334 : }
2335 : return rel;
2336 : }
2337 :
2338 : /*
2339 : * More general
2340 : * groupby(
2341 : * [ outer ] join(
2342 : * project(
2343 : * table(A) [ c1, c2, .. ]
2344 : * ) [ c1, c2, identity(c2) as I, .. ],
2345 : * table(B) [ c1, c2, .. ]
2346 : * ) [ A.c1 = B.c1 ]
2347 : * ) [ I ] [ a1, a2, .. ]
2348 : *
2349 : * ->
2350 : *
2351 : * [ outer ] join(
2352 : * project(
2353 : * table(A) [ c1, c2, .. ]
2354 : * ) [ c1, c2, .. ],
2355 : * groupby (
2356 : * table(B) [ c1, c2, .. ]
2357 : * ) [ B.c1 ] [ a1, a2, .. ]
2358 : * ) [ A.c1 = B.c1 ]
2359 : */
2360 : static sql_rel *
2361 63410 : gen_push_groupby_down(mvc *sql, sql_rel *rel, int *changes)
2362 : {
2363 63410 : sql_rel *j = rel->l;
2364 63410 : list *gbe = rel->r;
2365 :
2366 63410 : if (rel->op == op_groupby && list_length(gbe) == 1 && j->op == op_join){
2367 6146 : sql_rel *jl = j->l, *jr = j->r, *cr, *cl;
2368 6146 : sql_exp *gb = gbe->h->data, *e;
2369 6146 : node *n;
2370 6146 : int left = 1;
2371 6146 : list *aggrs, *aliases, *gbe;
2372 :
2373 6146 : if (!is_identity(gb, jl) && !is_identity(gb, jr))
2374 : return rel;
2375 6 : if (jl->op == op_project &&
2376 4 : (e = list_find_exp( jl->exps, gb)) != NULL &&
2377 2 : find_prop(e->p, PROP_HASHCOL) != NULL) {
2378 : left = 0;
2379 : cr = jr;
2380 : cl = jl;
2381 4 : } else if (jr->op == op_project &&
2382 8 : (e = list_find_exp( jr->exps, gb)) != NULL &&
2383 4 : find_prop(e->p, PROP_HASHCOL) != NULL) {
2384 : left = 1;
2385 : cr = jl;
2386 : cl = jr;
2387 : } else {
2388 0 : return rel;
2389 : }
2390 :
2391 6 : if ((left && is_base(jl->op)) || (!left && is_base(jr->op))||
2392 0 : (left && is_select(jl->op)) || (!left && is_select(jr->op))
2393 0 : || rel_is_join_on_pkey(j, false))
2394 6 : return rel;
2395 :
2396 : /* only add aggr (based on left/right), and repeat the group by column */
2397 0 : aggrs = sa_list(sql->sa);
2398 0 : aliases = sa_list(sql->sa);
2399 0 : if (rel->exps) for (n = rel->exps->h; n; n = n->next) {
2400 0 : sql_exp *ce = n->data;
2401 :
2402 0 : if (exp_is_atom(ce))
2403 0 : list_append(aliases, ce);
2404 0 : else if (ce->type == e_column) {
2405 0 : if (rel_has_exp(cl, ce, false) == 0) /* collect aliases outside groupby */
2406 0 : list_append(aliases, ce);
2407 : else
2408 0 : list_append(aggrs, ce);
2409 0 : } else if (ce->type == e_aggr) {
2410 0 : list *args = ce->l;
2411 :
2412 : /* check args are part of left/right */
2413 0 : if (!list_empty(args) && rel_has_exps(cl, args, false) == 0)
2414 : return rel;
2415 0 : if (rel->op != op_join && exp_aggr_is_count(ce))
2416 0 : ce->p = prop_create(sql->sa, PROP_COUNT, ce->p);
2417 0 : list_append(aggrs, ce);
2418 : }
2419 : }
2420 : /* TODO move any column expressions (aliases) into the project list */
2421 :
2422 : /* find gb in left or right and should be unique */
2423 0 : gbe = sa_list(sql->sa);
2424 : /* push groupby to right, group on join exps */
2425 0 : if (j->exps) for (n = j->exps->h; n; n = n->next) {
2426 0 : sql_exp *ce = n->data, *l = ce->l, *r = ce->r, *e;
2427 :
2428 : /* get left/right hand of e_cmp */
2429 0 : assert(ce->type == e_cmp);
2430 0 : if (ce->flag == cmp_equal && is_alias(l->type) && is_alias(r->type) &&
2431 0 : (((e = rel_find_exp(cr, l)) && rel_find_exp(cl, r)) ||
2432 0 : ((e = rel_find_exp(cr, r)) && rel_find_exp(cl, l)))) {
2433 0 : e = exp_ref(sql, e);
2434 0 : list_append(gbe, e);
2435 : } else {
2436 0 : return rel;
2437 : }
2438 : }
2439 0 : if (!left)
2440 0 : cr = j->r = rel_groupby(sql, cr, gbe);
2441 : else
2442 0 : cr = j->l = rel_groupby(sql, cr, gbe);
2443 0 : cr->exps = list_merge(cr->exps, aggrs, (fdup)NULL);
2444 0 : set_processed(cr);
2445 0 : if (!is_project(cl->op))
2446 0 : cl = rel_project(sql->sa, cl,
2447 : rel_projections(sql, cl, NULL, 1, 1));
2448 0 : cl->exps = list_merge(cl->exps, aliases, (fdup)NULL);
2449 0 : set_processed(cl);
2450 0 : if (!left)
2451 0 : j->l = cl;
2452 : else
2453 0 : j->r = cl;
2454 0 : rel -> l = NULL;
2455 0 : rel_destroy(rel);
2456 :
2457 0 : if (list_empty(cr->exps) && list_empty(j->exps)) { /* remove crossproduct */
2458 0 : sql_rel *r = cl;
2459 0 : if (!left)
2460 0 : j->l = NULL;
2461 : else
2462 0 : j->r = NULL;
2463 0 : rel_destroy(j);
2464 0 : j = r;
2465 : }
2466 0 : (*changes)++;
2467 0 : return j;
2468 : }
2469 : return rel;
2470 : }
2471 :
2472 : /*
2473 : * Rewrite group(project(join(A,Dict)[a.i==dict.i])[...dict.n])[dict.n][ ... dict.n ]
2474 : * into
2475 : * project(join(groupby (A)[a.i],[a.i]), Dict)[a.i==dict.i])[dict.n]
2476 : *
2477 : */
2478 : static inline sql_rel *
2479 133956 : rel_push_groupby_down(visitor *v, sql_rel *rel)
2480 : {
2481 133956 : sql_rel *p = rel->l;
2482 133956 : list *gbe = rel->r;
2483 :
2484 133956 : if (rel->op == op_groupby && gbe && p && is_join(p->op))
2485 9716 : return gen_push_groupby_down(v->sql, rel, &v->changes);
2486 124240 : if (rel->op == op_groupby && gbe && p && p->op == op_project) {
2487 53695 : sql_rel *j = p->l;
2488 53695 : sql_rel *jl, *jr;
2489 53695 : node *n;
2490 :
2491 53695 : if (!j || j->op != op_join || list_length(j->exps) != 1)
2492 52364 : return gen_push_groupby_down(v->sql, rel, &v->changes);
2493 1331 : jl = j->l;
2494 1331 : jr = j->r;
2495 :
2496 : /* check if jr is a dict with index and var still used */
2497 1331 : if (jr->op != op_basetable || jr->l || !jr->r || list_length(jr->exps) != 2)
2498 1331 : return gen_push_groupby_down(v->sql, rel, &v->changes);
2499 :
2500 : /* check if group by is done on dict column */
2501 0 : for(n = gbe->h; n; n = n->next) {
2502 0 : sql_exp *ge = n->data, *pe = NULL, *e = NULL;
2503 :
2504 : /* find group by exp in project, then in dict */
2505 0 : pe = rel_find_exp(p, ge);
2506 0 : if (pe) /* find project exp in right hand of join, ie dict */
2507 0 : e = rel_find_exp(jr, pe);
2508 0 : if (pe && e) { /* Rewrite: join with dict after the group by */
2509 0 : list *pexps = rel_projections(v->sql, rel, NULL, 1, 1), *npexps;
2510 0 : node *m;
2511 0 : sql_exp *ne = j->exps->h->data; /* join exp */
2512 0 : p->l = jl; /* Project now only on the left side of the join */
2513 :
2514 0 : ne = ne->l; /* The left side of the compare is the index of the left */
2515 :
2516 : /* find ge reference in new projection list */
2517 0 : npexps = sa_list(v->sql->sa);
2518 0 : for (m = pexps->h; m; m = m->next) {
2519 0 : sql_exp *a = m->data;
2520 :
2521 0 : if (exp_refers(ge, a)) {
2522 0 : sql_exp *sc = jr->exps->t->data;
2523 0 : sql_exp *e = exp_ref(v->sql, sc);
2524 0 : if (exp_name(a))
2525 0 : exp_prop_alias(v->sql->sa, e, a);
2526 : a = e;
2527 : }
2528 0 : append(npexps, a);
2529 : }
2530 :
2531 : /* find ge in aggr list */
2532 0 : for (m = rel->exps->h; m; m = m->next) {
2533 0 : sql_exp *a = m->data;
2534 :
2535 0 : if (exp_match_exp(a, ge) || exp_refers(ge, a)) {
2536 0 : a = exp_ref(v->sql, ne);
2537 0 : if (exp_name(ne))
2538 0 : exp_prop_alias(v->sql->sa, a, ne);
2539 0 : m->data = a;
2540 : }
2541 : }
2542 :
2543 : /* change alias pe, ie project out the index */
2544 0 : pe->l = (void*)exp_relname(ne);
2545 0 : pe->r = (void*)exp_name(ne);
2546 0 : if (exp_name(ne))
2547 0 : exp_prop_alias(v->sql->sa, pe, ne);
2548 :
2549 : /* change alias ge */
2550 0 : ge->l = (void*)exp_relname(pe);
2551 0 : ge->r = (void*)exp_name(pe);
2552 0 : if (exp_name(pe))
2553 0 : exp_prop_alias(v->sql->sa, ge, pe);
2554 :
2555 : /* zap both project and groupby name hash tables (as we changed names above) */
2556 0 : list_hash_clear(rel->exps);
2557 0 : list_hash_clear((list*)rel->r);
2558 0 : list_hash_clear(p->exps);
2559 :
2560 : /* add join */
2561 0 : j->l = rel;
2562 0 : rel = rel_project(v->sql->sa, j, npexps);
2563 0 : v->changes++;
2564 : }
2565 : }
2566 : }
2567 : return rel;
2568 : }
2569 :
2570 : /* reduce group by expressions based on pkey info
2571 : *
2572 : * The reduced group by and (derived) aggr expressions are restored via
2573 : * extra (new) aggregate columns.
2574 : */
2575 : static inline sql_rel *
2576 133957 : rel_reduce_groupby_exps(visitor *v, sql_rel *rel)
2577 : {
2578 133957 : list *gbe = rel->r;
2579 :
2580 133957 : if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel) && list_length(gbe)) {
2581 50088 : node *n, *m;
2582 50088 : int k, j, i, ngbe = list_length(gbe);
2583 50088 : int8_t *scores = SA_NEW_ARRAY(v->sql->ta, int8_t, ngbe);
2584 50088 : sql_column *c;
2585 50088 : sql_table **tbls = SA_NEW_ARRAY(v->sql->ta, sql_table*, ngbe);
2586 50088 : sql_rel **bts = SA_NEW_ARRAY(v->sql->ta, sql_rel*, ngbe), *bt = NULL;
2587 :
2588 50088 : gbe = rel->r;
2589 120741 : for (k = 0, i = 0, n = gbe->h; n; n = n->next, k++) {
2590 70653 : sql_exp *e = n->data;
2591 :
2592 70653 : c = exp_find_column_(rel, e, -2, &bt);
2593 70653 : if (c) {
2594 73759 : for(j = 0; j < i; j++)
2595 22366 : if (c->t == tbls[j] && bts[j] == bt)
2596 : break;
2597 64281 : tbls[j] = c->t;
2598 64281 : bts[j] = bt;
2599 64281 : i += (j == i);
2600 : }
2601 : }
2602 50088 : if (i) { /* forall tables find pkey and
2603 : remove useless other columns */
2604 : /* TODO also remove group by columns which are related to
2605 : * the other columns using a foreign-key join (n->1), ie 1
2606 : * on the to be removed side.
2607 : */
2608 97973 : for(j = 0; j < i; j++) {
2609 51381 : int l, nr = 0, cnr = 0;
2610 :
2611 51381 : k = list_length(gbe);
2612 51381 : memset(scores, 0, list_length(gbe));
2613 51381 : if (tbls[j]->pkey) {
2614 12875 : for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
2615 9538 : fcmp cmp = (fcmp)&kc_column_cmp;
2616 9538 : sql_exp *e = n->data;
2617 :
2618 9538 : c = exp_find_column_(rel, e, -2, &bt);
2619 14912 : if (c && c->t == tbls[j] && bts[j] == bt &&
2620 5374 : list_find(tbls[j]->pkey->k.columns, c, cmp) != NULL) {
2621 755 : scores[l] = 1;
2622 755 : nr ++;
2623 8314 : } else if (c && c->t == tbls[j] && bts[j] == bt) {
2624 : /* Okay we can cleanup a group by column */
2625 4619 : scores[l] = -1;
2626 4619 : cnr ++;
2627 : }
2628 : }
2629 : }
2630 3337 : if (nr) {
2631 746 : int all = (list_length(tbls[j]->pkey->k.columns) == nr);
2632 746 : sql_kc *kc = tbls[j]->pkey->k.columns->h->data;
2633 :
2634 746 : c = kc->c;
2635 2025 : for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
2636 1279 : sql_exp *e = n->data;
2637 :
2638 : /* pkey based group by */
2639 1423 : if (scores[l] == 1 && ((all ||
2640 : /* first of key */
2641 871 : (c == exp_find_column(rel, e, -2))) && !find_prop(e->p, PROP_HASHCOL)))
2642 13 : e->p = prop_create(v->sql->sa, PROP_HASHCOL, e->p);
2643 : }
2644 2853 : for (m = rel->exps->h; m; m = m->next ){
2645 2107 : sql_exp *e = m->data;
2646 :
2647 7582 : for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
2648 5488 : sql_exp *gb = n->data;
2649 :
2650 : /* pkey based group by */
2651 5488 : if (scores[l] == 1 && exp_match_exp(e,gb) && find_prop(gb->p, PROP_HASHCOL) && !find_prop(e->p, PROP_HASHCOL)) {
2652 13 : e->p = prop_create(v->sql->sa, PROP_HASHCOL, e->p);
2653 13 : break;
2654 : }
2655 :
2656 : }
2657 : }
2658 : }
2659 51381 : if (cnr && nr && list_length(tbls[j]->pkey->k.columns) == nr) {
2660 108 : list *ngbe = new_exp_list(v->sql->sa);
2661 :
2662 392 : for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
2663 284 : sql_exp *e = n->data;
2664 :
2665 : /* keep the group by columns which form a primary key
2666 : * of this table. And those unrelated to this table. */
2667 284 : if (scores[l] != -1)
2668 139 : append(ngbe, e);
2669 : }
2670 108 : rel->r = ngbe;
2671 : /* rewrite gbe and aggr, in the aggr list */
2672 410 : for (m = rel->exps->h; m; m = m->next ){
2673 302 : sql_exp *e = m->data;
2674 302 : int fnd = 0;
2675 :
2676 1276 : for (l = 0, n = gbe->h; l < k && n && !fnd; l++, n = n->next) {
2677 974 : sql_exp *gb = n->data;
2678 :
2679 974 : if (scores[l] == -1 && exp_refers(gb, e)) {
2680 145 : sql_exp *rs = exp_column(v->sql->sa, gb->l?gb->l:exp_relname(gb), gb->r?gb->r:exp_name(gb), exp_subtype(gb), rel->card, has_nil(gb), is_unique(gb), is_intern(gb));
2681 145 : exp_setname(v->sql->sa, rs, exp_find_rel_name(e), exp_name(e));
2682 145 : e = rs;
2683 145 : fnd = 1;
2684 : }
2685 : }
2686 302 : m->data = e;
2687 : }
2688 : /* new reduced aggr expression list */
2689 108 : assert(list_length(rel->exps)>0);
2690 : /* only one reduction at a time */
2691 108 : list_hash_clear(rel->exps);
2692 108 : v->changes++;
2693 108 : return rel;
2694 : }
2695 51273 : gbe = rel->r;
2696 : }
2697 : }
2698 : }
2699 : /* remove constants from group by list */
2700 133849 : if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel)) {
2701 50493 : int i;
2702 50493 : node *n;
2703 :
2704 120862 : for (i = 0, n = gbe->h; n; n = n->next) {
2705 70369 : sql_exp *e = n->data;
2706 :
2707 70369 : if (exp_is_atom(e))
2708 54 : i++;
2709 : }
2710 50493 : if (i) {
2711 50 : list *ngbe = new_exp_list(v->sql->sa);
2712 50 : list *dgbe = new_exp_list(v->sql->sa);
2713 :
2714 108 : for (n = gbe->h; n; n = n->next) {
2715 58 : sql_exp *e = n->data;
2716 :
2717 58 : if (!exp_is_atom(e))
2718 4 : append(ngbe, e);
2719 : /* we need at least one gbe */
2720 54 : else if (!n->next && list_empty(ngbe))
2721 47 : append(ngbe, e);
2722 : else
2723 7 : append(dgbe, e);
2724 : }
2725 50 : rel->r = ngbe;
2726 50 : if (!list_empty(dgbe)) {
2727 : /* use atom's directly in the aggr expr list */
2728 :
2729 27 : for (n = rel->exps->h; n; n = n->next) {
2730 20 : sql_exp *e = n->data, *ne = NULL;
2731 :
2732 20 : if (e->type == e_column) {
2733 15 : if (e->l)
2734 15 : ne = exps_bind_column2(dgbe, e->l, e->r, NULL);
2735 : else
2736 0 : ne = exps_bind_column(dgbe, e->r, NULL, NULL, 1);
2737 15 : if (ne) {
2738 7 : ne = exp_copy(v->sql, ne);
2739 7 : exp_prop_alias(v->sql->sa, ne, e);
2740 7 : e = ne;
2741 : }
2742 : }
2743 20 : n->data = e;
2744 : }
2745 7 : list_hash_clear(rel->exps);
2746 7 : v->changes++;
2747 : }
2748 : }
2749 : }
2750 : return rel;
2751 : }
2752 :
2753 : /* if all arguments to a distinct aggregate are unique, remove 'distinct' property */
2754 : static inline sql_rel *
2755 133957 : rel_distinct_aggregate_on_unique_values(visitor *v, sql_rel *rel)
2756 : {
2757 133957 : if (is_groupby(rel->op) && !list_empty(rel->exps)) {
2758 391619 : for (node *n = rel->exps->h; n; n = n->next) {
2759 257745 : sql_exp *exp = (sql_exp*) n->data;
2760 :
2761 257745 : if (exp->type == e_aggr && need_distinct(exp)) {
2762 771 : bool all_unique = true;
2763 771 : list *l = exp->l;
2764 :
2765 1542 : for (node *m = l->h; m && all_unique; m = m->next) {
2766 771 : sql_exp *arg = (sql_exp*) m->data;
2767 :
2768 903 : all_unique &= arg->type == e_column && is_unique(arg) && (!is_semantics(exp) || !has_nil(arg));
2769 : }
2770 771 : if (!all_unique && exps_card(l) > CARD_ATOM)
2771 518 : all_unique = exps_unique(v->sql, rel, l) && (!is_semantics(exp) || !have_nil(l));
2772 : if (all_unique) {
2773 132 : set_nodistinct(exp);
2774 132 : v->changes++;
2775 : }
2776 : }
2777 : }
2778 : }
2779 133957 : return rel;
2780 : }
2781 :
2782 : static inline sql_rel *
2783 133955 : rel_remove_const_aggr(visitor *v, sql_rel *rel)
2784 : {
2785 133955 : if (!rel)
2786 : return rel;
2787 133955 : if (rel && is_groupby(rel->op) && list_length(rel->exps) >= 1 && !rel_is_ref(rel)) {
2788 91428 : int needed = 0;
2789 257584 : for (node *n = rel->exps->h; n; n = n->next) {
2790 166155 : sql_exp *exp = (sql_exp*) n->data;
2791 :
2792 166155 : if (exp_is_atom(exp) && exp->type != e_aggr)
2793 66 : needed++;
2794 : }
2795 91429 : if (needed) {
2796 64 : if (!list_empty(rel->r)) {
2797 7 : int atoms = 0;
2798 : /* corner case, all grouping columns are atoms */
2799 15 : for (node *n = ((list*)rel->r)->h; n; n = n->next) {
2800 8 : sql_exp *exp = (sql_exp*) n->data;
2801 :
2802 8 : if (exp_is_atom(exp))
2803 2 : atoms++;
2804 : }
2805 7 : if (atoms == list_length(rel->r)) {
2806 2 : list *nexps = sa_list(v->sql->sa);
2807 5 : for (node *n = rel->exps->h; n; ) {
2808 3 : node *next = n->next;
2809 3 : sql_exp *e = (sql_exp*) n->data;
2810 :
2811 : /* remove references to constant group by columns */
2812 3 : if (e->type == e_column) {
2813 0 : sql_exp *found = NULL;
2814 0 : const char *nrname = (const char*) e->l, *nename = (const char*) e->r;
2815 0 : if (nrname && nename) {
2816 0 : found = exps_bind_column2(rel->r, nrname, nename, NULL);
2817 0 : } else if (nename) {
2818 0 : found = exps_bind_column(rel->r, nename, NULL, NULL, 1);
2819 : }
2820 0 : if (found) {
2821 0 : list_append(nexps, found);
2822 0 : list_remove_node(rel->exps, NULL, n);
2823 : }
2824 : }
2825 : n = next;
2826 : }
2827 2 : rel->r = NULL; /* transform it into a global aggregate */
2828 2 : rel->exps = list_merge(nexps, rel->exps, (fdup) NULL); /* add grouping columns back as projections */
2829 : /* global aggregates may return 1 row, so filter it based on the count */
2830 2 : sql_subfunc *cf = sql_bind_func(v->sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
2831 2 : sql_exp *count = exp_aggr(v->sql->sa, NULL, cf, 0, 1, CARD_ATOM, 0);
2832 2 : count = rel_groupby_add_aggr(v->sql, rel, count);
2833 2 : sql_exp *cp = exp_compare(v->sql->sa, exp_ref(v->sql, count), exp_atom(v->sql->sa, atom_int(v->sql->sa, exp_subtype(count), 0)), cmp_notequal);
2834 2 : rel = rel_select(v->sql->sa, rel, cp);
2835 2 : set_processed(rel);
2836 2 : return rel;
2837 : }
2838 57 : } else if (list_length(rel->exps) == needed) { /* all are const */
2839 53 : sql_rel *ll = rel->l;
2840 53 : rel->op = op_project;
2841 : /* TODO check if l->l == const, else change that */
2842 53 : if (ll && ll->l) {
2843 49 : rel_destroy(ll);
2844 49 : rel->l = rel_project_exp(v->sql, exp_atom_bool(v->sql->sa, 1));
2845 : }
2846 53 : return rel;
2847 : }
2848 9 : sql_rel *nrel = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
2849 30 : for (node *n = nrel->exps->h; n; n = n->next) {
2850 21 : sql_exp *exp = (sql_exp*) n->data;
2851 21 : if (exp->type == e_column) {
2852 21 : sql_exp *e = rel_find_exp(rel, exp);
2853 :
2854 21 : if (e && exp_is_atom(e) && e->type == e_atom) {
2855 11 : sql_exp *ne = exp_copy(v->sql, e);
2856 11 : exp_setname(v->sql->sa, ne, exp_find_rel_name(exp), exp_name(exp));
2857 11 : n->data = ne;
2858 11 : v->changes++;
2859 : }
2860 : }
2861 : }
2862 9 : list *nl = sa_list(v->sql->sa);
2863 30 : for (node *n = rel->exps->h; n; n = n->next) {
2864 21 : sql_exp *exp = (sql_exp*) n->data;
2865 :
2866 21 : if (!exp_is_atom(exp) || exp->type != e_atom)
2867 10 : append(nl, exp);
2868 : }
2869 9 : rel->exps = nl;
2870 9 : return nrel;
2871 : }
2872 : }
2873 : return rel;
2874 : }
2875 :
2876 : #if 0
2877 : static sql_rel *
2878 : rel_groupby_distinct2(visitor *v, sql_rel *rel)
2879 : {
2880 : list *ngbes = sa_list(v->sql->sa), *gbes, *naggrs = sa_list(v->sql->sa), *aggrs = sa_list(v->sql->sa);
2881 : sql_rel *l;
2882 : node *n;
2883 :
2884 : gbes = rel->r;
2885 : if (!gbes)
2886 : return rel;
2887 :
2888 : /* check if each aggr is, rewritable (max,min,sum,count)
2889 : * and only has one argument */
2890 : for (n = rel->exps->h; n; n = n->next) {
2891 : sql_exp *e = n->data;
2892 : sql_subfunc *af = e->f;
2893 :
2894 : if (e->type == e_aggr &&
2895 : (strcmp(af->func->base.name, "sum") &&
2896 : strcmp(af->func->base.name, "count") &&
2897 : strcmp(af->func->base.name, "min") &&
2898 : strcmp(af->func->base.name, "max")))
2899 : return rel;
2900 : }
2901 :
2902 : for (n = gbes->h; n; n = n->next) {
2903 : sql_exp *e = n->data;
2904 :
2905 : e = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
2906 : append(ngbes, e);
2907 : }
2908 :
2909 : /* 1 for each aggr(distinct v) add the attribute expression v to gbes and aggrs list
2910 : * 2 for each aggr(z) add aggr_phase2('z') to the naggrs list
2911 : * 3 for each group by col, add also to the naggrs list
2912 : * */
2913 : for (n = rel->exps->h; n; n = n->next) {
2914 : sql_exp *e = n->data;
2915 :
2916 : if (e->type == e_aggr && need_distinct(e)) { /* 1 */
2917 : /* need column expression */
2918 : list *args = e->l;
2919 : sql_exp *v = args->h->data;
2920 : append(gbes, v);
2921 : if (!exp_name(v))
2922 : exp_label(v->sql->sa, v, ++v->sql->label);
2923 : v = exp_column(v->sql->sa, exp_find_rel_name(v), exp_name(v), exp_subtype(v), v->card, has_nil(v), is_unique(v), is_intern(v));
2924 : append(aggrs, v);
2925 : v = exp_aggr1(v->sql->sa, v, e->f, need_distinct(e), 1, e->card, 1);
2926 : exp_setname(v->sql->sa, v, exp_find_rel_name(e), exp_name(e));
2927 : append(naggrs, v);
2928 : } else if (e->type == e_aggr && !need_distinct(e)) {
2929 : sql_exp *v;
2930 : sql_subfunc *f = e->f;
2931 : int cnt = exp_aggr_is_count(e);
2932 : sql_subfunc *a = sql_bind_func(v->sql, "sys", (cnt)?"sum":f->func->base.name, exp_subtype(e), NULL, F_AGGR, true, true);
2933 :
2934 : append(aggrs, e);
2935 : if (!exp_name(e))
2936 : exp_label(v->sql->sa, e, ++v->sql->label);
2937 : set_has_nil(e);
2938 : v = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
2939 : v = exp_aggr1(v->sql->sa, v, a, 0, 1, e->card, 1);
2940 : if (cnt)
2941 : set_zero_if_empty(v);
2942 : exp_setname(v->sql->sa, v, exp_find_rel_name(e), exp_name(e));
2943 : append(naggrs, v);
2944 : } else { /* group by col */
2945 : if (list_find_exp(gbes, e) || !list_find_exp(naggrs, e)) {
2946 : append(aggrs, e);
2947 :
2948 : e = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
2949 : }
2950 : append(naggrs, e);
2951 : }
2952 : }
2953 :
2954 : l = rel->l = rel_groupby(v->sql, rel->l, gbes);
2955 : l->exps = aggrs;
2956 : rel->r = ngbes;
2957 : rel->exps = naggrs;
2958 : v->changes++;
2959 : return rel;
2960 : }
2961 : #endif
2962 :
2963 : /* Rewrite group by expressions with distinct
2964 : *
2965 : * ie select a, count(distinct b) from c where ... groupby a;
2966 : * No other aggregations should be present
2967 : *
2968 : * Rewrite the more general case, good for parallel execution
2969 : *
2970 : * groupby(R) [e,f] [ aggr1 a distinct, aggr2 b distinct, aggr3 c, aggr4 d]
2971 : *
2972 : * into
2973 : *
2974 : * groupby(
2975 : * groupby(R) [e,f,a,b] [ a, b, aggr3 c, aggr4 d]
2976 : * ) [e,f]( aggr1 a distinct, aggr2 b distinct, aggr3_phase2 c, aggr4_phase2 d)
2977 : */
2978 : static inline sql_rel *
2979 133957 : rel_groupby_distinct(visitor *v, sql_rel *rel)
2980 : {
2981 133957 : node *n;
2982 :
2983 133957 : if (is_groupby(rel->op)) {
2984 133877 : sql_rel *l = rel->l;
2985 133877 : if (!l || is_groupby(l->op))
2986 : return rel;
2987 : }
2988 133727 : if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel)) {
2989 50468 : int nr = 0, anr = 0;
2990 50468 : list *gbe, *ngbe, *arg, *exps, *nexps;
2991 50468 : sql_exp *distinct = NULL, *darg, *found;
2992 50468 : sql_rel *l = NULL;
2993 :
2994 174579 : for (n=rel->exps->h; n && nr <= 2; n = n->next) {
2995 124111 : sql_exp *e = n->data;
2996 124111 : if (need_distinct(e)) {
2997 517 : distinct = n->data;
2998 517 : nr++;
2999 : }
3000 124111 : anr += is_aggr(e->type);
3001 : }
3002 50468 : if (nr < 1 || distinct->type != e_aggr)
3003 : return rel;
3004 508 : if (nr > 1 || anr > nr)
3005 : return rel;//rel_groupby_distinct2(v, rel);
3006 108 : arg = distinct->l;
3007 108 : if (list_length(arg) != 1 || list_length(rel->r) + nr != list_length(rel->exps))
3008 2 : return rel;
3009 :
3010 106 : gbe = rel->r;
3011 106 : ngbe = sa_list(v->sql->sa);
3012 106 : exps = sa_list(v->sql->sa);
3013 106 : nexps = sa_list(v->sql->sa);
3014 336 : for (n=rel->exps->h; n; n = n->next) {
3015 230 : sql_exp *e = n->data;
3016 230 : if (e != distinct) {
3017 124 : if (e->type == e_aggr) { /* copy the arguments to the aggregate */
3018 0 : list *args = e->l;
3019 0 : if (args) {
3020 0 : for (node *n = args->h ; n ; n = n->next) {
3021 0 : sql_exp *e = n->data;
3022 0 : list_append(ngbe, exp_copy(v->sql, e));
3023 0 : list_append(exps, exp_copy(v->sql, e));
3024 : }
3025 : }
3026 : } else {
3027 124 : e = exp_ref(v->sql, e);
3028 124 : append(ngbe, e);
3029 124 : append(exps, e);
3030 : }
3031 124 : if (e->type == e_aggr) /* aggregates must be copied */
3032 0 : e = exp_copy(v->sql, e);
3033 : else
3034 124 : e = exp_ref(v->sql, e);
3035 124 : append(nexps, e);
3036 : }
3037 : }
3038 :
3039 106 : darg = arg->h->data;
3040 106 : if ((found = exps_find_exp(exps, darg)) == NULL) { /* not already in the groups projection list */
3041 106 : if ((found = exps_find_exp(gbe, darg))) { /* first find if the aggregate argument already exists in the grouping list */
3042 0 : darg = exp_ref(v->sql, found);
3043 : } else {
3044 106 : list_append(gbe, darg = exp_copy(v->sql, darg));
3045 106 : exp_label(v->sql->sa, darg, ++v->sql->label);
3046 106 : darg = exp_ref(v->sql, darg);
3047 : }
3048 106 : list_append(exps, darg);
3049 106 : darg = exp_ref(v->sql, darg);
3050 : } else {
3051 0 : darg = exp_ref(v->sql, found);
3052 : }
3053 106 : arg->h->data = darg;
3054 106 : l = rel->l = rel_groupby(v->sql, rel->l, gbe);
3055 106 : l->exps = exps;
3056 106 : set_processed(l);
3057 106 : rel->r = ngbe;
3058 106 : rel->exps = nexps;
3059 106 : set_nodistinct(distinct);
3060 106 : append(nexps, distinct);
3061 106 : v->changes++;
3062 : }
3063 : return rel;
3064 : }
3065 :
3066 : /*
3067 : * Push Count inside crossjoin down, and multiply the results
3068 : *
3069 : * project ( project(
3070 : * group by ( crossproduct (
3071 : * crossproduct( project (
3072 : * L, => group by (
3073 : * R L
3074 : * ) [ ] [ count NOT NULL ] ) [ ] [ count NOT NULL ]
3075 : * ) ),
3076 : * ) [ NOT NULL ] project (
3077 : * group by (
3078 : * R
3079 : * ) [ ] [ count NOT NULL ]
3080 : * )
3081 : * ) [ sql_mul(.., .. NOT NULL) ]
3082 : * )
3083 : */
3084 : static inline sql_rel *
3085 133956 : rel_push_count_down(visitor *v, sql_rel *rel)
3086 : {
3087 133956 : sql_rel *r = rel->l;
3088 :
3089 133956 : if (is_groupby(rel->op) && !rel_is_ref(rel) && list_empty(rel->r) &&
3090 41305 : r && !r->exps && r->op == op_join && !(rel_is_ref(r)) &&
3091 : /* currently only single count aggregation is handled, no other projects or aggregation */
3092 76 : list_length(rel->exps) == 1 && exp_aggr_is_count(rel->exps->h->data)) {
3093 21 : sql_exp *nce, *oce, *cnt1 = NULL, *cnt2 = NULL;
3094 21 : sql_rel *gbl = NULL, *gbr = NULL; /* Group By */
3095 21 : sql_rel *cp = NULL; /* Cross Product */
3096 21 : sql_rel *srel;
3097 :
3098 21 : oce = rel->exps->h->data;
3099 21 : if (oce->l) /* we only handle COUNT(*) */
3100 : return rel;
3101 :
3102 19 : srel = r->l;
3103 : {
3104 19 : sql_subfunc *cf = sql_bind_func(v->sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
3105 19 : sql_exp *e = exp_aggr(v->sql->sa, NULL, cf, need_distinct(oce), need_no_nil(oce), oce->card, 0);
3106 :
3107 19 : exp_label(v->sql->sa, e, ++v->sql->label);
3108 19 : cnt1 = exp_ref(v->sql, e);
3109 19 : gbl = rel_groupby(v->sql, rel_dup(srel), NULL);
3110 19 : set_processed(gbl);
3111 19 : rel_groupby_add_aggr(v->sql, gbl, e);
3112 : }
3113 :
3114 19 : srel = r->r;
3115 : {
3116 19 : sql_subfunc *cf = sql_bind_func(v->sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
3117 19 : sql_exp *e = exp_aggr(v->sql->sa, NULL, cf, need_distinct(oce), need_no_nil(oce), oce->card, 0);
3118 :
3119 19 : exp_label(v->sql->sa, e, ++v->sql->label);
3120 19 : cnt2 = exp_ref(v->sql, e);
3121 19 : gbr = rel_groupby(v->sql, rel_dup(srel), NULL);
3122 19 : set_processed(gbr);
3123 19 : rel_groupby_add_aggr(v->sql, gbr, e);
3124 : }
3125 :
3126 19 : cp = rel_crossproduct(v->sql->sa, gbl, gbr, op_join);
3127 19 : set_processed(cp);
3128 :
3129 19 : if (!(nce = rel_binop_(v->sql, NULL, cnt1, cnt2, "sys", "sql_mul", card_value, true))) {
3130 0 : v->sql->session->status = 0;
3131 0 : v->sql->errstr[0] = '\0';
3132 0 : return rel; /* error, fallback to original expression */
3133 : }
3134 : /* because of remote plans, make sure "sql_mul" returns bigint. The cardinality is atomic, so no major performance penalty */
3135 19 : if (subtype_cmp(exp_subtype(oce), exp_subtype(nce)) != 0)
3136 19 : nce = exp_convert(v->sql->sa, nce, exp_subtype(nce), exp_subtype(oce));
3137 19 : if (exp_name(oce))
3138 19 : exp_prop_alias(v->sql->sa, nce, oce);
3139 :
3140 19 : rel_destroy(rel);
3141 19 : rel = rel_project(v->sql->sa, cp, append(new_exp_list(v->sql->sa), nce));
3142 19 : set_processed(rel);
3143 :
3144 19 : v->changes++;
3145 : }
3146 :
3147 : return rel;
3148 : }
3149 :
3150 : static inline sql_rel *
3151 41627 : rel_basecount(visitor *v, sql_rel *rel)
3152 : {
3153 51894 : if (is_groupby(rel->op) && !rel_is_ref(rel) && rel->l && list_empty(rel->r) &&
3154 19520 : list_length(rel->exps) == 1 && exp_aggr_is_count(rel->exps->h->data)) {
3155 4187 : sql_rel *bt = rel->l;
3156 4187 : sql_exp *e = rel->exps->h->data;
3157 4187 : if (is_basetable(bt->op) && list_empty(e->l)) { /* count(*) */
3158 : /* change into select cnt('schema','table') */
3159 651 : sql_table *t = bt->l;
3160 : /* I need to get the declared table's frame number to make this work correctly for those */
3161 651 : if (!isTable(t) || isDeclaredTable(t))
3162 : return rel;
3163 556 : sql_subfunc *cf = sql_bind_func(v->sql, "sys", "cnt", sql_bind_localtype("str"), sql_bind_localtype("str"), F_FUNC, true, true);
3164 556 : list *exps = sa_list(v->sql->sa);
3165 556 : append(exps, exp_atom_str(v->sql->sa, t->s->base.name, sql_bind_localtype("str")));
3166 556 : append(exps, exp_atom_str(v->sql->sa, t->base.name, sql_bind_localtype("str")));
3167 556 : sql_exp *ne = exp_op(v->sql->sa, exps, cf);
3168 :
3169 556 : ne = exp_propagate(v->sql->sa, ne, e);
3170 556 : exp_setname(v->sql->sa, ne, exp_find_rel_name(e), exp_name(e));
3171 556 : rel_destroy(rel);
3172 556 : rel = rel_project(v->sql->sa, NULL, append(sa_list(v->sql->sa), ne));
3173 556 : v->changes++;
3174 : }
3175 : }
3176 : return rel;
3177 : }
3178 :
3179 : static inline sql_rel *
3180 41627 : rel_simplify_count(visitor *v, sql_rel *rel)
3181 : {
3182 41627 : if (is_groupby(rel->op) && !list_empty(rel->exps)) {
3183 41526 : mvc *sql = v->sql;
3184 41526 : int ncountstar = 0;
3185 :
3186 : /* Convert count(no null) into count(*) */
3187 128511 : for (node *n = rel->exps->h; n ; n = n->next) {
3188 86985 : sql_exp *e = n->data;
3189 :
3190 86985 : if (exp_aggr_is_count(e) && !need_distinct(e)) {
3191 13961 : if (list_length(e->l) == 0) {
3192 11221 : ncountstar++;
3193 2740 : } else if (list_length(e->l) == 1 && !has_nil((sql_exp*)((list*)e->l)->h->data)) {
3194 2420 : sql_subfunc *cf = sql_bind_func(sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
3195 2420 : sql_exp *ne = exp_aggr(sql->sa, NULL, cf, 0, 0, e->card, 0);
3196 2420 : if (exp_name(e))
3197 2420 : exp_prop_alias(sql->sa, ne, e);
3198 2420 : n->data = ne;
3199 2420 : ncountstar++;
3200 2420 : v->changes++;
3201 : }
3202 : }
3203 : }
3204 : /* With multiple count(*), use exp_ref to reduce the number of calls to this aggregate */
3205 41526 : if (ncountstar > 1) {
3206 11 : sql_exp *count_star = NULL;
3207 11 : sql_rel *nrel = rel_project(v->sql->sa, rel, NULL);
3208 11 : list *aexps = sa_list(v->sql->sa), *nexps = sa_list(v->sql->sa);
3209 11 : nrel->exps = nexps;
3210 64 : for (node *n = rel->exps->h; n ; n = n->next) {
3211 53 : sql_exp *e = n->data;
3212 :
3213 53 : if (exp_aggr_is_count(e) && !need_distinct(e) && list_length(e->l) == 0) {
3214 25 : if (!count_star) {
3215 11 : count_star = e;
3216 11 : append(aexps, e);
3217 11 : append(nexps, exp_ref(sql, e));
3218 : } else {
3219 14 : sql_exp *ne = exp_ref(sql, count_star);
3220 :
3221 14 : if (exp_name(e))
3222 14 : exp_prop_alias(sql->sa, ne, e);
3223 14 : v->changes++;
3224 14 : append(nexps, ne);
3225 : }
3226 : } else {
3227 28 : append(aexps, e);
3228 28 : append(nexps, exp_ref(sql, e));
3229 : }
3230 : }
3231 11 : rel->exps = aexps;
3232 11 : return nrel;
3233 : }
3234 : }
3235 : return rel;
3236 : }
3237 :
3238 : static sql_rel *
3239 41627 : rel_groupjoin(visitor *v, sql_rel *rel)
3240 : {
3241 41627 : if (!rel || rel_is_ref(rel) || !is_groupby(rel->op) || list_empty(rel->r))
3242 14886 : return rel;
3243 :
3244 26741 : sql_rel *j = rel->l;
3245 26741 : if (!j || rel_is_ref(j) /*|| !is_left(j->op)*/ || j->op != op_join || list_length(rel->exps) > 1 /* only join because left joins aren't optimized jet (TODO), only length 1 as implementation of groupjoins is missing */ || !list_empty(rel->attr))
3246 26036 : return rel;
3247 : /* check group by exps == equi join exps */
3248 705 : list *gbes = rel->r;
3249 705 : if (list_length(gbes) != list_length(j->exps))
3250 : return rel;
3251 618 : int nr = 0;
3252 1233 : for(node *n = gbes->h; n; n = n->next) {
3253 618 : sql_exp *gbe = n->data;
3254 1233 : for(node *m = j->exps->h; m; m = m->next) {
3255 618 : sql_exp *je = m->data;
3256 618 : if (je->type != e_cmp || je->flag != cmp_equal)
3257 : return rel;
3258 : /* check if its a join exp (ie not a selection) */
3259 619 : if (!( (!rel_has_exp(j->l, je->l, false) && !rel_has_exp(j->r, je->r, false)) ||
3260 8 : (!rel_has_exp(j->l, je->r, false) && !rel_has_exp(j->r, je->l, false))))
3261 0 : return rel;
3262 615 : if (exp_match(je->l, gbe)) {
3263 76 : nr++;
3264 539 : } else if (exp_match(je->r, gbe)) {
3265 10 : nr++;
3266 : }
3267 : }
3268 : }
3269 615 : if (nr == list_length(gbes)) {
3270 : // printf("#group by converted\n");
3271 86 : j = rel_dup(j);
3272 86 : j->attr = rel->exps;
3273 86 : v->changes++;
3274 86 : rel_destroy(rel);
3275 86 : return j;
3276 : }
3277 : return rel;
3278 : }
3279 :
3280 : static sql_rel *
3281 4112856 : rel_optimize_projections_(visitor *v, sql_rel *rel)
3282 : {
3283 4112856 : rel = rel_project_cse(v, rel);
3284 :
3285 4112875 : if (!rel || !is_groupby(rel->op))
3286 : return rel;
3287 :
3288 133957 : rel = rel_remove_const_aggr(v, rel);
3289 133956 : if (v->value_based_opt) {
3290 41627 : rel = rel_simplify_sum(v, rel);
3291 41627 : rel = rel_simplify_groupby_columns(v, rel);
3292 : }
3293 133956 : rel = rel_groupby_cse(v, rel);
3294 133956 : rel = rel_push_aggr_down(v, rel);
3295 133956 : rel = rel_push_groupby_down(v, rel);
3296 133956 : rel = rel_reduce_groupby_exps(v, rel);
3297 133957 : rel = rel_distinct_aggregate_on_unique_values(v, rel);
3298 133957 : rel = rel_groupby_distinct(v, rel);
3299 133956 : rel = rel_push_count_down(v, rel);
3300 : /* only when value_based_opt is on, ie not for dependency resolution */
3301 133957 : if (v->value_based_opt) {
3302 41627 : rel = rel_simplify_count(v, rel);
3303 41627 : rel = rel_basecount(v, rel);
3304 :
3305 41627 : rel = rel_groupjoin(v, rel);
3306 : }
3307 : return rel;
3308 : }
3309 :
3310 : static sql_rel *
3311 162005 : rel_optimize_projections(visitor *v, global_props *gp, sql_rel *rel)
3312 : {
3313 162005 : (void) gp;
3314 162005 : return rel_visitor_topdown(v, rel, &rel_optimize_projections_);
3315 : }
3316 :
3317 : run_optimizer
3318 600950 : bind_optimize_projections(visitor *v, global_props *gp)
3319 : {
3320 600950 : int flag = v->sql->sql_optimizer;
3321 600772 : return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_project] || gp->cnt[op_union] || gp->cnt[op_munion]
3322 1201722 : || gp->cnt[op_inter] || gp->cnt[op_except]) && (flag & optimize_projections) ? rel_optimize_projections : NULL;
3323 : }
3324 :
3325 :
3326 : static inline sql_rel *
3327 1802917 : rel_push_project_down_union(visitor *v, sql_rel *rel)
3328 : {
3329 : /* first remove distinct if already unique */
3330 1802917 : if (rel->op == op_project && need_distinct(rel) && rel->exps && exps_unique(v->sql, rel, rel->exps) && !have_nil(rel->exps)) {
3331 15 : set_nodistinct(rel);
3332 15 : if (exps_card(rel->exps) <= CARD_ATOM && rel->card > CARD_ATOM) { /* if the projection just contains constants, then no topN is needed */
3333 0 : sql_rel *nl = rel->l = rel_topn(v->sql->sa, rel->l, append(sa_list(v->sql->sa), exp_atom_lng(v->sql->sa, 1)));
3334 0 : set_processed(nl);
3335 : }
3336 15 : v->changes++;
3337 : }
3338 :
3339 1802917 : if (rel->op == op_project && rel->l && rel->exps && list_empty(rel->r)) {
3340 432053 : int need_distinct = need_distinct(rel);
3341 432053 : sql_rel *u = rel->l;
3342 432053 : sql_rel *p = rel;
3343 432053 : sql_rel *ul = u->l;
3344 432053 : sql_rel *ur = u->r;
3345 :
3346 432053 : if (!u || !(is_union(u->op) || is_munion(u->op)) || need_distinct(u) || !u->exps || rel_is_ref(u) || project_unsafe(rel,0))
3347 430156 : return rel;
3348 :
3349 : // TODO: for now we have to differentiate between union and munion
3350 1897 : if (is_union(u->op)) {
3351 : /* don't push project down union of single values */
3352 1867 : if ((is_project(ul->op) && !ul->l) || (is_project(ur->op) && !ur->l))
3353 : return rel;
3354 :
3355 1829 : ul = rel_dup(ul);
3356 1829 : ur = rel_dup(ur);
3357 :
3358 1829 : if (!is_project(ul->op))
3359 90 : ul = rel_project(v->sql->sa, ul,
3360 : rel_projections(v->sql, ul, NULL, 1, 1));
3361 1829 : if (!is_project(ur->op))
3362 108 : ur = rel_project(v->sql->sa, ur,
3363 : rel_projections(v->sql, ur, NULL, 1, 1));
3364 1829 : need_distinct = (need_distinct &&
3365 0 : (!exps_unique(v->sql, ul, ul->exps) || have_nil(ul->exps) ||
3366 0 : !exps_unique(v->sql, ur, ur->exps) || have_nil(ur->exps)));
3367 1829 : rel_rename_exps(v->sql, u->exps, ul->exps);
3368 1829 : rel_rename_exps(v->sql, u->exps, ur->exps);
3369 :
3370 : /* introduce projects under the set */
3371 1829 : ul = rel_project(v->sql->sa, ul, NULL);
3372 1829 : if (need_distinct)
3373 0 : set_distinct(ul);
3374 1829 : ur = rel_project(v->sql->sa, ur, NULL);
3375 1829 : if (need_distinct)
3376 0 : set_distinct(ur);
3377 :
3378 1829 : ul->exps = exps_copy(v->sql, p->exps);
3379 1829 : set_processed(ul);
3380 1829 : ur->exps = exps_copy(v->sql, p->exps);
3381 1829 : set_processed(ur);
3382 :
3383 1829 : rel = rel_inplace_setop(v->sql, rel, ul, ur, op_union,
3384 : rel_projections(v->sql, rel, NULL, 1, 1));
3385 1829 : if (need_distinct)
3386 0 : set_distinct(rel);
3387 1829 : if (is_single(u))
3388 1 : set_single(rel);
3389 1829 : v->changes++;
3390 1829 : rel->l = rel_merge_projects_(v, rel->l);
3391 1829 : rel->r = rel_merge_projects_(v, rel->r);
3392 1829 : return rel;
3393 30 : } else if (is_munion(u->op)) {
3394 30 : sql_rel *r;
3395 :
3396 : /* don't push project down union of single values */
3397 90 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
3398 60 : r = n->data;
3399 : // TODO: does this check make sense?
3400 60 : if (is_project(r->op) && !r->l)
3401 : return rel;
3402 : }
3403 :
3404 90 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
3405 60 : r = rel_dup(n->data);
3406 :
3407 : /* introduce projection around each operand if needed */
3408 60 : if (!is_project(r->op))
3409 0 : r = rel_project(v->sql->sa, r,
3410 : rel_projections(v->sql, r, NULL, 1, 1));
3411 : /* check if we need distinct */
3412 120 : need_distinct &=
3413 60 : (!exps_unique(v->sql, r, r->exps) || have_nil(r->exps));
3414 60 : rel_rename_exps(v->sql, u->exps, r->exps);
3415 :
3416 60 : rel_destroy(n->data);
3417 60 : n->data = r;
3418 : }
3419 :
3420 : /* once we have checked for need_distinct in every rel we can
3421 : * introduce the projects under the munion which are gonna be
3422 : * copies of the single project above munion */
3423 90 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
3424 60 : r = rel_dup(n->data);
3425 :
3426 60 : r = rel_project(v->sql->sa, r, NULL);
3427 60 : if (need_distinct)
3428 0 : set_distinct(r);
3429 60 : r->exps = exps_copy(v->sql, p->exps);
3430 60 : set_processed(r);
3431 :
3432 60 : rel_destroy(n->data);
3433 60 : n->data = r;
3434 : }
3435 :
3436 : /* turn the project-munion on top into munion. incr operand
3437 : * rels count to make sure that they are not deleted by the
3438 : * subsequent rel_inplace_setop_n_ary */
3439 90 : for (node *n = ((list*)u->l)->h; n; n = n->next)
3440 60 : rel_dup(n->data);
3441 30 : rel = rel_inplace_setop_n_ary(v->sql, rel, u->l, op_munion,
3442 : rel_projections(v->sql, rel, NULL, 1, 1));
3443 30 : if (need_distinct)
3444 0 : set_distinct(rel);
3445 30 : if (is_single(u))
3446 1 : set_single(rel);
3447 :
3448 30 : v->changes++;
3449 :
3450 : /* if any operand has two project above then squash them */
3451 90 : for (node *n = ((list*)u->l)->h; n; n = n->next) {
3452 60 : r = rel_dup(n->data);
3453 60 : r = rel_merge_projects_(v, r);
3454 60 : rel_destroy(n->data);
3455 60 : n->data = r;
3456 : }
3457 :
3458 : return rel;
3459 : } else {
3460 : /* we need to have either union or munion */
3461 0 : assert(0);
3462 : }
3463 : }
3464 : return rel;
3465 : }
3466 :
3467 : /*
3468 : * Push (semi)joins down unions, this is basically for merge tables, where
3469 : * we know that the fk-indices are split over two clustered merge tables.
3470 : */
3471 : static inline sql_rel *
3472 1802917 : rel_push_join_down_union(visitor *v, sql_rel *rel)
3473 : {
3474 1802917 : if ((is_join(rel->op) && !is_outerjoin(rel->op) && !is_single(rel)) || is_semi(rel->op)) {
3475 302582 : sql_rel *l = rel->l, *r = rel->r, *ol = l, *or = r;
3476 302582 : list *exps = rel->exps, *attr = rel->attr;
3477 302582 : sql_exp *je = NULL;
3478 :
3479 : /* we would like to optimize in place reference rels which point
3480 : * to replica tables and let the replica optimizer handle those
3481 : * later. otherwise we miss the push join down optimization due
3482 : * to the rel_is_ref bailout
3483 : */
3484 302582 : if (rel_is_ref(l) && is_basetable(l->op) && l->l && isReplicaTable((sql_table*)l->l)) {
3485 1 : rel->l = rel_copy(v->sql, l, true);
3486 1 : rel_destroy(l);
3487 : }
3488 302582 : if (rel_is_ref(r) && is_basetable(r->op) && r->l && isReplicaTable((sql_table*)r->l)) {
3489 6 : rel->r = rel_copy(v->sql, r, true);
3490 6 : rel_destroy(r);
3491 : }
3492 :
3493 302582 : if (!l || !r || need_distinct(l) || need_distinct(r) || rel_is_ref(l) || rel_is_ref(r))
3494 : return rel;
3495 301133 : if (l->op == op_project)
3496 28937 : l = l->l;
3497 301133 : if (r->op == op_project)
3498 32149 : r = r->l;
3499 :
3500 : /* both sides only if we have a join index */
3501 301133 : if (!l || !r || (is_union(l->op) && is_union(r->op) &&
3502 1 : !(je = rel_is_join_on_pkey(rel, true)))) /* aligned PKEY-FKEY JOIN */
3503 205 : return rel;
3504 300928 : if (is_semi(rel->op) && is_union(l->op) && !je)
3505 : return rel;
3506 :
3507 300920 : if ((is_union(l->op) && !need_distinct(l) && !is_single(l)) && !is_union(r->op)){
3508 15 : sql_rel *nl, *nr;
3509 15 : sql_rel *ll = rel_dup(l->l), *lr = rel_dup(l->r);
3510 :
3511 : /* join(union(a,b), c) -> union(join(a,c), join(b,c)) */
3512 15 : if (!is_project(ll->op))
3513 5 : ll = rel_project(v->sql->sa, ll,
3514 : rel_projections(v->sql, ll, NULL, 1, 1));
3515 15 : if (!is_project(lr->op))
3516 8 : lr = rel_project(v->sql->sa, lr,
3517 : rel_projections(v->sql, lr, NULL, 1, 1));
3518 15 : rel_rename_exps(v->sql, l->exps, ll->exps);
3519 15 : rel_rename_exps(v->sql, l->exps, lr->exps);
3520 15 : if (l != ol) {
3521 0 : ll = rel_project(v->sql->sa, ll, NULL);
3522 0 : ll->exps = exps_copy(v->sql, ol->exps);
3523 0 : set_processed(ll);
3524 0 : lr = rel_project(v->sql->sa, lr, NULL);
3525 0 : lr->exps = exps_copy(v->sql, ol->exps);
3526 0 : set_processed(lr);
3527 : }
3528 15 : nl = rel_crossproduct(v->sql->sa, ll, rel_dup(or), rel->op);
3529 15 : nr = rel_crossproduct(v->sql->sa, lr, rel_dup(or), rel->op);
3530 15 : nl->exps = exps_copy(v->sql, exps);
3531 15 : nl->attr = exps_copy(v->sql, attr);
3532 15 : nr->exps = exps_copy(v->sql, exps);
3533 15 : nr->attr = exps_copy(v->sql, attr);
3534 15 : set_processed(nl);
3535 15 : set_processed(nr);
3536 15 : nl = rel_project(v->sql->sa, nl, rel_projections(v->sql, nl, NULL, 1, 1));
3537 15 : nr = rel_project(v->sql->sa, nr, rel_projections(v->sql, nr, NULL, 1, 1));
3538 15 : v->changes++;
3539 15 : return rel_inplace_setop(v->sql, rel, nl, nr, op_union, rel_projections(v->sql, rel, NULL, 1, 1));
3540 300905 : } else if (is_union(l->op) && !need_distinct(l) && !is_single(l) &&
3541 0 : is_union(r->op) && !need_distinct(r) && !is_single(r) && je) {
3542 0 : sql_rel *nl, *nr;
3543 0 : sql_rel *ll = rel_dup(l->l), *lr = rel_dup(l->r);
3544 0 : sql_rel *rl = rel_dup(r->l), *rr = rel_dup(r->r);
3545 :
3546 : /* join(union(a,b), union(c,d)) -> union(join(a,c), join(b,d)) */
3547 0 : if (!is_project(ll->op))
3548 0 : ll = rel_project(v->sql->sa, ll,
3549 : rel_projections(v->sql, ll, NULL, 1, 1));
3550 0 : if (!is_project(lr->op))
3551 0 : lr = rel_project(v->sql->sa, lr,
3552 : rel_projections(v->sql, lr, NULL, 1, 1));
3553 0 : rel_rename_exps(v->sql, l->exps, ll->exps);
3554 0 : rel_rename_exps(v->sql, l->exps, lr->exps);
3555 0 : if (l != ol) {
3556 0 : ll = rel_project(v->sql->sa, ll, NULL);
3557 0 : ll->exps = exps_copy(v->sql, ol->exps);
3558 0 : set_processed(ll);
3559 0 : lr = rel_project(v->sql->sa, lr, NULL);
3560 0 : lr->exps = exps_copy(v->sql, ol->exps);
3561 0 : set_processed(lr);
3562 : }
3563 0 : if (!is_project(rl->op))
3564 0 : rl = rel_project(v->sql->sa, rl,
3565 : rel_projections(v->sql, rl, NULL, 1, 1));
3566 0 : if (!is_project(rr->op))
3567 0 : rr = rel_project(v->sql->sa, rr,
3568 : rel_projections(v->sql, rr, NULL, 1, 1));
3569 0 : rel_rename_exps(v->sql, r->exps, rl->exps);
3570 0 : rel_rename_exps(v->sql, r->exps, rr->exps);
3571 0 : if (r != or) {
3572 0 : rl = rel_project(v->sql->sa, rl, NULL);
3573 0 : rl->exps = exps_copy(v->sql, or->exps);
3574 0 : set_processed(rl);
3575 0 : rr = rel_project(v->sql->sa, rr, NULL);
3576 0 : rr->exps = exps_copy(v->sql, or->exps);
3577 0 : set_processed(rr);
3578 : }
3579 0 : nl = rel_crossproduct(v->sql->sa, ll, rl, rel->op);
3580 0 : nr = rel_crossproduct(v->sql->sa, lr, rr, rel->op);
3581 0 : nl->exps = exps_copy(v->sql, exps);
3582 0 : nl->attr = exps_copy(v->sql, attr);
3583 0 : nr->exps = exps_copy(v->sql, exps);
3584 0 : nr->attr = exps_copy(v->sql, attr);
3585 0 : set_processed(nl);
3586 0 : set_processed(nr);
3587 0 : nl = rel_project(v->sql->sa, nl, rel_projections(v->sql, nl, NULL, 1, 1));
3588 0 : nr = rel_project(v->sql->sa, nr, rel_projections(v->sql, nr, NULL, 1, 1));
3589 0 : v->changes++;
3590 0 : return rel_inplace_setop(v->sql, rel, nl, nr, op_union, rel_projections(v->sql, rel, NULL, 1, 1));
3591 300905 : } else if (!is_union(l->op) &&
3592 300905 : is_union(r->op) && !need_distinct(r) && !is_single(r) &&
3593 : !is_semi(rel->op)) {
3594 14 : sql_rel *nl, *nr;
3595 14 : sql_rel *rl = rel_dup(r->l), *rr = rel_dup(r->r);
3596 :
3597 : /* join(a, union(b,c)) -> union(join(a,b), join(a,c)) */
3598 14 : if (!is_project(rl->op))
3599 5 : rl = rel_project(v->sql->sa, rl,
3600 : rel_projections(v->sql, rl, NULL, 1, 1));
3601 14 : if (!is_project(rr->op))
3602 6 : rr = rel_project(v->sql->sa, rr,
3603 : rel_projections(v->sql, rr, NULL, 1, 1));
3604 14 : rel_rename_exps(v->sql, r->exps, rl->exps);
3605 14 : rel_rename_exps(v->sql, r->exps, rr->exps);
3606 14 : if (r != or) {
3607 1 : rl = rel_project(v->sql->sa, rl, NULL);
3608 1 : rl->exps = exps_copy(v->sql, or->exps);
3609 1 : set_processed(rl);
3610 1 : rr = rel_project(v->sql->sa, rr, NULL);
3611 1 : rr->exps = exps_copy(v->sql, or->exps);
3612 1 : set_processed(rr);
3613 : }
3614 14 : nl = rel_crossproduct(v->sql->sa, rel_dup(ol), rl, rel->op);
3615 14 : nr = rel_crossproduct(v->sql->sa, rel_dup(ol), rr, rel->op);
3616 14 : nl->exps = exps_copy(v->sql, exps);
3617 14 : nl->attr = exps_copy(v->sql, attr);
3618 14 : nr->exps = exps_copy(v->sql, exps);
3619 14 : nr->attr = exps_copy(v->sql, attr);
3620 14 : set_processed(nl);
3621 14 : set_processed(nr);
3622 14 : nl = rel_project(v->sql->sa, nl, rel_projections(v->sql, nl, NULL, 1, 1));
3623 14 : nr = rel_project(v->sql->sa, nr, rel_projections(v->sql, nr, NULL, 1, 1));
3624 14 : v->changes++;
3625 14 : return rel_inplace_setop(v->sql, rel, nl, nr, op_union, rel_projections(v->sql, rel, NULL, 1, 1));
3626 : /* {semi}join ( A1, union (A2, B)) [A1.partkey = A2.partkey] ->
3627 : * {semi}join ( A1, A2 )
3628 : * and
3629 : * {semi}join ( A1, union (B, A2)) [A1.partkey = A2.partkey] ->
3630 : * {semi}join ( A1, A2 )
3631 : * (ie a single part on the left)
3632 : *
3633 : * Howto detect that a relation isn't matching.
3634 : *
3635 : * partitioning is currently done only on pkey/fkey's
3636 : * ie only matching per part if join is on pkey/fkey (parts)
3637 : *
3638 : * and part numbers should match.
3639 : *
3640 : * */
3641 300891 : } else if (!is_union(l->op) &&
3642 300891 : is_union(r->op) && !need_distinct(r) && !is_single(r) &&
3643 0 : is_semi(rel->op) && (je = rel_is_join_on_pkey(rel, true))) {
3644 : /* use first join expression, to find part nr */
3645 0 : int lpnr = rel_part_nr(l, je);
3646 0 : sql_rel *rl = r->l;
3647 0 : sql_rel *rr = r->r;
3648 :
3649 0 : if (lpnr < 0)
3650 : return rel;
3651 : /* case 1: uses left not right */
3652 0 : if (rel_uses_part_nr(rl, je, lpnr) &&
3653 0 : !rel_uses_part_nr(rr, je, lpnr)) {
3654 0 : sql_rel *nl;
3655 :
3656 0 : rl = rel_dup(rl);
3657 0 : if (!is_project(rl->op))
3658 0 : rl = rel_project(v->sql->sa, rl,
3659 : rel_projections(v->sql, rl, NULL, 1, 1));
3660 0 : rel_rename_exps(v->sql, r->exps, rl->exps);
3661 0 : if (r != or) {
3662 0 : rl = rel_project(v->sql->sa, rl, NULL);
3663 0 : rl->exps = exps_copy(v->sql, or->exps);
3664 0 : set_processed(rl);
3665 : }
3666 0 : nl = rel_crossproduct(v->sql->sa, rel_dup(ol), rl, rel->op);
3667 0 : nl->exps = exps_copy(v->sql, exps);
3668 0 : nl->attr = exps_copy(v->sql, attr);
3669 0 : set_processed(nl);
3670 0 : v->changes++;
3671 0 : return rel_inplace_project(v->sql->sa, rel, nl, rel_projections(v->sql, rel, NULL, 1, 1));
3672 : /* case 2: uses right not left */
3673 0 : } else if (!rel_uses_part_nr(rl, je, lpnr) &&
3674 0 : rel_uses_part_nr(rr, je, lpnr)) {
3675 0 : sql_rel *nl;
3676 :
3677 0 : rr = rel_dup(rr);
3678 0 : if (!is_project(rr->op))
3679 0 : rr = rel_project(v->sql->sa, rr,
3680 : rel_projections(v->sql, rr, NULL, 1, 1));
3681 0 : rel_rename_exps(v->sql, r->exps, rr->exps);
3682 0 : if (r != or) {
3683 0 : rr = rel_project(v->sql->sa, rr, NULL);
3684 0 : rr->exps = exps_copy(v->sql, or->exps);
3685 0 : set_processed(rr);
3686 : }
3687 0 : nl = rel_crossproduct(v->sql->sa, rel_dup(ol), rr, rel->op);
3688 0 : nl->exps = exps_copy(v->sql, exps);
3689 0 : nl->attr = exps_copy(v->sql, attr);
3690 0 : set_processed(nl);
3691 0 : v->changes++;
3692 0 : return rel_inplace_project(v->sql->sa, rel, nl, rel_projections(v->sql, rel, NULL, 1, 1));
3693 : }
3694 : }
3695 : }
3696 : return rel;
3697 : }
3698 :
3699 : static sql_rel *
3700 1802917 : rel_optimize_unions_topdown_(visitor *v, sql_rel *rel)
3701 : {
3702 1802917 : rel = rel_push_project_down_union(v, rel);
3703 1802917 : rel = rel_push_join_down_union(v, rel);
3704 1802917 : return rel;
3705 : }
3706 :
3707 : static sql_rel *
3708 2790 : rel_optimize_unions_topdown(visitor *v, global_props *gp, sql_rel *rel)
3709 : {
3710 2790 : (void) gp;
3711 2790 : return rel_visitor_topdown(v, rel, &rel_optimize_unions_topdown_);
3712 : }
3713 :
3714 : run_optimizer
3715 601224 : bind_optimize_unions_topdown(visitor *v, global_props *gp)
3716 : {
3717 601224 : (void) v;
3718 : // TODO: remove this and default to munion
3719 601224 : int op = mvc_debug_on(v->sql, 32) ? gp->cnt[op_munion] : gp->cnt[op_union];
3720 601124 : return gp->opt_level == 1 && op ? rel_optimize_unions_topdown : NULL;
3721 : }
3722 :
3723 :
3724 : static sql_column *
3725 0 : is_fk_column_of_pk(mvc *sql, sql_rel *rel, sql_column *pkc, sql_exp *e) /* test if e is a foreing key column for the pk on pkc */
3726 : {
3727 0 : sql_trans *tr = sql->session->tr;
3728 0 : sql_column *c = exp_find_column(rel, e, -2);
3729 :
3730 0 : if (c) {
3731 0 : sql_table *t = c->t;
3732 :
3733 0 : for (node *n = ol_first_node(t->idxs); n; n = n->next) {
3734 0 : sql_idx *li = n->data;
3735 :
3736 0 : if (li->type == join_idx) {
3737 0 : for (node *m = li->columns->h ; m ; m = m->next) {
3738 0 : sql_kc *fkc = m->data;
3739 :
3740 0 : if (strcmp(fkc->c->base.name, c->base.name) == 0) { /* same fkey column */
3741 0 : sql_key *fkey = (sql_key*)os_find_id(tr->cat->objects, tr, ((sql_fkey*)li->key)->rkey);
3742 :
3743 0 : if (strcmp(fkey->t->base.name, pkc->t->base.name) == 0) { /* to same pk table */
3744 0 : for (node *o = fkey->columns->h ; o ; o = n->next) {
3745 0 : sql_kc *kc = m->data;
3746 :
3747 0 : if (strcmp(kc->c->base.name, pkc->base.name) == 0) /* to same pk table column */
3748 : return c;
3749 : }
3750 : }
3751 : }
3752 : }
3753 : }
3754 : }
3755 : }
3756 : return NULL;
3757 : }
3758 :
3759 : static bool
3760 0 : has_no_selectivity(mvc *sql, sql_rel *rel)
3761 : {
3762 0 : if (!rel)
3763 : return true;
3764 :
3765 0 : switch(rel->op){
3766 : case op_basetable:
3767 : case op_truncate:
3768 : case op_table:
3769 : return true;
3770 0 : case op_topn:
3771 : case op_sample:
3772 : case op_project:
3773 : case op_groupby:
3774 0 : return has_no_selectivity(sql, rel->l);
3775 : case op_ddl:
3776 : case op_insert:
3777 : case op_update:
3778 : case op_delete:
3779 : case op_merge:
3780 : case op_join:
3781 : case op_left:
3782 : case op_right:
3783 : case op_full:
3784 : case op_semi:
3785 : case op_anti:
3786 : case op_union:
3787 : case op_inter:
3788 : case op_except:
3789 : case op_munion:
3790 : case op_select:
3791 : return false;
3792 : }
3793 : return true;
3794 : }
3795 :
3796 : static sql_rel *
3797 700916 : rel_distinct_project2groupby_(visitor *v, sql_rel *rel)
3798 : {
3799 700916 : sql_rel *l = rel->l;
3800 :
3801 : /* rewrite distinct project (table) [ constant ] -> project [ constant ] */
3802 708147 : if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
3803 7231 : exps_card(rel->exps) <= CARD_ATOM) {
3804 347 : set_nodistinct(rel);
3805 347 : if (rel->card > CARD_ATOM) { /* if the projection just contains constants, then no topN is needed */
3806 94 : sql_rel *nl = rel->l = rel_topn(v->sql->sa, rel->l, append(sa_list(v->sql->sa), exp_atom_lng(v->sql->sa, 1)));
3807 94 : set_processed(nl);
3808 : }
3809 347 : v->changes++;
3810 : }
3811 :
3812 : /* rewrite distinct project [ pk ] ( select ( table ) [ e op val ])
3813 : * into project [ pk ] ( select/semijoin ( table ) */
3814 700916 : if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
3815 7097 : (l->op == op_select || l->op == op_semi) && exps_unique(v->sql, rel, rel->exps) &&
3816 215 : (!have_semantics(l->exps) || !have_nil(rel->exps))) {
3817 211 : set_nodistinct(rel);
3818 211 : v->changes++;
3819 : }
3820 :
3821 : /* rewrite distinct project ( join(p,f) [ p.pk = f.fk ] ) [ p.pk ]
3822 : * into project( (semi)join(p,f) [ p.pk = f.fk ] ) [ p.pk ] */
3823 700916 : if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
3824 6673 : l && (is_select(l->op) || l->op == op_join) && rel_is_join_on_pkey(l, true) /* [ pk == fk ] */) {
3825 0 : sql_exp *found = NULL, *pk = NULL, *fk = NULL;
3826 0 : bool all_exps_atoms = true;
3827 0 : sql_column *pkc = NULL;
3828 :
3829 0 : for (node *m = l->exps->h ; m ; m = m->next) { /* find a primary key join */
3830 0 : sql_exp *je = (sql_exp *) m->data;
3831 0 : sql_exp *le = je->l, *re = je->r;
3832 :
3833 0 : if (!find_prop(je->p, PROP_JOINIDX)) /* must be a pk-fk join expression */
3834 0 : continue;
3835 :
3836 0 : if ((pkc = exp_is_pkey(l, le))) { /* le is the primary key */
3837 0 : all_exps_atoms = true;
3838 :
3839 0 : for (node *n = rel->exps->h; n && all_exps_atoms; n = n->next) {
3840 0 : sql_exp *e = (sql_exp *) n->data;
3841 :
3842 0 : if (exp_match(e, le) || exp_refers(e, le))
3843 : found = e;
3844 0 : else if (e->card > CARD_ATOM)
3845 0 : all_exps_atoms = false;
3846 : }
3847 : pk = le;
3848 : fk = re;
3849 : }
3850 0 : if (!found && (pkc = exp_is_pkey(l, re))) { /* re is the primary key */
3851 0 : all_exps_atoms = true;
3852 :
3853 0 : for (node *n = rel->exps->h; n && all_exps_atoms; n = n->next) {
3854 0 : sql_exp *e = (sql_exp *) n->data;
3855 :
3856 0 : if (exp_match(e, re) || exp_refers(e, re))
3857 : found = e;
3858 0 : else if (e->card > CARD_ATOM)
3859 0 : all_exps_atoms = false;
3860 : }
3861 : pk = re;
3862 : fk = le;
3863 : }
3864 : }
3865 :
3866 0 : if (all_exps_atoms && found) { /* rel must have the same primary key on the projection list */
3867 : /* if the foreign key has no selectivity, the join can be removed */
3868 0 : if (!(rel_is_ref(l)) && ((rel_find_exp(l->l, fk) && is_fk_column_of_pk(v->sql, l->l, pkc, fk) && has_no_selectivity(v->sql, l->l)) ||
3869 0 : (l->r && rel_find_exp(l->r, fk) && is_fk_column_of_pk(v->sql, l->r, pkc, fk) && has_no_selectivity(v->sql, l->r)))) {
3870 0 : sql_rel *side = (rel_find_exp(l->l, pk) != NULL)?l->l:l->r;
3871 :
3872 0 : rel->l = rel_dup(side);
3873 0 : rel_destroy(l);
3874 0 : v->changes++;
3875 0 : set_nodistinct(rel);
3876 0 : return rel;
3877 : }
3878 : /* if the join has no multiple references it can be re-written into a semijoin */
3879 0 : if (l->op == op_join && !(rel_is_ref(l)) && list_length(rel->exps) == 1) { /* other expressions may come from the other side */
3880 0 : if (l->r && rel_find_exp(l->r, pk)) {
3881 0 : sql_rel *temp = l->l;
3882 0 : l->l = l->r;
3883 0 : l->r = temp;
3884 :
3885 0 : l->op = op_semi;
3886 0 : } else if (rel_find_exp(l->l, pk)) {
3887 0 : l->op = op_semi;
3888 : }
3889 : }
3890 0 : v->changes++;
3891 0 : set_nodistinct(rel);
3892 0 : return rel;
3893 : }
3894 : }
3895 : /* rewrite distinct project [ gbe ] ( select ( groupby [ gbe ] [ gbe, e ] )[ e op val ])
3896 : * into project [ gbe ] ( select ( group etc ) */
3897 700916 : if (rel->op == op_project && rel->l && !rel->r /* no order by */ &&
3898 6673 : need_distinct(rel) && l->op == op_select){
3899 2683 : sql_rel *g = l->l;
3900 2683 : if (is_groupby(g->op)) {
3901 5 : list *used = sa_list(v->sql->sa);
3902 5 : list *gbe = g->r;
3903 5 : node *n;
3904 5 : int fnd = 1;
3905 :
3906 10 : for (n = rel->exps->h; n && fnd; n = n->next) {
3907 5 : sql_exp *e = n->data;
3908 :
3909 5 : if (e->card > CARD_ATOM) {
3910 : /* find e in gbe */
3911 5 : sql_exp *ne = list_find_exp(g->exps, e);
3912 :
3913 5 : if (ne)
3914 4 : ne = list_find_exp( gbe, ne);
3915 4 : if (ne && !list_find_exp(used, ne)) {
3916 2 : fnd++;
3917 2 : list_append(used, ne);
3918 : }
3919 3 : if (!ne)
3920 : fnd = 0;
3921 : }
3922 : }
3923 5 : if (fnd == (list_length(gbe)+1)) {
3924 0 : v->changes++;
3925 0 : set_nodistinct(rel);
3926 : }
3927 : }
3928 : }
3929 700916 : if (rel->op == op_project && rel->l &&
3930 6673 : need_distinct(rel) && exps_card(rel->exps) > CARD_ATOM) {
3931 6673 : node *n;
3932 6673 : list *exps = new_exp_list(v->sql->sa), *gbe = new_exp_list(v->sql->sa);
3933 6673 : list *obe = rel->r; /* we need to read the ordering later */
3934 :
3935 6673 : if (obe) {
3936 0 : int fnd = 0;
3937 :
3938 0 : for(n = obe->h; n && !fnd; n = n->next) {
3939 0 : sql_exp *e = n->data;
3940 :
3941 0 : if (e->type != e_column)
3942 : fnd = 1;
3943 0 : else if (exps_bind_column2(rel->exps, e->l, e->r, NULL) == 0)
3944 0 : fnd = 1;
3945 : }
3946 0 : if (fnd)
3947 : return rel;
3948 : }
3949 6673 : rel->l = rel_project(v->sql->sa, rel->l, rel->exps);
3950 :
3951 19707 : for (n = rel->exps->h; n; n = n->next) {
3952 13034 : sql_exp *e = n->data, *ne;
3953 :
3954 13034 : set_nodistinct(e);
3955 13034 : ne = exp_ref(v->sql, e);
3956 13034 : if (e->card > CARD_ATOM && !list_find_exp(gbe, ne)) { /* no need to group by on constants, or the same column multiple times */
3957 12670 : append(gbe, ne);
3958 12670 : ne = exp_ref(v->sql, ne);
3959 : }
3960 13034 : append(exps, ne);
3961 : }
3962 6673 : rel->op = op_groupby;
3963 6673 : rel->exps = exps;
3964 6673 : rel->r = gbe;
3965 6673 : set_nodistinct(rel);
3966 6673 : if (obe) {
3967 : /* add order again */
3968 0 : rel = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
3969 0 : rel->r = obe;
3970 : }
3971 6673 : v->changes++;
3972 : }
3973 : return rel;
3974 : }
3975 :
3976 : static sql_rel *
3977 5264 : rel_distinct_project2groupby(visitor *v, global_props *gp, sql_rel *rel)
3978 : {
3979 5264 : (void) gp;
3980 5264 : return rel_visitor_bottomup(v, rel, &rel_distinct_project2groupby_);
3981 : }
3982 :
3983 : run_optimizer
3984 601237 : bind_distinct_project2groupby(visitor *v, global_props *gp)
3985 : {
3986 601237 : int flag = v->sql->sql_optimizer;
3987 538734 : return gp->opt_cycle == 0 && gp->opt_level == 1 && gp->needs_distinct && gp->cnt[op_project] &&
3988 606501 : (flag & distinct_project2groupby)? rel_distinct_project2groupby : NULL;
3989 : }
|