LCOV - code coverage report
Current view: top level - sql/server - rel_optimize_proj.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 1485 1966 75.5 %
Date: 2024-11-13 22:44:48 Functions: 74 80 92.5 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include "rel_optimizer_private.h"
      15             : #include "rel_basetable.h"
      16             : #include "rel_exp.h"
      17             : #include "rel_select.h"
      18             : #include "rel_rewriter.h"
      19             : 
      20             : static int
      21     3281798 : exp_is_rename(sql_exp *e)
      22             : {
      23     3281798 :         return (e->type == e_column);
      24             : }
      25             : 
      26             : static int
      27     1862834 : exp_is_useless_rename(sql_exp *e)
      28             : {
      29     1862834 :         return (e->type == e_column && e->alias.label == e->nid);
      30             : }
      31             : 
      32             : static list *
      33       20051 : rel_used_projections(mvc *sql, list *exps, list *users)
      34             : {
      35       20051 :         list *nexps = sa_list(sql->sa);
      36       20051 :         bool *used = SA_ZNEW_ARRAY(sql->ta, bool, list_length(exps));
      37       20051 :         int i = 0;
      38             : 
      39      132064 :         for(node *n = users->h; n; n = n->next) {
      40      112013 :                 sql_exp *e = n->data, *ne = NULL;
      41      112013 :                 assert(e->nid && exps_bind_nid(exps, e->nid));
      42      112013 :                 if (e->nid && (ne = exps_bind_nid(exps, e->nid))) {
      43      112013 :                         used[list_position(exps, ne)] = 1;
      44             :                 }
      45             :         }
      46      141904 :         for(node *n = exps->h; n; n = n->next, i++) {
      47      121853 :                 sql_exp *e = n->data;
      48      121853 :                 if (is_intern(e) || used[i])
      49      121716 :                         append(nexps, e);
      50             :         }
      51       20051 :         return nexps;
      52             : }
      53             : 
      54             : /* move projects down with the goal op removing them completely (ie push renames/reduced lists into basetable)
      55             :  * for some cases we can directly remove iff renames rename into same alias
      56             :  * */
      57             : static sql_rel *
      58     3759494 : rel_push_project_down_(visitor *v, sql_rel *rel)
      59             : {
      60             :         /* for now only push down renames */
      61     3759494 :         if (v->depth > 1 && is_simple_project(rel->op) && !need_distinct(rel) && !rel_is_ref(rel) && rel->l && !rel->r &&
      62     1079129 :                         v->parent &&
      63     1079121 :                         !is_modify(v->parent->op) && !is_topn(v->parent->op) && !is_sample(v->parent->op) &&
      64     1696878 :                         !is_ddl(v->parent->op) && !is_set(v->parent->op) && !is_munion(v->parent->op) &&
      65      653162 :                         list_check_prop_all(rel->exps, (prop_check_func)&exp_is_rename)) {
      66      387594 :                 sql_rel *l = rel->l;
      67             : 
      68      387594 :                 if (rel_is_ref(l))
      69             :                         return rel;
      70      357020 :                 if (is_basetable(l->op)) {
      71       37818 :                         if (list_check_prop_all(rel->exps, (prop_check_func)&exp_is_useless_rename)) {
      72             :                                 /* TODO reduce list (those in the project + internal) */
      73       20051 :                                 rel->l = NULL;
      74       20051 :                                 l->exps = rel_used_projections(v->sql, l->exps, rel->exps);
      75       20051 :                                 rel_destroy(rel);
      76       20051 :                                 v->changes++;
      77       20051 :                                 return l;
      78             :                         }
      79             :                         return rel;
      80      319202 :                 } else if (list_check_prop_all(rel->exps, (prop_check_func)&exp_is_useless_rename)) {
      81      123479 :                         if ((is_project(l->op) && list_length(l->exps) == list_length(rel->exps)) ||
      82      113989 :                                 ((v->parent && is_project(v->parent->op)) &&
      83       37784 :                                  (is_mset(l->op) || is_set(l->op) || is_select(l->op) || is_join(l->op) || is_semi(l->op) || is_topn(l->op) || is_sample(l->op)))) {
      84       46474 :                                 rel->l = NULL;
      85       46474 :                                 rel_destroy(rel);
      86       46474 :                                 v->changes++;
      87       46474 :                                 return l;
      88             :                         }
      89             :                 }
      90             :         }
      91             :         /* ToDo handle useful renames, ie new relation name and unique set of attribute names (could reduce set of * attributes) */
      92             :         /* handle both useless and useful with project [ group by ] */
      93             :         return rel;
      94             : }
      95             : 
      96             : static sql_rel *
      97      171684 : rel_push_project_down(visitor *v, global_props *gp, sql_rel *rel)
      98             : {
      99      171684 :         (void) gp;
     100      171684 :         return rel_visitor_bottomup(v, rel, &rel_push_project_down_);
     101             : }
     102             : 
     103             : run_optimizer
     104      612288 : bind_push_project_down(visitor *v, global_props *gp)
     105             : {
     106      612288 :         int flag = v->sql->sql_optimizer;
     107      612092 :         return gp->opt_level == 1 && (flag & push_project_down) &&
     108     1224380 :                    (gp->cnt[op_project] || gp->cnt[op_groupby]) ? rel_push_project_down : NULL;
     109             : }
     110             : 
     111             : 
     112             : static bool exp_shares_exps(sql_exp *e, list *shared, uint64_t *uses);
     113             : 
     114             : static bool
     115      112531 : exps_shares_exps(list *exps, list *shared, uint64_t *uses)
     116             : {
     117      112531 :         if (!exps || !shared)
     118             :                 return false;
     119      390641 :         for (node *n = exps->h; n; n = n->next) {
     120      278516 :                 sql_exp *e = n->data;
     121             : 
     122      278516 :                 if (exp_shares_exps(e, shared, uses))
     123             :                         return true;
     124             :         }
     125             :         return false;
     126             : }
     127             : 
     128             : static bool
     129     2706921 : exp_shares_exps(sql_exp *e, list *shared, uint64_t *uses)
     130             : {
     131     4935728 :         switch(e->type) {
     132          75 :         case e_cmp:
     133          75 :                 if (e->flag == cmp_or || e->flag == cmp_filter)
     134          52 :                         return exps_shares_exps(e->l, shared, uses) || exps_shares_exps(e->r, shared, uses);
     135          23 :                 else if (e->flag == cmp_in || e->flag == cmp_notin)
     136           0 :                         return exp_shares_exps(e->l, shared, uses) || exps_shares_exps(e->r, shared, uses);
     137             :                 else
     138          46 :                         return exp_shares_exps(e->l, shared, uses) || exp_shares_exps(e->r, shared, uses) || (e->f && exp_shares_exps(e->f, shared, uses));
     139      111636 :         case e_atom:
     140      111636 :                 if (e->f)
     141           0 :                         return exps_shares_exps(e->f, shared, uses);
     142             :                 return false;
     143     4476735 :         case e_column:
     144             :                 {
     145     4476735 :                         sql_exp *ne = NULL;
     146     4476735 :                         assert(e->nid);
     147     4476735 :                         if (e->nid)
     148     4476735 :                                 ne = exps_bind_nid(shared, e->nid);
     149     4476735 :                         if (!ne)
     150             :                                 return false;
     151     3458137 :                         if (ne->type != e_column) {
     152      481488 :                                 int i = list_position(shared, ne);
     153      481488 :                                 if (i < 0)
     154             :                                         return false;
     155      481488 :                                 uint64_t used = (uint64_t) 1 << i;
     156      481488 :                                 if (used & *uses)
     157             :                                         return true;
     158      481085 :                                 *uses |= used;
     159      481085 :                                 return false;
     160             :                         }
     161     2976649 :                         if (ne != e && (list_position(shared, e) < 0 || list_position(shared, e) > list_position(shared, ne)))
     162             :                                 /* maybe ne refers to a local complex exp */
     163     1993952 :                                 return exp_shares_exps(ne, shared, uses);
     164             :                         return false;
     165             :                 }
     166      234855 :         case e_convert:
     167      234855 :                 return exp_shares_exps(e->l, shared, uses);
     168      112427 :         case e_aggr:
     169             :         case e_func:
     170      112427 :                 return exps_shares_exps(e->l, shared, uses);
     171             :         case e_psm:
     172           0 :                 assert(0);  /* not in projection list */
     173             :         }
     174             :         return false;
     175             : }
     176             : 
     177             : static bool
     178      321524 : exps_share_expensive_exp(list *exps, list *shared )
     179             : {
     180      321524 :         uint64_t uses = 0;
     181             : 
     182      321524 :         if (list_empty(exps) || list_empty(shared))
     183           0 :                 return false;
     184      321524 :         if (list_length(shared) > 64)
     185             :                 return true;
     186     2749045 :         for (node *n = exps->h; n; n = n->next) {
     187     2428343 :                 sql_exp *e = n->data;
     188             : 
     189     2428343 :                 if (exp_shares_exps(e, shared, &uses))
     190             :                         return true;
     191             :         }
     192             :         return false;
     193             : }
     194             : 
     195             : static bool ambigious_ref( list *exps, sql_exp *e);
     196             : static bool
     197       86354 : ambigious_refs( list *exps, list *refs)
     198             : {
     199       86354 :         if (list_empty(refs))
     200             :                 return false;
     201      262011 :         for(node *n=refs->h; n; n = n->next) {
     202      196690 :                 if (ambigious_ref(exps, n->data))
     203             :                         return true;
     204             :         }
     205             :         return false;
     206             : }
     207             : 
     208             : static bool
     209     3674760 : ambigious_ref( list *exps, sql_exp *e)
     210             : {
     211     3674760 :         sql_exp *ne = NULL;
     212             : 
     213     3674760 :         if (e->type == e_column) {
     214     3162776 :                 assert(e->nid);
     215     3162776 :                 if (e->nid)
     216     3162776 :                         ne = exps_bind_nid(exps, e->nid);
     217     3162776 :                 if (ne && e != ne)
     218             :                         return true;
     219             :         }
     220     3665839 :         if (e->type == e_func)
     221       86354 :                 return ambigious_refs(exps, e->l);
     222             :         return false;
     223             : }
     224             : 
     225             : /* merge 2 projects into the lower one */
     226             : static sql_rel *
     227     4009621 : rel_merge_projects_(visitor *v, sql_rel *rel)
     228             : {
     229     4068165 :         list *exps = rel->exps;
     230     4068165 :         sql_rel *prj = rel->l;
     231     4068165 :         node *n;
     232             : 
     233     4068165 :         if (rel->op == op_project &&
     234     1606569 :             prj && prj->op == op_project && !(rel_is_ref(prj)) && list_empty(prj->r)) {
     235      584125 :                 int all = 1;
     236             : 
     237      584125 :                 if (project_unsafe(rel, false) || project_unsafe(prj, false) || exps_share_expensive_exp(rel->exps, prj->exps))
     238      263422 :                         return rel;
     239             : 
     240             :                 /* here we try to fix aliases */
     241      320702 :                 list *nexps = NULL;
     242             :                 /* for each exp check if we can rename it */
     243     2181895 :                 for (n = exps->h; n && all; n = n->next) {
     244     1870114 :                         sql_exp *e = n->data, *ne = NULL;
     245             : 
     246             :                         /* We do not handle expressions pointing back in the list */
     247     1870114 :                         if (ambigious_ref(exps, e)) {
     248             :                                 all = 0;
     249             :                                 break;
     250             :                         }
     251     1861193 :                         ne = exp_push_down_prj(v->sql, e, prj, prj->l);
     252             :                         /* check if the referred alias name isn't used twice */
     253     1861193 :                         if (ne && ambigious_ref(nexps, ne)) {
     254             :                                 all = 0;
     255             :                                 break;
     256             :                         }
     257     1607956 :                         if (ne) {
     258     1607956 :                                 if (exp_name(e))
     259     1601590 :                                         exp_prop_alias(v->sql->sa, ne, e);
     260     1607956 :                                 if (!nexps)
     261      312167 :                                         nexps = new_exp_list(v->sql->sa);
     262     1607956 :                                 list_append(nexps, ne);
     263             :                         } else {
     264             :                                 all = 0;
     265             :                         }
     266             :                 }
     267      320702 :                 if (all) {
     268       58544 :                         rel->exps = nexps;
     269             :                         /* we can now remove the intermediate project */
     270             :                         /* push order by expressions */
     271       58544 :                         if (!list_empty(rel->r)) {
     272           0 :                                 list *nr = new_exp_list(v->sql->sa), *res = rel->r;
     273           0 :                                 for (n = res->h; n; n = n->next) {
     274           0 :                                         sql_exp *e = n->data, *ne = NULL;
     275             : 
     276           0 :                                         ne = exp_push_down_prj(v->sql, e, prj, prj->l);
     277           0 :                                         if (ne) {
     278           0 :                                                 if (exp_name(e))
     279           0 :                                                         exp_prop_alias(v->sql->sa, ne, e);
     280           0 :                                                 list_append(nr, ne);
     281             :                                         } else {
     282             :                                                 all = 0;
     283             :                                         }
     284             :                                 }
     285           0 :                                 if (all) {
     286           0 :                                         rel->r = nr;
     287             :                                 } else {
     288             :                                         /* leave as is */
     289           0 :                                         rel->exps = exps;
     290           0 :                                         return rel;
     291             :                                 }
     292             :                         }
     293       58544 :                         rel->l = prj->l;
     294       58544 :                         prj->l = NULL;
     295       58544 :                         rel_destroy(prj);
     296       58544 :                         v->changes++;
     297       58544 :                         return rel_merge_projects_(v, rel);
     298             :                 } else {
     299             :                         /* leave as is */
     300      262158 :                         rel->exps = exps;
     301             :                 }
     302      262158 :                 return rel;
     303             :         }
     304             :         return rel;
     305             : }
     306             : 
     307             : static sql_rel *
     308      171683 : rel_merge_projects(visitor *v, global_props *gp, sql_rel *rel)
     309             : {
     310      171683 :         (void) gp;
     311      171683 :         return rel_visitor_bottomup(v, rel, &rel_merge_projects_);
     312             : }
     313             : 
     314             : run_optimizer
     315      612288 : bind_merge_projects(visitor *v, global_props *gp)
     316             : {
     317      612288 :         int flag = v->sql->sql_optimizer;
     318      612092 :         return gp->opt_level == 1 && (flag & merge_projects) &&
     319     1224372 :                    (gp->cnt[op_project] || gp->cnt[op_groupby]) ? rel_merge_projects : NULL;
     320             : }
     321             : 
     322             : 
     323             : static sql_exp *split_aggr_and_project(mvc *sql, list *aexps, sql_exp *e);
     324             : 
     325             : static void
     326           0 : list_split_aggr_and_project(mvc *sql, list *aexps, list *exps)
     327             : {
     328           0 :         if (list_empty(exps))
     329             :                 return ;
     330           0 :         for(node *n = exps->h; n; n = n->next)
     331           0 :                 n->data = split_aggr_and_project(sql, aexps, n->data);
     332             : }
     333             : 
     334             : static sql_exp *
     335           0 : split_aggr_and_project(mvc *sql, list *aexps, sql_exp *e)
     336             : {
     337           0 :         switch(e->type) {
     338           0 :         case e_aggr:
     339             :                 /* add to the aggrs */
     340           0 :                 if (!exp_name(e))
     341           0 :                         exp_label(sql->sa, e, ++sql->label);
     342           0 :                 list_append(aexps, e);
     343           0 :                 return exp_ref(sql, e);
     344             :         case e_cmp:
     345             :                 /* e_cmp's shouldn't exist in an aggr expression list */
     346           0 :                 assert(0);
     347             :         case e_convert:
     348           0 :                 e->l = split_aggr_and_project(sql, aexps, e->l);
     349           0 :                 return e;
     350           0 :         case e_func:
     351           0 :                 list_split_aggr_and_project(sql, aexps, e->l);
     352           0 :                 return e;
     353             :         case e_atom:
     354             :         case e_column: /* constants and columns shouldn't be rewritten */
     355             :         case e_psm:
     356             :                 return e;
     357             :         }
     358             :         return NULL;
     359             : }
     360             : 
     361             : /* exp_rename */
     362             : static sql_exp * exp_rename(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t);
     363             : 
     364             : static list *
     365       48988 : exps_rename(mvc *sql, list *l, sql_rel *f, sql_rel *t)
     366             : {
     367       48988 :         if (list_empty(l))
     368             :                 return l;
     369       44248 :         for (node *n=l->h; n; n=n->next)
     370       28400 :                 n->data = exp_rename(sql, n->data, f, t);
     371             :         return l;
     372             : }
     373             : 
     374             : /* exp_rename */
     375             : static sql_exp *
     376      161192 : exp_rename(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t)
     377             : {
     378      161192 :         sql_exp *ne = NULL;
     379             : 
     380      161192 :         assert(is_project(f->op));
     381             : 
     382      161192 :         switch(e->type) {
     383       71180 :         case e_column:
     384       71180 :                 assert(e->nid);
     385       71180 :                 ne = exps_bind_nid(f->exps, e->nid);
     386       71180 :                 if (!ne)
     387             :                         return e;
     388       47084 :                 sql_exp *oe = e;
     389       47084 :                 e = NULL;
     390       47084 :                 if (ne && ne->nid)
     391       46939 :                         e = rel_find_exp(t, ne);
     392       46939 :                 if (!e) {
     393         145 :                         sql->session->status = 0;
     394         145 :                         sql->errstr[0] = 0;
     395         145 :                         if (exp_is_atom(ne))
     396             :                                 return ne;
     397             :                         return oe;
     398             :                 }
     399       46939 :                 ne = exp_ref(sql, e);
     400       46939 :                 if (oe)
     401       46939 :                         exp_propagate(sql->sa, ne, oe);
     402       46939 :                 return ne;
     403       49499 :         case e_cmp:
     404       49499 :                 if (e->flag == cmp_or || e->flag == cmp_filter) {
     405        3450 :                         e->l = exps_rename(sql, e->l, f, t);
     406        3450 :                         e->r = exps_rename(sql, e->r, f, t);
     407       46049 :                 } else if (e->flag == cmp_in || e->flag == cmp_notin) {
     408        6139 :                         e->l = exp_rename(sql, e->l, f, t);
     409        6139 :                         e->r = exps_rename(sql, e->r, f, t);
     410             :                 } else {
     411       39910 :                         e->l = exp_rename(sql, e->l, f, t);
     412       39910 :                         e->r = exp_rename(sql, e->r, f, t);
     413       39910 :                         if (e->f)
     414         232 :                                 e->f = exp_rename(sql, e->f, f, t);
     415             :                 }
     416             :                 break;
     417        4564 :         case e_convert:
     418        4564 :                 e->l = exp_rename(sql, e->l, f, t);
     419        4564 :                 break;
     420        2824 :         case e_aggr:
     421             :         case e_func:
     422        2824 :                 e->l = exps_rename(sql, e->l, f, t);
     423        2824 :                 break;
     424       33125 :         case e_atom:
     425       33125 :                 e->f = exps_rename(sql, e->f, f, t);
     426       33125 :                 break;
     427             :         case e_psm:
     428             :                 break;
     429             :         }
     430             :         return e;
     431             : }
     432             : 
     433             : static int
     434     5513066 : exp_match_exp_cmp( sql_exp *e1, sql_exp *e2)
     435             : {
     436     5513066 :         if (exp_match_exp(e1,e2))
     437       55066 :                 return 0;
     438             :         return -1;
     439             : }
     440             : 
     441             : /* Pushing projects up the tree. Done very early in the optimizer.
     442             :  * Makes later steps easier.
     443             :  */
     444             : extern void _rel_print(mvc *sql, sql_rel *rel);
     445             : static sql_rel *
     446     3661471 : rel_push_project_up_(visitor *v, sql_rel *rel)
     447             : {
     448     3661471 :         if (is_simple_project(rel->op) && rel->l && !rel_is_ref(rel)) {
     449     1167267 :                 sql_rel *l = rel->l;
     450     1167267 :                 if (is_simple_project(l->op))
     451      283539 :                         return rel_merge_projects_(v, rel);
     452             :                 /* find equal column references, later cases are rewritten into references back to the first
     453             :                  * project () [ i.i L1, i.i L2 ] -> project() [ i.i L1, L1 L2 ] */
     454      883728 :                 if (list_length(rel->exps) > 1) {
     455      746818 :                         list *exps = rel->exps;
     456      746818 :                         node *n = exps->h;
     457      746818 :                         bool needed = false;
     458     3746037 :                         for(n = n->next; n && !needed; n = n->next) {
     459     2999220 :                                 sql_exp *e = n->data;
     460     2999220 :                                 if (e->type == e_column && !is_selfref(e)) {
     461    19783030 :                                         for(node *m = exps->h; m && m != n && !needed; m = m->next) {
     462    17225049 :                                                 sql_exp *h = m->data;
     463    17225049 :                                                 if (exp_match_exp(h,e))
     464        5603 :                                                         needed = true;
     465             :                                         }
     466             :                                 }
     467             :                         }
     468      746817 :                         if (needed) {
     469        5603 :                                 rel->exps = sa_list(v->sql->sa);
     470        5603 :                                 node *n = exps->h;
     471        5603 :                                 list_append(rel->exps, n->data);
     472       77861 :                                 for(n = n->next; n; n = n->next) {
     473       72258 :                                         sql_exp *e = n->data;
     474       72258 :                                         if (e->type == e_column && !is_selfref(e)) {
     475       31687 :                                                 node *m = list_find(rel->exps, e, (fcmp)&exp_match_exp_cmp);
     476       31688 :                                                 if (m) {
     477        7529 :                                                         sql_exp *me = m->data;
     478        7529 :                                                         if (me->alias.label != me->nid) {
     479         780 :                                                                 sql_exp *ne = exp_ref(v->sql, m->data);
     480         780 :                                                                 exp_setalias(ne, e->alias.label, exp_relname(e), exp_name(e));
     481         780 :                                                                 exp_propagate(v->sql->sa, ne, e);
     482         780 :                                                                 set_selfref(ne);
     483         780 :                                                                 e = ne;
     484             :                                                         }
     485             :                                                 }
     486             :                                         }
     487       72259 :                                         list_append(rel->exps, e);
     488             :                                 }
     489             :                         }
     490             :                 }
     491             :         }
     492             : 
     493             :         /* project/project cleanup is done later */
     494     3377931 :         if (is_join(rel->op) || is_select(rel->op)) {
     495     1089313 :                 node *n;
     496     1089313 :                 list *exps = NULL, *l_exps, *r_exps;
     497     1089313 :                 sql_rel *l = rel->l;
     498     1089313 :                 sql_rel *r = rel->r;
     499     1089313 :                 sql_rel *t;
     500     1089313 :                 int nlexps = 0, i = 0;
     501             : 
     502             :                 /* Don't rewrite refs, non projections or constant or
     503             :                    order by projections  */
     504     1089313 :                 if (!l || rel_is_ref(l) || is_topn(l->op) || is_sample(l->op) ||
     505     1003672 :                    (is_join(rel->op) && !list_empty(rel->attr)) ||
     506      953358 :                    (is_join(rel->op) && (!r || rel_is_ref(r))) ||
     507      930021 :                    (is_left(rel->op) && (rel->flag&MERGE_LEFT) /* can't push projections above merge statements left joins */) ||
     508      930019 :                    (is_select(rel->op) && l->op != op_project) ||
     509      452495 :                    (is_join(rel->op) && ((l->op != op_project && r->op != op_project) || is_topn(r->op) || is_sample(r->op))) ||
     510      156442 :                   ((l->op == op_project && (!l->l || l->r || project_unsafe(l, is_select(rel->op)))) ||
     511       88416 :                    (is_join(rel->op) && (r->op == op_project && (!r->l || r->r || project_unsafe(r, false))))))
     512     1014034 :                         return rel;
     513             : 
     514       75279 :                 if (l->op == op_project && l->l) {
     515      910178 :                         for (n = l->exps->h; n; n = n->next) {
     516      874243 :                                 sql_exp *e = n->data;
     517      874243 :                                 if (!(is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) &&
     518      873024 :                                         !(e->type == e_column && !has_label(e)))
     519             :                                                 return rel;
     520             :                         }
     521             :                 }
     522       53931 :                 if (is_join(rel->op) && r->op == op_project && r->l) {
     523       57796 :                         for (n = r->exps->h; n; n = n->next) {
     524       53129 :                                 sql_exp *e = n->data;
     525       53129 :                                 if (!(is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) &&
     526       52722 :                                         !(e->type == e_column && !has_label(e)))
     527             :                                                 return rel;
     528             :                         }
     529             :                 }
     530             : 
     531       38539 :                 if (l->op == op_project && l->l) {
     532             :                         /* Go through the list of project expressions.
     533             :                            Check if they can be pushed up, ie are they not
     534             :                            changing or introducing any columns used
     535             :                            by the upper operator. */
     536             : 
     537       35133 :                         exps = new_exp_list(v->sql->sa);
     538      799328 :                         for (n = l->exps->h; n; n = n->next) {
     539      764195 :                                 sql_exp *e = n->data;
     540             : 
     541             :                                 /* we cannot rewrite projection with atomic values from outer joins */
     542      764195 :                                 if (is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) {
     543         661 :                                         list_append(exps, e);
     544      763534 :                                 } else if (e->type == e_column) {
     545      763534 :                                         if (has_label(e))
     546             :                                                 return rel;
     547      763534 :                                         list_append(exps, e);
     548             :                                 } else {
     549             :                                         return rel;
     550             :                                 }
     551             :                         }
     552             :                 } else {
     553        3406 :                         exps = rel_projections(v->sql, l, NULL, 1, 1);
     554             :                 }
     555       38539 :                 nlexps = list_length(exps);
     556             :                 /* also handle right hand of join */
     557       38539 :                 if (is_join(rel->op) && r->op == op_project && r->l && list_empty(rel->attr)) {
     558             :                         /* Here we also check all expressions of r like above
     559             :                            but also we need to check for ambiguous names. */
     560             : 
     561       39348 :                         for (n = r->exps->h; n; n = n->next) {
     562       34930 :                                 sql_exp *e = n->data;
     563             : 
     564             :                                 /* we cannot rewrite projection with atomic values from outer joins */
     565       34930 :                                 if (is_column(e->type) && exp_is_atom(e) && !(is_left(rel->op) || is_full(rel->op))) {
     566         147 :                                         list_append(exps, e);
     567       34783 :                                 } else if (e->type == e_column) {
     568       34534 :                                         if (has_label(e))
     569             :                                                 return rel;
     570       34534 :                                         list_append(exps, e);
     571             :                                 } else {
     572             :                                         return rel;
     573             :                                 }
     574             :                         }
     575       33872 :                 } else if (is_join(rel->op) && list_empty(rel->attr)) {
     576       18192 :                         list *r_exps = rel_projections(v->sql, r, NULL, 1, 1);
     577       18192 :                         list_merge(exps, r_exps, (fdup)NULL);
     578             :                 }
     579       38290 :                 if (!list_empty(rel->attr))
     580           0 :                         append(exps, exp_ref(v->sql, rel->attr->h->data));
     581             :                 /* Here we should check for ambiguous names ? */
     582       38290 :                 if (is_join(rel->op) && r && list_empty(rel->attr)) {
     583       22610 :                         t = (l->op == op_project && l->l)?l->l:l;
     584       22610 :                         l_exps = rel_projections(v->sql, t, NULL, 1, 1);
     585             :                         /* conflict with old right expressions */
     586       22610 :                         r_exps = rel_projections(v->sql, r, NULL, 1, 1);
     587       22610 :                         if (rel->attr)
     588           0 :                                 append(r_exps, exp_ref(v->sql, rel->attr->h->data));
     589       22610 :                         t = (r->op == op_project && r->l)?r->l:r;
     590       22610 :                         r_exps = rel_projections(v->sql, t, NULL, 1, 1);
     591             :                         /* conflict with new right expressions */
     592      607950 :                         for(n = l_exps->h; n; n = n->next) {
     593      585675 :                                 sql_exp *e = n->data;
     594             : 
     595      585675 :                                 if (exp_is_atom(e))
     596           0 :                                         continue;
     597      585675 :                                 if (e->nid && exps_bind_nid(r_exps, e->nid))
     598             :                                         return rel;
     599      585340 :                                 if (e->alias.label && exps_bind_nid(r_exps, e->alias.label))
     600             :                                         return rel;
     601             :                         }
     602             :                         /* conflict with new left expressions */
     603      155106 :                         for(n = r_exps->h; n; n = n->next) {
     604      132831 :                                 sql_exp *e = n->data;
     605             : 
     606      132831 :                                 if (exp_is_atom(e))
     607           0 :                                         continue;
     608      132831 :                                 if (e->nid && exps_bind_nid(l_exps, e->nid))
     609             :                                         return rel;
     610      132831 :                                 if (e->alias.label && exps_bind_nid(l_exps, e->alias.label))
     611             :                                         return rel;
     612             :                         }
     613             :                 }
     614             : 
     615             :                 /* rename operator expressions */
     616       37955 :                 if (l->op == op_project) {
     617             :                         /* rewrite rel from rel->l into rel->l->l */
     618       34769 :                         if (rel->exps) {
     619       71731 :                                 for (n = rel->exps->h; n; n = n->next) {
     620       38026 :                                         sql_exp *e = n->data, *ne;
     621             : 
     622       38026 :                                         ne = exp_rename(v->sql, e, l, l->l);
     623       38026 :                                         assert(ne);
     624       38026 :                                         if (ne != e && exp_name(e))
     625           0 :                                                 exp_propagate(v->sql->sa, ne, e);
     626       38026 :                                         n->data = ne;
     627             :                                 }
     628             :                         }
     629       34769 :                         rel->l = l->l;
     630       34769 :                         l->l = NULL;
     631       34769 :                         rel_destroy(l);
     632             :                 }
     633       37955 :                 if (is_join(rel->op) && r->op == op_project && list_empty(rel->attr)) {
     634             :                         /* rewrite rel from rel->r into rel->r->l */
     635        4093 :                         if (rel->exps) {
     636        7464 :                                 for (n = rel->exps->h; n; n = n->next) {
     637        4011 :                                         sql_exp *e = n->data, *ne;
     638             : 
     639        4011 :                                         ne = exp_rename(v->sql, e, r, r->l);
     640        4011 :                                         assert(ne);
     641        4011 :                                         if (ne != e && exp_name(e))
     642           0 :                                                 exp_propagate(v->sql->sa, ne, e);
     643        4011 :                                         n->data = ne;
     644             :                                 }
     645             :                         }
     646        4093 :                         rel->r = r->l;
     647        4093 :                         r->l = NULL;
     648        4093 :                         rel_destroy(r);
     649             :                 }
     650             :                 /* Done, ie introduce new project */
     651       37955 :                 exps_fix_card(exps, rel->card);
     652             :                 /* Fix nil flag */
     653       37955 :                 if (!list_empty(exps)) {
     654      819299 :                         for (n = exps->h ; n && i < nlexps ; n = n->next, i++) {
     655      781344 :                                 sql_exp *e = n->data;
     656             : 
     657      781344 :                                 if (is_right(rel->op) || is_full(rel->op))
     658         448 :                                         set_has_nil(e);
     659      781344 :                                 set_not_unique(e);
     660             :                         }
     661      164541 :                         for (; n ; n = n->next) {
     662      126586 :                                 sql_exp *e = n->data;
     663             : 
     664      126586 :                                 if (is_left(rel->op) || is_full(rel->op))
     665       39442 :                                         set_has_nil(e);
     666      126586 :                                 set_not_unique(e);
     667             :                         }
     668             :                 }
     669       37955 :                 v->changes++;
     670       37955 :                 return rel_inplace_project(v->sql->sa, rel, NULL, exps);
     671             :         }
     672     2288618 :         if (is_groupby(rel->op) && !rel_is_ref(rel) && rel->exps && list_length(rel->exps) > 1) {
     673       39369 :                 node *n;
     674       39369 :                 int fnd = 0;
     675       39369 :                 list *aexps, *pexps;
     676             : 
     677             :                 /* check if some are expressions aren't e_aggr */
     678      154202 :                 for (n = rel->exps->h; n && !fnd; n = n->next) {
     679      114833 :                         sql_exp *e = n->data;
     680             : 
     681      114833 :                         if (e->type != e_aggr && e->type != e_column && e->type != e_atom && e->card > CARD_ATOM) {
     682      114833 :                                 fnd = 1;
     683             :                         }
     684             :                 }
     685             :                 /* only aggr, no rewrite needed */
     686       39369 :                 if (!fnd)
     687             :                         return rel;
     688             : 
     689           0 :                 aexps = sa_list(v->sql->sa);
     690           0 :                 pexps = sa_list(v->sql->sa);
     691           0 :                 for (n = rel->exps->h; n; n = n->next) {
     692           0 :                         sql_exp *e = n->data, *ne = NULL;
     693             : 
     694           0 :                         switch (e->type) {
     695           0 :                         case e_atom: /* move over to the projection */
     696           0 :                                 list_append(pexps, e);
     697           0 :                                 break;
     698           0 :                         case e_func:
     699           0 :                                 list_append(pexps, e);
     700           0 :                                 list_split_aggr_and_project(v->sql, aexps, e->l);
     701           0 :                                 break;
     702           0 :                         case e_convert:
     703           0 :                                 list_append(pexps, e);
     704           0 :                                 e->l = split_aggr_and_project(v->sql, aexps, e->l);
     705           0 :                                 break;
     706           0 :                         default: /* simple alias */
     707           0 :                                 list_append(aexps, e);
     708           0 :                                 ne = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
     709           0 :                                 assert(0);
     710             :                                 list_append(pexps, ne);
     711             :                                 break;
     712             :                         }
     713             :                 }
     714           0 :                 v->changes++;
     715           0 :                 rel->exps = aexps;
     716           0 :                 return rel_inplace_project( v->sql->sa, rel, NULL, pexps);
     717             :         }
     718             :         return rel;
     719             : }
     720             : 
     721             : static sql_rel *
     722      170967 : rel_push_project_up(visitor *v, global_props *gp, sql_rel *rel)
     723             : {
     724      170967 :         (void) gp;
     725      170967 :         return rel_visitor_bottomup(v, rel, &rel_push_project_up_);
     726             : }
     727             : 
     728             : run_optimizer
     729      612286 : bind_push_project_up(visitor *v, global_props *gp)
     730             : {
     731      612286 :         int flag = v->sql->sql_optimizer;
     732      612090 :         return gp->opt_level == 1 && (flag & push_project_up) &&
     733     1224376 :                    gp->cnt[op_project] ? rel_push_project_up : NULL;
     734             : }
     735             : 
     736             : 
     737             : static void split_exps(mvc *sql, list *exps, sql_rel *rel);
     738             : 
     739             : static int
     740     3080682 : exp_refers_cmp( sql_exp *e1, sql_exp *e2)
     741             : {
     742     3080682 :         if (exp_refers(e1,e2))
     743           0 :                 return 0;
     744             :         return -1;
     745             : }
     746             : 
     747             : sql_exp *
     748      130251 : add_exp_too_project(mvc *sql, sql_exp *e, sql_rel *rel)
     749             : {
     750      130251 :         node *n = list_find(rel->exps, e, (fcmp)&exp_match_exp_cmp);
     751             : 
     752             :         /* if not matching we may refer to an older expression */
     753      130251 :         if (!n)
     754       82714 :                 n = list_find(rel->exps, e, (fcmp)&exp_refers_cmp);
     755       82714 :         if (!n) {
     756       82714 :                 exp_label(sql->sa, e, ++sql->label);
     757       82714 :                 append(rel->exps, e);
     758             :         } else {
     759       47537 :                 sql_exp *ne = n->data;
     760             : 
     761       47537 :                 if (rel && rel->l) {
     762       47537 :                         if (ne && ne->alias.label && rel_find_nid(rel->l, ne->alias.label)) {
     763           0 :                                 exp_label(sql->sa, e, ++sql->label);
     764           0 :                                 append(rel->exps, e);
     765           0 :                                 ne = e;
     766             :                         }
     767             :                 }
     768             :                 e = ne;
     769             :         }
     770      130251 :         e = exp_ref(sql, e);
     771      130251 :         return e;
     772             : }
     773             : 
     774             : static void
     775      150835 : add_exps_too_project(mvc *sql, list *exps, sql_rel *rel)
     776             : {
     777      150835 :         node *n;
     778             : 
     779      150835 :         if (!exps)
     780             :                 return;
     781      466725 :         for(n=exps->h; n; n = n->next) {
     782      315890 :                 sql_exp *e = n->data;
     783             : 
     784      315890 :                 if (e->type != e_column && !exp_is_atom(e))
     785      129971 :                         n->data = add_exp_too_project(sql, e, rel);
     786             :         }
     787             : }
     788             : 
     789             : static sql_exp *
     790      495684 : split_exp(mvc *sql, sql_exp *e, sql_rel *rel)
     791             : {
     792      495684 :         if (exp_is_atom(e))
     793             :                 return e;
     794      367821 :         switch(e->type) {
     795             :         case e_column:
     796             :                 return e;
     797       16653 :         case e_convert:
     798       16653 :                 e->l = split_exp(sql, e->l, rel);
     799       16653 :                 return e;
     800      159958 :         case e_aggr:
     801             :         case e_func:
     802      159958 :                 if (!is_analytic(e) && !exp_has_sideeffect(e)) {
     803      158098 :                         sql_subfunc *f = e->f;
     804      158098 :                         if (e->type == e_func && !f->func->s && is_caselike_func(f) /*is_ifthenelse_func(f)*/) {
     805             :                                 return e;
     806             :                         } else {
     807      150835 :                                 split_exps(sql, e->l, rel);
     808      150835 :                                 add_exps_too_project(sql, e->l, rel);
     809             :                         }
     810             :                 }
     811             :                 return e;
     812         484 :         case e_cmp:
     813         484 :                 if (e->flag == cmp_or || e->flag == cmp_filter) {
     814         461 :                         split_exps(sql, e->l, rel);
     815         461 :                         split_exps(sql, e->r, rel);
     816          23 :                 } else if (e->flag == cmp_in || e->flag == cmp_notin) {
     817           0 :                         e->l = split_exp(sql, e->l, rel);
     818           0 :                         split_exps(sql, e->r, rel);
     819             :                 } else {
     820          23 :                         e->l = split_exp(sql, e->l, rel);
     821          23 :                         e->r = split_exp(sql, e->r, rel);
     822          23 :                         if (e->f)
     823          21 :                                 e->f = split_exp(sql, e->f, rel);
     824             :                 }
     825             :                 return e;
     826             :         case e_atom:
     827             :         case e_psm:
     828             :                 return e;
     829             :         }
     830             :         return e;
     831             : }
     832             : 
     833             : static void
     834      151764 : split_exps(mvc *sql, list *exps, sql_rel *rel)
     835             : {
     836      151764 :         if (list_empty(exps))
     837             :                 return;
     838      468597 :         for(node *n=exps->h; n; n = n->next){
     839      316833 :                 sql_exp *e = n->data;
     840             : 
     841      316833 :                 e = split_exp(sql, e, rel);
     842      316833 :                 n->data = e;
     843             :         }
     844             : }
     845             : 
     846             : sql_rel *
     847      585718 : rel_split_project_(visitor *v, sql_rel *rel, int top)
     848             : {
     849     1171438 :         if (mvc_highwater(v->sql))
     850             :                 return rel;
     851             : 
     852      585718 :         if (!rel)
     853             :                 return NULL;
     854      585718 :         if (is_project(rel->op) && list_length(rel->exps) && (is_groupby(rel->op) || rel->l) && !need_distinct(rel) && !is_single(rel)) {
     855      164310 :                 list *exps = rel->exps;
     856      164310 :                 node *n;
     857      164310 :                 int funcs = 0;
     858      164310 :                 sql_rel *nrel;
     859             : 
     860             :                 /* are there functions */
     861     1278173 :                 for (n=exps->h; n && !funcs; n = n->next) {
     862     1113863 :                         sql_exp *e = n->data;
     863             : 
     864     1113863 :                         funcs = exp_has_func(e);
     865             :                 }
     866             :                 /* introduce extra project */
     867      164310 :                 if (funcs && rel->op != op_project) {
     868           7 :                         nrel = rel_project(v->sql->sa, rel->l,
     869           7 :                                 rel_projections(v->sql, rel->l, NULL, 1, 1));
     870           7 :                         rel->l = nrel;
     871             :                         /* recursively split all functions and add those to the projection list */
     872           7 :                         split_exps(v->sql, rel->exps, nrel);
     873          14 :                         if (nrel->l && !(nrel->l = rel_split_project_(v, nrel->l, (is_topn(rel->op)||is_sample(rel->op))?top:0)))
     874             :                                 return NULL;
     875           7 :                         return rel;
     876      164303 :                 } else if (funcs && !top && list_empty(rel->r)) {
     877             :                         /* projects can have columns point back into the expression list, ie
     878             :                          * create a new list including the split expressions */
     879       16566 :                         node *n;
     880       16566 :                         list *exps = rel->exps;
     881             : 
     882       16566 :                         rel->exps = sa_list(v->sql->sa);
     883      178697 :                         for (n=exps->h; n; n = n->next)
     884      162131 :                                 append(rel->exps, split_exp(v->sql, n->data, rel));
     885      147737 :                 } else if (funcs && top && rel_is_ref(rel) && list_empty(rel->r)) {
     886             :                         /* inplace */
     887           9 :                         list *exps = rel_projections(v->sql, rel, NULL, 1, 1);
     888           9 :                         sql_rel *l = rel_project(v->sql->sa, rel->l, NULL);
     889           9 :                         rel->l = l;
     890           9 :                         l->exps = rel->exps;
     891           9 :                         set_processed(l);
     892           9 :                         rel->exps = exps;
     893             :                 }
     894             :         }
     895      585711 :         if (is_mset(rel->op) || is_set(rel->op) || is_basetable(rel->op))
     896             :                 return rel;
     897      382369 :         if (rel->l && (rel->op != op_table || rel->flag != TRIGGER_WRAPPER)) {
     898      402889 :                 rel->l = rel_split_project_(v, rel->l, (is_topn(rel->op)||is_sample(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0);
     899      372991 :                 if (!rel->l)
     900             :                         return NULL;
     901             :         }
     902      382371 :         if ((is_join(rel->op) || is_semi(rel->op)) && rel->r) {
     903      111279 :                 rel->r = rel_split_project_(v, rel->r, (is_topn(rel->op)||is_sample(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0);
     904      111279 :                 if (!rel->r)
     905             :                         return NULL;
     906             :         }
     907             :         return rel;
     908             : }
     909             : 
     910             : static sql_rel *
     911      101443 : rel_split_project(visitor *v, global_props *gp, sql_rel *rel)
     912             : {
     913      101443 :         (void) gp;
     914      101443 :         return rel_split_project_(v, rel, 1);
     915             : }
     916             : 
     917             : run_optimizer
     918      612288 : bind_split_project(visitor *v, global_props *gp)
     919             : {
     920      612288 :         int flag = v->sql->sql_optimizer;
     921      541837 :         return gp->opt_cycle == 0 && gp->opt_level == 1 && (flag & split_project) &&
     922     1154117 :                    (gp->cnt[op_project] || gp->cnt[op_groupby]) ? rel_split_project : NULL;
     923             : }
     924             : 
     925             : 
     926             : static sql_rel *
     927      171161 : rel_project_reduce_casts(visitor *v, global_props *gp, sql_rel *rel)
     928             : {
     929      171161 :         if (!rel)
     930             :                 return NULL;
     931      171161 :         (void) gp;
     932      171161 :         if (gp->opt_level == 1 && v->value_based_opt && is_simple_project(rel->op) && list_length(rel->exps)) {
     933       86602 :                 list *exps = rel->exps;
     934       86602 :                 node *n;
     935             : 
     936      416198 :                 for (n=exps->h; n; n = n->next) {
     937      329596 :                         sql_exp *e = n->data;
     938             : 
     939      329596 :                         if (e && e->type == e_func) {
     940       30546 :                                 sql_subfunc *f = e->f;
     941       30546 :                                 sql_subtype *res = f->res->h->data;
     942             : 
     943       30546 :                                 if (!f->func->s && !strcmp(f->func->base.name, "sql_mul") && res->scale > 0) {
     944           6 :                                         list *args = e->l;
     945           6 :                                         sql_exp *h = args->h->data;
     946           6 :                                         sql_exp *t = args->t->data;
     947           6 :                                         atom *ha = exp_value(v->sql, h), *ta = exp_value(v->sql, t);
     948             : 
     949           6 :                                         if (ha || ta) {
     950           5 :                                                 atom *a = ha ? ha : ta;
     951           5 :                                                 atom *na = reduce_scale(v->sql, a);
     952             : 
     953           5 :                                                 if (na && na != a) {
     954           2 :                                                         int rs = a->tpe.scale - na->tpe.scale;
     955           2 :                                                         res->scale -= rs;
     956           2 :                                                         if (ha) {
     957           0 :                                                                 h->r = NULL;
     958           0 :                                                                 h->l = na;
     959             :                                                         } else {
     960           2 :                                                                 t->r = NULL;
     961           2 :                                                                 t->l = na;
     962             :                                                         }
     963           2 :                                                         v->changes++;
     964             :                                                 }
     965             :                                         }
     966             :                                 }
     967             :                         }
     968             :                 }
     969             :         }
     970             :         return rel;
     971             : }
     972             : 
     973             : run_optimizer
     974      612286 : bind_project_reduce_casts(visitor *v, global_props *gp)
     975             : {
     976      612286 :         int flag = v->sql->sql_optimizer;
     977      612286 :         return gp->cnt[op_project] && (flag & project_reduce_casts) ? rel_project_reduce_casts : NULL;
     978             : }
     979             : 
     980             : static sql_column *
     981       94323 : exp_find_column_( sql_rel *rel, sql_exp *exp, int pnr, sql_rel **bt )
     982             : {
     983       94323 :         if (exp->type == e_column)
     984       94297 :                 return name_find_column(rel, exp->l, exp->r, pnr, bt);
     985             :         return NULL;
     986             : }
     987             : 
     988             : static int
     989           0 : rel_part_nr( sql_rel *rel, sql_exp *e )
     990             : {
     991           0 :         sql_column *c = NULL;
     992           0 :         sql_rel *bt = NULL;
     993           0 :         assert(e->type == e_cmp);
     994             : 
     995           0 :         c = exp_find_column_(rel, e->l, -1, &bt);
     996           0 :         if (!c)
     997           0 :                 c = exp_find_column_(rel, e->r, -1, &bt);
     998           0 :         if (!c && e->f)
     999           0 :                 c = exp_find_column_(rel, e->f, -1, &bt);
    1000           0 :         if (!c || !bt || !rel_base_get_mergetable(bt))
    1001           0 :                 return -1;
    1002           0 :         sql_table *pp = c->t;
    1003           0 :         sql_table *mt = rel_base_get_mergetable(bt);
    1004           0 :         return find_member_pos(mt->members, pp);
    1005             : }
    1006             : 
    1007             : static int
    1008           0 : rel_uses_part_nr( sql_rel *rel, sql_exp *e, int pnr )
    1009             : {
    1010           0 :         sql_column *c = NULL;
    1011           0 :         sql_rel *bt = NULL;
    1012           0 :         assert(e->type == e_cmp);
    1013             : 
    1014             :         /*
    1015             :          * following case fails.
    1016             :          *
    1017             :          * semijoin( A1, union [A1, A2] )
    1018             :          * The union will never return proper column (from A2).
    1019             :          * ie need different solution (probably pass pnr).
    1020             :          */
    1021           0 :         c = exp_find_column_(rel, e->l, pnr, &bt);
    1022           0 :         if (!c)
    1023           0 :                 c = exp_find_column_(rel, e->r, pnr, &bt);
    1024           0 :         if (c && bt && rel_base_get_mergetable(bt)) {
    1025           0 :                 sql_table *pp = c->t;
    1026           0 :                 sql_table *mt = rel_base_get_mergetable(bt);
    1027           0 :                 if (find_member_pos(mt->members, pp) == pnr)
    1028             :                         return 1;
    1029             :         }
    1030             :         /* for projects we may need to do a rename! */
    1031           0 :         if (is_project(rel->op) || is_topn(rel->op) || is_sample(rel->op))
    1032           0 :                 return rel_uses_part_nr( rel->l, e, pnr);
    1033             : 
    1034           0 :         if (is_munion(rel->op)) {
    1035             :                 list *l = rel->l;
    1036             :                 if (rel_uses_part_nr( l->h->data, e, pnr))
    1037             :                         return 1;
    1038           0 :         } else if (is_union(rel->op) || is_join(rel->op) || is_semi(rel->op)) {
    1039           0 :                 if (rel_uses_part_nr( rel->l, e, pnr))
    1040             :                         return 1;
    1041           0 :                 if (!is_semi(rel->op) && rel_uses_part_nr( rel->r, e, pnr))
    1042             :                         return 1;
    1043             :         }
    1044             :         return 0;
    1045             : }
    1046             : 
    1047             : static sql_column *
    1048        2733 : exp_is_pkey(sql_rel *rel, sql_exp *e)
    1049             : {
    1050        2733 :         if (find_prop(e->p, PROP_HASHCOL)) { /* aligned PKEY JOIN */
    1051         238 :                 fcmp cmp = (fcmp)&kc_column_cmp;
    1052         238 :                 sql_column *c = exp_find_column(rel, e, -2);
    1053             : 
    1054         238 :                 if (c && c->t->pkey && list_find(c->t->pkey->k.columns, c, cmp) != NULL)
    1055             :                         return c;
    1056             :         }
    1057             :         return NULL;
    1058             : }
    1059             : 
    1060             : static sql_exp *
    1061        3464 : rel_is_join_on_pkey(sql_rel *rel, bool pk_fk) /* pk_fk is used to verify is a join on pk-fk */
    1062             : {
    1063        3464 :         if (!rel || !rel->exps)
    1064             :                 return NULL;
    1065        4751 :         for (node *n = rel->exps->h; n; n = n->next) {
    1066        2519 :                 sql_exp *je = n->data;
    1067             : 
    1068        3778 :                 if (je->type == e_cmp && je->flag == cmp_equal &&
    1069        2518 :                         (exp_is_pkey(rel, je->l) || exp_is_pkey(rel, je->r)) &&
    1070         237 :                         (!pk_fk || find_prop(je->p, PROP_JOINIDX)))
    1071           0 :                         return je;
    1072             :         }
    1073             :         return NULL;
    1074             : }
    1075             : 
    1076             : static int
    1077           0 : exps_has_predicate( list *l )
    1078             : {
    1079           0 :         node *n;
    1080             : 
    1081           0 :         for( n = l->h; n; n = n->next){
    1082           0 :                 sql_exp *e = n->data;
    1083             : 
    1084           0 :                 if (e->card <= CARD_ATOM)
    1085             :                         return 1;
    1086             :         }
    1087             :         return 0;
    1088             : }
    1089             : 
    1090             : static sql_rel *
    1091         656 : rel_find_select( sql_rel *r)
    1092             : {
    1093        2248 :         while (!is_select(r->op) && r->l && is_project(r->op))
    1094             :                 r = r->l;
    1095         656 :         if (is_select(r->op))
    1096          15 :                 return r;
    1097             :         return NULL;
    1098             : }
    1099             : 
    1100             : static bool
    1101      134843 : rels_share_rel(list *l)
    1102             : {
    1103      134843 :         sql_rel *ref = NULL;
    1104      212439 :         for (node *n = l->h; n; n = n->next) {
    1105      211783 :                 sql_rel *r = n->data;
    1106      211783 :                 if(!ref) {
    1107      134843 :                         if ((ref = rel_find_ref(r)) == NULL)
    1108             :                                 return false;
    1109       76940 :                 } else if (ref != rel_find_ref(r)) {
    1110             :                                 return false;
    1111             :                 }
    1112             :         }
    1113             :         return true;
    1114             : }
    1115             : 
    1116             : static inline sql_rel *
    1117     2659227 : rel_merge_munion(visitor *v, sql_rel *rel)
    1118             : {
    1119     2659227 :         if (is_munion(rel->op) && rels_share_rel(rel->l)) {
    1120         656 :                 list *rels = rel->l, *nrels = NULL;
    1121         656 :                 sql_rel *cur = NULL, *curs = NULL;
    1122             : 
    1123             :                 /* Find selects and try to merge */
    1124         656 :                 for(node *n = rels->h; n; n = n->next) {
    1125         656 :                         sql_rel *r = n->data;
    1126         656 :                         sql_rel *s = rel_find_select(r);
    1127             : 
    1128         656 :                         if (!s)
    1129             :                                 return rel;
    1130          15 :                         if (cur) {
    1131           0 :                                 if (s != r->l || curs->l != s->l || !rel_is_ref(s->l) ||
    1132             :                                         /* for now only union(project*(select(R),project*(select(R))) */
    1133           0 :                                    !s->exps || !curs->exps || exps_has_predicate(s->exps) || exps_has_predicate(curs->exps)) {
    1134           0 :                                         if (nrels)
    1135           0 :                                                 append(nrels, r);
    1136             :                                         else
    1137             :                                                 return rel;
    1138             :                                 }
    1139             : 
    1140             :                                 /* merge, ie. add 'or exp' */
    1141           0 :                                 curs->exps = append(new_exp_list(v->sql->sa), exp_or(v->sql->sa, cur->exps, s->exps, 0));
    1142           0 :                                 if (!nrels) {
    1143           0 :                                         nrels = sa_list(v->sql->sa);
    1144           0 :                                         append(nrels, cur);
    1145             :                                 }
    1146             :                         } else {
    1147          15 :                                 if (s != r->l || !rel_is_ref(s->l)) {
    1148          15 :                                         if (nrels) {
    1149           0 :                                                 append(nrels, r);
    1150             :                                         } else {
    1151             :                                                 return rel;
    1152             :                                         }
    1153             :                                 }
    1154             :                                 cur = r;
    1155             :                                 curs = s;
    1156             :                         }
    1157             :                 }
    1158           0 :                 if (nrels) {
    1159           0 :                         v->changes++;
    1160           0 :                         rel->l = nrels;
    1161             :                 }
    1162           0 :                 return rel;
    1163             :         }
    1164             :         return rel;
    1165             : }
    1166             : 
    1167             : static sql_rel *
    1168     2659227 : rel_optimize_munions_bottomup_(visitor *v, sql_rel *rel)
    1169             : {
    1170     2659227 :         rel = rel_merge_munion(v, rel);
    1171     2659227 :         return rel;
    1172             : }
    1173             : 
    1174             : static sql_rel *
    1175       25763 : rel_optimize_munions_bottomup(visitor *v, global_props *gp, sql_rel *rel)
    1176             : {
    1177       25763 :         (void) gp;
    1178       25763 :         return rel_visitor_bottomup(v, rel, &rel_optimize_munions_bottomup_);
    1179             : }
    1180             : 
    1181             : run_optimizer
    1182      612287 : bind_optimize_unions_bottomup(visitor *v, global_props *gp)
    1183             : {
    1184      612287 :         int flag = v->sql->sql_optimizer;
    1185      612091 :         return gp->opt_level == 1 && gp->cnt[op_munion] && (flag & optimize_unions_bottomup)
    1186      612287 :                    ? rel_optimize_munions_bottomup : NULL;
    1187             : }
    1188             : 
    1189             : 
    1190             : static inline sql_rel *
    1191     3639853 : rel_project_cse(visitor *v, sql_rel *rel)
    1192             : {
    1193     3639853 :         if (is_project(rel->op) && rel->exps) {
    1194     1467940 :                 node *n, *m;
    1195     1467940 :                 list *nexps;
    1196     1467940 :                 int needed = 0;
    1197             : 
    1198     8520589 :                 for (n=rel->exps->h; n && !needed; n = n->next) {
    1199     7052650 :                         sql_exp *e1 = n->data;
    1200             : 
    1201     7052650 :                         if (e1->type != e_column && !exp_is_atom(e1) && exp_name(e1)) {
    1202    16044466 :                                 for (m=n->next; m && !needed; m = m->next){
    1203    15320008 :                                         sql_exp *e2 = m->data;
    1204             : 
    1205    15320008 :                                         if (exp_name(e2) && exp_match_exp(e1, e2))
    1206    15320008 :                                                 needed = 1;
    1207             :                                 }
    1208             :                         }
    1209             :                 }
    1210             : 
    1211     1467939 :                 if (!needed)
    1212             :                         return rel;
    1213             : 
    1214         759 :                 nexps = new_exp_list(v->sql->sa);
    1215        8737 :                 for (n=rel->exps->h; n; n = n->next) {
    1216        7978 :                         sql_exp *e1 = n->data;
    1217             : 
    1218        7978 :                         if (e1->type != e_column && !exp_is_atom(e1) && exp_name(e1)) {
    1219       34095 :                                 for (m=nexps->h; m; m = m->next){
    1220       29489 :                                         sql_exp *e2 = m->data;
    1221             : 
    1222       29489 :                                         if (exp_name(e2) && exp_match_exp(e1, e2)) {
    1223         799 :                                                 assert(e2->alias.label);
    1224         799 :                                                 sql_exp *ne = exp_ref(v->sql, e2);
    1225             : 
    1226         799 :                                                 ne = exp_propagate(v->sql->sa, ne, e1);
    1227         799 :                                                 assert(!e1->ref);
    1228         799 :                                                 exp_setalias(ne, e1->alias.label, exp_relname(e1), exp_name(e1));
    1229         799 :                                                 set_selfref(ne);
    1230         799 :                                                 assert(ne->nid);
    1231             :                                                 e1 = ne;
    1232             :                                                 break;
    1233             :                                         }
    1234             :                                 }
    1235             :                         }
    1236        7978 :                         append(nexps, e1);
    1237             :                 }
    1238         759 :                 rel->exps = nexps;
    1239             :         }
    1240             :         return rel;
    1241             : }
    1242             : 
    1243             : static int exp_is_const_op(sql_exp *exp, sql_exp *tope, sql_rel *expr);
    1244             : 
    1245             : static int
    1246          76 : exps_are_const_op(list *exps, sql_exp *tope, sql_rel *expr)
    1247             : {
    1248          76 :         int ok = 1;
    1249             : 
    1250          76 :         if (list_empty(exps))
    1251             :                 return 1;
    1252         152 :         for (node *n = exps->h; n && ok; n = n->next)
    1253          76 :                 ok &= exp_is_const_op(n->data, tope, expr);
    1254             :         return ok;
    1255             : }
    1256             : 
    1257             : static int
    1258         522 : exp_is_const_op(sql_exp *exp, sql_exp *tope, sql_rel *expr)
    1259             : {
    1260         661 :         switch (exp->type) {
    1261          28 :         case e_atom:
    1262          28 :                 return exp->f ? 0 : 1;
    1263           2 :         case e_convert:
    1264           2 :                 return exp_is_const_op(exp->l, tope, expr);
    1265          76 :         case e_func:
    1266             :         case e_aggr: {
    1267          76 :                 sql_subfunc *f = exp->f;
    1268          76 :                 if (f->func->side_effect || IS_ANALYTIC(f->func))
    1269             :                         return 0;
    1270          76 :                 return exps_are_const_op(exp->l, tope, expr);
    1271             :         }
    1272           0 :         case e_cmp:
    1273           0 :                 if (exp->flag == cmp_or || exp->flag == cmp_filter)
    1274           0 :                         return exps_are_const_op(exp->l, tope, expr) && exps_are_const_op(exp->r, tope, expr);
    1275           0 :                 if (exp->flag == cmp_in || exp->flag == cmp_notin)
    1276           0 :                         return exp_is_const_op(exp->l, tope, expr) && exps_are_const_op(exp->r, tope, expr);
    1277           0 :                 return exp_is_const_op(exp->l, tope, expr) && exp_is_const_op(exp->r, tope, expr) && (!exp->f || exp_is_const_op(exp->f, tope, expr));
    1278         555 :         case e_column: {
    1279         555 :                 if (is_simple_project(expr->op) || is_groupby(expr->op)) {
    1280             :                         /* in a simple projection, self-references may occur */
    1281         555 :                         sql_exp *nexp = exps_bind_nid(expr->exps, exp->nid);
    1282         555 :                         if (nexp && list_position(expr->exps, nexp) < list_position(expr->exps, tope))
    1283             :                                 return exp_is_const_op(nexp, exp, expr);
    1284             :                 }
    1285             :                 return 0;
    1286             :         }
    1287             :         default:
    1288             :                 return 0;
    1289             :         }
    1290             : }
    1291             : 
    1292             : static sql_exp *
    1293          23 : rel_groupby_add_count_star(mvc *sql, sql_rel *rel, sql_exp *count_star_exp, bool *count_added)
    1294             : {
    1295          23 :         if (count_star_exp)
    1296             :                 return count_star_exp;
    1297          16 :         if (!list_empty(rel->exps)) {
    1298          44 :                 for (node *n=rel->exps->h; n ; n = n->next) {
    1299          32 :                         sql_exp *e = n->data;
    1300             : 
    1301          32 :                         if (exp_aggr_is_count(e) && !need_distinct(e) && list_length(e->l) == 0)
    1302           4 :                                 return e;
    1303             :                 }
    1304             :         }
    1305          12 :         sql_subfunc *cf = sql_bind_func(sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
    1306          12 :         *count_added = true;
    1307          12 :         return rel_groupby_add_aggr(sql, rel, exp_aggr(sql->sa, NULL, cf, 0, 0, rel->card, 0));
    1308             : }
    1309             : 
    1310             : /* optimize sum(x + 12) into sum(x) + 12*count(*) */
    1311             : static inline sql_rel *
    1312       40482 : rel_simplify_sum(visitor *v, sql_rel *rel)
    1313             : {
    1314       40482 :         if (is_groupby(rel->op) && !list_empty(rel->exps)) {
    1315       40427 :                 sql_rel *upper = NULL, *groupby = rel, *l = groupby->l;
    1316       40427 :                 sql_exp *count_star_exp = NULL;
    1317       40427 :                 bool count_added = false;
    1318             : 
    1319      125684 :                 for (node *n=groupby->exps->h; n ; n = n->next) {
    1320       85257 :                         sql_exp *e = n->data;
    1321       85257 :                         list *el = e->l;
    1322       85257 :                         sql_subfunc *sf = e->f;
    1323             : 
    1324       85257 :                         if (e->type == e_aggr && !need_distinct(e) && sf->func->type == F_AGGR && !sf->func->s && !strcmp(sf->func->base.name, "sum")) {
    1325        7924 :                                 sql_rel *expr = groupby;
    1326        7924 :                                 sql_exp *exp = (sql_exp*) el->h->data, *oexp = exp;
    1327             : 
    1328        7924 :                                 while (is_numeric_upcast(exp))
    1329           0 :                                         exp = exp->l;
    1330             :                                 /* we want to find a +/- call, so expect them to appear only on simple projections */
    1331       16875 :                                 while (exp && exp->type == e_column && (is_simple_project(expr->op) || is_groupby(expr->op)) && expr->l) {
    1332       10531 :                                         sql_rel *nexpr = NULL;
    1333       10531 :                                         sql_exp *nexp = rel_find_exp_and_corresponding_rel(l, exp, false, &nexpr, NULL);
    1334             : 
    1335             :                                         /* break when it loops on the same relation */
    1336       10531 :                                         if (nexpr == expr && list_position(expr->exps, nexp) >= list_position(expr->exps, exp))
    1337             :                                                 break;
    1338        8951 :                                         expr = nexpr;
    1339        8951 :                                         exp = oexp = nexp;
    1340        8953 :                                         while (exp && is_numeric_upcast(exp))
    1341           2 :                                                 exp = exp->l;
    1342             :                                 }
    1343             : 
    1344        7924 :                                 list *expl = exp ? exp->l : NULL;
    1345       85257 :                                 sql_subfunc *expf = exp ? exp->f : NULL;
    1346             :                                 /* found a candidate function */
    1347        6820 :                                 if (exp && exp->type == e_func && expf->func->type == F_FUNC && !expf->func->s &&
    1348        1141 :                                         (!strcmp(expf->func->base.name, "sql_sub") || !strcmp(expf->func->base.name, "sql_add"))) {
    1349         223 :                                         sql_exp *e1 = (sql_exp*) expl->h->data, *e2 = (sql_exp*) expl->h->next->data;
    1350         223 :                                         int e1ok = exp_is_const_op(e1, oexp, expr), e2ok = exp_is_const_op(e2, oexp, expr);
    1351             : 
    1352         223 :                                         if ((!e1ok && e2ok) || (e1ok && !e2ok)) {
    1353          24 :                                                 sql_exp *ocol = e1ok ? e2 : e1, *constant = e1ok ? e1 : e2, *mul, *colref, *naggr, *newop, *col = ocol, *match;
    1354          24 :                                                 bool add_col = true, prepend = false;
    1355             : 
    1356             :                                                 /* if 'col' is a projection from the under relation, then use it */
    1357          24 :                                                 while (is_numeric_upcast(col))
    1358           0 :                                                         col = col->l;
    1359          24 :                                                 if (col->type == e_column) {
    1360          22 :                                                         sql_exp *colf = exps_find_exp(l->exps, col);
    1361             : 
    1362             :                                                         /* col is already found in the inner relation. Also look for a new reference for col, eg sql_add(col, 1), 1 as col */
    1363          22 :                                                         if (colf && list_position(l->exps, colf) < list_position(l->exps, oexp)) {
    1364             :                                                                 add_col = false;
    1365          10 :                                                         } else if (!colf && is_simple_project(l->op) && list_empty(l->r) && !rel_is_ref(l) && !need_distinct(l)) {
    1366             :                                                                 prepend = true;
    1367             :                                                                 add_col = false;
    1368           1 :                                                         } else if (!colf && (is_simple_project(l->op) || is_groupby(l->op)))  {
    1369             :                                                                 /* on these scenarios the new column expression will be ordered/(grouped for distinct) or create potential ambiguity (multiple ref), so skip */
    1370           1 :                                                                 continue;
    1371             :                                                         }
    1372           2 :                                                 } else if ((is_simple_project(l->op) && (!list_empty(l->r) || rel_is_ref(l) || need_distinct(l))) || is_groupby(l->op)) {
    1373           0 :                                                         continue;
    1374             :                                                 }
    1375             : 
    1376             :                                                 /* add count star */
    1377          23 :                                                 count_star_exp = rel_groupby_add_count_star(v->sql, groupby, count_star_exp, &count_added);
    1378             :                                                 /* multiply constant by count star */
    1379          23 :                                                 if (!(mul = rel_binop_(v->sql, NULL, constant, exp_ref(v->sql, count_star_exp), "sys", "sql_mul", card_value, true))) {
    1380           0 :                                                         v->sql->session->status = 0;
    1381           0 :                                                         v->sql->errstr[0] = '\0';
    1382           0 :                                                         continue;
    1383             :                                                 }
    1384          23 :                                                 if (!has_label(mul))
    1385          23 :                                                         exp_label(v->sql->sa, mul, ++v->sql->label);
    1386             : 
    1387          23 :                                                 colref = exp_ref(v->sql, ocol);
    1388          23 :                                                 if (add_col) /* if 'col' will be added, then make sure it has an unique label */
    1389           3 :                                                         exp_label(v->sql->sa, colref, ++v->sql->label);
    1390             : 
    1391             :                                                 /* 'oexp' contains the type for the input for the 'sum' aggregate */
    1392          23 :                                                 if (!(colref = exp_check_type(v->sql, exp_subtype(oexp), groupby, colref, type_equal))) {
    1393           0 :                                                         v->sql->session->status = 0;
    1394           0 :                                                         v->sql->errstr[0] = '\0';
    1395           0 :                                                         continue;
    1396             :                                                 }
    1397             :                                                 /* update sum to use the column side only */
    1398          23 :                                                 sql_subfunc *a = sql_bind_func(v->sql, "sys", "sum", exp_subtype(colref), NULL, F_AGGR, true, true);
    1399          23 :                                                 if (!a)
    1400           0 :                                                         continue;
    1401          23 :                                                 naggr = exp_aggr(v->sql->sa, list_append(sa_list(v->sql->sa), colref), a, need_distinct(e), need_no_nil(e), groupby->card, has_nil(e));
    1402          23 :                                                 if ((match = exps_any_match(groupby->exps, naggr)) && list_position(groupby->exps, match) < list_position(groupby->exps, e)) { /* found a matching aggregate, use it */
    1403           9 :                                                         naggr = exp_ref(v->sql, match);
    1404           9 :                                                         exp_label(v->sql->sa, naggr, ++v->sql->label);
    1405          14 :                                                 } else if (!has_label(naggr)) { /* otherwise use the new one */
    1406          14 :                                                         exp_label(v->sql->sa, naggr, ++v->sql->label);
    1407             :                                                 }
    1408             : 
    1409             :                                                 /* generate addition/subtraction. subtraction is not commutative, so keep original order! */
    1410          23 :                                                 if (!(newop = rel_binop_(v->sql, NULL, e1 == constant ? mul : exp_ref(v->sql, naggr), e1 == constant ? exp_ref(v->sql, naggr) : mul, "sys", expf->func->base.name, card_value, true))) {
    1411           0 :                                                         v->sql->session->status = 0;
    1412           0 :                                                         v->sql->errstr[0] = '\0';
    1413           0 :                                                         continue;
    1414             :                                                 }
    1415          23 :                                                 if (!(newop = exp_check_type(v->sql, exp_subtype(e), groupby, newop, type_equal))) {
    1416           0 :                                                         v->sql->session->status = 0;
    1417           0 :                                                         v->sql->errstr[0] = '\0';
    1418           0 :                                                         continue;
    1419             :                                                 }
    1420             : 
    1421             :                                                 /* a column reference can be prepended to the inner relation, add it after all the check type calls succeed */
    1422          23 :                                                 if (prepend)
    1423           9 :                                                         list_prepend(l->exps, col);
    1424             : 
    1425             :                                                 /* the new generate function calls are valid, update relations */
    1426             :                                                 /* we need a new relation for the multiplication and addition/subtraction */
    1427          23 :                                                 if (!upper) {
    1428             :                                                         /* be careful with relations with more than 1 reference, so do in-place replacement */
    1429          16 :                                                         list *projs = rel_projections(v->sql, rel, NULL, 1, 1);
    1430          16 :                                                         sql_rel *nrel = rel_groupby(v->sql, rel->l, NULL);
    1431          16 :                                                         nrel->exps = rel->exps;
    1432          16 :                                                         nrel->r = rel->r;
    1433          16 :                                                         nrel->card = rel->card;
    1434          16 :                                                         nrel->nrcols = list_length(rel->exps);
    1435          16 :                                                         set_processed(nrel);
    1436          16 :                                                         rel_dup(rel->l);
    1437          16 :                                                         upper = rel = rel_inplace_project(v->sql->sa, rel, nrel, projs);
    1438          16 :                                                         rel->card = exps_card(projs);
    1439          16 :                                                         groupby = nrel; /* update pointers :) */
    1440          16 :                                                         l = groupby->l;
    1441             :                                                 }
    1442          91 :                                                 for (node *n = upper->exps->h ; n ; ) {
    1443          68 :                                                         node *next = n->next;
    1444          68 :                                                         sql_exp *re = n->data;
    1445             : 
    1446             :                                                         /* remove the old reference to the aggregate because we will use the addition/subtraction instead,
    1447             :                                                            as well as the count star if count_added */
    1448          68 :                                                         if (exp_refers(e, re) || (count_added && exp_refers(count_star_exp, re)))
    1449          35 :                                                                 list_remove_node(upper->exps, NULL, n);
    1450             :                                                         n = next;
    1451             :                                                 }
    1452             : 
    1453             :                                                 /* update sum aggregate with new aggregate or reference to an existing one */
    1454          23 :                                                 n->data = naggr;
    1455          23 :                                                 list_hash_clear(groupby->exps);
    1456             : 
    1457             :                                                 /* add column reference with new label, if 'col' was not found */
    1458          23 :                                                 if (add_col) {
    1459           3 :                                                         if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l))
    1460           0 :                                                                 groupby->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
    1461           3 :                                                         list_append(l->exps, ocol);
    1462             :                                                 }
    1463             : 
    1464             :                                                 /* propagate alias and add new expression */
    1465          23 :                                                 exp_prop_alias(v->sql->sa, newop, e);
    1466          23 :                                                 list_append(upper->exps, newop);
    1467          23 :                                                 v->changes++;
    1468             :                                         }
    1469             :                                 }
    1470             :                         }
    1471             :                 }
    1472             :         }
    1473       40482 :         return rel;
    1474             : }
    1475             : 
    1476             : /* optimize group by x+1,(y-2)*3,2-z into group by x,y,z */
    1477             : static inline sql_rel *
    1478       40482 : rel_simplify_groupby_columns(visitor *v, sql_rel *rel)
    1479             : {
    1480       40482 :         if (is_groupby(rel->op) && !list_empty(rel->r)) {
    1481       29929 :                 sql_rel *l = rel->l;
    1482             : 
    1483       77667 :                 for (node *n=((list*)rel->r)->h; n ; n = n->next) {
    1484       47738 :                         sql_exp *e = n->data;
    1485       47738 :                         e->used = 0; /* we need to use this flag, clean it first */
    1486             :                 }
    1487       77667 :                 for (node *n=((list*)rel->r)->h; n ; n = n->next) {
    1488       47738 :                         sql_exp *e = n->data;
    1489             : 
    1490       47738 :                         if (e->type == e_column) {
    1491       47701 :                                 bool searching = true;
    1492       47701 :                                 sql_rel *efrel = NULL;
    1493       47701 :                                 sql_exp *exp = rel_find_exp_and_corresponding_rel(l, e, false, &efrel, NULL), *col = NULL, *tope = exp;
    1494             : 
    1495       95421 :                                 while (searching && !col) {
    1496       47720 :                                         sql_exp *exp_col = exp;
    1497             : 
    1498       47720 :                                         if (exp && is_numeric_upcast(exp))
    1499          54 :                                                 exp = exp->l;
    1500       47720 :                                         if (exp && exp->type == e_func) {
    1501         658 :                                                 list *el = exp->l;
    1502         658 :                                                 sql_subfunc *sf = exp->f;
    1503             :                                                 /* At the moment look only at injective math functions */
    1504         658 :                                                 if (sf->func->type == F_FUNC && !sf->func->s &&
    1505         645 :                                                         (!strcmp(sf->func->base.name, "sql_sub") || !strcmp(sf->func->base.name, "sql_add") || !strcmp(sf->func->base.name, "sql_mul"))) {
    1506         111 :                                                         sql_exp *e1 = (sql_exp*) el->h->data, *e2 = (sql_exp*) el->h->next->data;
    1507             :                                                         /* the optimization cannot be done if side-effect calls (e.g. rand()) are present */
    1508         215 :                                                         int e1ok = exp_is_atom(e1) && !exp_unsafe(e1, true, false) && !exp_has_sideeffect(e1), e2ok = exp_is_atom(e2) && !exp_unsafe(e2, true, false) && !exp_has_sideeffect(e2);
    1509             : 
    1510         111 :                                                         if ((!e1ok && e2ok) || (e1ok && !e2ok)) {
    1511          57 :                                                                 sql_exp *c = e1ok ? e2 : e1;
    1512          57 :                                                                 bool done = false;
    1513          57 :                                                                 exp_col = exps_find_exp(efrel->exps, c);
    1514          57 :                                                                 if (exp_col)
    1515          28 :                                                                         c = exp_col;
    1516             : 
    1517         116 :                                                                 while (!done) {
    1518          59 :                                                                         if (is_numeric_upcast(c))
    1519           0 :                                                                                 c = c->l;
    1520          59 :                                                                         if (c->type == e_column) {
    1521          40 :                                                                                 if (is_simple_project(efrel->op) || is_groupby(efrel->op)) {
    1522             :                                                                                         /* in a simple projection, self-references may occur */
    1523          40 :                                                                                         sql_exp *nc = exps_find_exp(efrel->exps, c);
    1524          40 :                                                                                         if (nc && list_position(efrel->exps, nc) < list_position(efrel->exps, exp_col)) {
    1525           2 :                                                                                                 exp_col = c;
    1526           2 :                                                                                                 c = nc;
    1527           2 :                                                                                                 continue;
    1528             :                                                                                         }
    1529             :                                                                                 }
    1530             :                                                                                 col = c; /* 'c' is a column reference from the left relation */
    1531             :                                                                                 done = true;
    1532             :                                                                         } else {
    1533             :                                                                                 exp = c; /* maybe a nested function call, let's continue searching */
    1534             :                                                                                 done = true;
    1535             :                                                                         }
    1536             :                                                                 }
    1537             :                                                         } else {
    1538             :                                                                 searching = false;
    1539             :                                                         }
    1540             :                                                 } else {
    1541             :                                                         searching = false;
    1542             :                                                 }
    1543             :                                         } else {
    1544             :                                                 searching = false;
    1545             :                                         }
    1546             :                                 }
    1547       47701 :                                 if (col) { /* a column reference was found */
    1548          38 :                                         const char *rname = exp_relname(e), *name = exp_name(e);
    1549             : 
    1550             :                                         /* the grouping column has an alias, we have to keep it */
    1551          38 :                                         if ((rname && name && (strcmp(rname, e->l) != 0 || strcmp(name, e->r) != 0)) || (!rname && name && strcmp(name, e->r) != 0)) {
    1552           2 :                                                 if (!has_label(e)) /* dangerous to merge, skip it */
    1553           0 :                                                         continue;
    1554           2 :                                                 if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l))
    1555           0 :                                                         rel->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
    1556           2 :                                                 list_append(l->exps, e);
    1557           2 :                                                 n->data = e = exp_ref(v->sql, e);
    1558           2 :                                                 list_hash_clear(rel->r);
    1559             :                                         }
    1560             : 
    1561          38 :                                         sql_exp *f = exps_find_exp(rel->r, col);
    1562             : 
    1563          38 :                                         if (f && list_position(rel->r, f) < list_position(rel->r, e)) { /* if already present, remove it */
    1564          10 :                                                 e->used = 1;
    1565             :                                         } else {
    1566             :                                                 /* Use an unique reference to the column found. If there's another grouping column label pointing into it,
    1567             :                                                    rel_groupby_cse will hopefully remove it */
    1568          28 :                                                 sql_exp *colf = exps_find_exp(l->exps, col);
    1569             : 
    1570             :                                                 /* col is already found in the inner relation. Also look for a new reference for col, eg sql_add(col, 1), 1 as col */
    1571          28 :                                                 if (colf && list_position(l->exps, colf) < list_position(l->exps, tope)) {
    1572           2 :                                                         n->data = exp_ref(v->sql, col);
    1573          23 :                                                 } else if (!colf && is_simple_project(l->op) && list_empty(l->r) && !rel_is_ref(l) && !need_distinct(l)) { /* trivial case, it can be added */
    1574          23 :                                                         sql_exp *ne = col;
    1575          23 :                                                         list_prepend(l->exps, ne);
    1576          23 :                                                         n->data = exp_ref(v->sql, ne);
    1577           0 :                                                 } else if (!colf && (is_simple_project(l->op) || is_groupby(l->op)))  {
    1578             :                                                         /* on these scenarios the new column expression will be ordered/(grouped for distinct) or create potential ambiguity (multiple ref), so skip */
    1579           0 :                                                         continue;
    1580             :                                                 } else {
    1581           3 :                                                         sql_exp *ne = exp_ref(v->sql, col);
    1582             : 
    1583           3 :                                                         if (colf) /* a col reference is already there, add a new label */
    1584           3 :                                                                 exp_label(v->sql->sa, ne, ++v->sql->label);
    1585           3 :                                                         if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l))
    1586           0 :                                                                 rel->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
    1587           3 :                                                         list_append(l->exps, ne);
    1588           3 :                                                         n->data = exp_ref(v->sql, ne);
    1589             :                                                 }
    1590          28 :                                                 list_hash_clear(rel->r);
    1591             :                                         }
    1592          38 :                                         v->changes++;
    1593             :                                 }
    1594             :                         }
    1595             :                 }
    1596       77667 :                 for (node *n=((list*)rel->r)->h; n ; ) {
    1597       47738 :                         node *next = n->next;
    1598       47738 :                         sql_exp *e = n->data;
    1599             : 
    1600       47738 :                         if (e->used) /* remove unnecessary grouping columns */
    1601          10 :                                 list_remove_node(rel->r, NULL, n);
    1602             :                         n = next;
    1603             :                 }
    1604             :         }
    1605       40482 :         return rel;
    1606             : }
    1607             : 
    1608             : /* remove identical grouping columns */
    1609             : static inline sql_rel *
    1610       93717 : rel_groupby_cse(visitor *v, sql_rel *rel)
    1611             : {
    1612       93717 :         if (is_groupby(rel->op) && !list_empty(rel->r)) {
    1613       69276 :                 sql_rel *l = rel->l;
    1614             : 
    1615             :                 /* for every group expression e1 */
    1616      193179 :                 for (node *n=((list*)rel->r)->h; n ; n = n->next) {
    1617      123903 :                         sql_exp *e1 = n->data;
    1618             :                         /* it's good to examine the same expression in the subrelation e.g. in case it's an alias */
    1619             :                         /* TODO maybe cover more cases? Here I only look at the left relation */
    1620      247806 :                         sql_exp *e1_sub = e1->type == e_column ?
    1621      123903 :                                             e1->nid ? exps_bind_nid(l->exps, e1->nid) : exps_find_exp(l->exps, e1) : NULL;
    1622             : 
    1623             :                         /* for every other group expression */
    1624      213017 :                         for (node *m=n->next; m; m = m->next) {
    1625       89114 :                                 sql_exp *e2 = m->data;
    1626      178228 :                                 sql_exp *e2_sub = e2->type == e_column ?
    1627       89114 :                                                 e2->nid ? exps_bind_nid(l->exps, e2->nid) : exps_find_exp(l->exps, e2) : NULL;
    1628             : 
    1629             :                                 /* check if the expression are the same */
    1630       89114 :                                 if (exp_match_exp(e1, e2) || exp_refers(e1, e2) || (e1_sub && e2_sub && (exp_match_exp(e1_sub, e2_sub) || exp_refers(e1_sub, e2_sub)))) {
    1631             : 
    1632             :                                         /* use e2 from rel->exps instead of e2 from the rel->r as it can have an alias from the higher rel */
    1633          41 :                                         sql_exp *e2_in_exps = exps_uses_nid(rel->exps, e2->alias.label);
    1634          41 :                                         assert(e2_in_exps);
    1635             : 
    1636             :                                         /* same as e2 */
    1637          41 :                                         sql_exp *e1_in_exps = exps_uses_nid(rel->exps, e1->alias.label);
    1638          41 :                                         if (!e1_in_exps)
    1639           6 :                                                 continue;
    1640             : 
    1641             :                                         /* write e2 as an e1 alias since the expressions are the same */
    1642          35 :                                         sql_exp* e2_as_e1_alias = exp_copy(v->sql, e1_in_exps);
    1643             :                                         /* NOTE: it is important to get the rname (exp->l) and name (exp->r) from e2 IN the exps
    1644             :                                          * (e2_in_exps), and not from e2, since it could carry an alias from the higher rel */
    1645          35 :                                         exp_setalias(e2_as_e1_alias, e2_as_e1_alias->alias.label, e2_in_exps->l, e2_in_exps->r);
    1646          35 :                                         e2_as_e1_alias->alias.label = e2->alias.label;
    1647             : 
    1648             :                                         /* replace e2 with e2_as_e1_alias in expressions list */
    1649          35 :                                         node *e2_exps_node = list_find(rel->exps, e2_in_exps, NULL);
    1650          35 :                                         list_append_before(rel->exps, e2_exps_node, e2_as_e1_alias);
    1651          35 :                                         list_remove_node(rel->exps, NULL, e2_exps_node);
    1652             : 
    1653             :                                         /* finally remove e2 from the groups' list (->r) since it's redundant */
    1654          35 :                                         node *e2_r_node = list_find(rel->r, e2, NULL);
    1655          35 :                                         list_remove_node(rel->r, NULL, e2_r_node);
    1656             : 
    1657          35 :                                         v->changes++;
    1658             :                                 }
    1659             :                         }
    1660             :                 }
    1661             :         }
    1662       93716 :         return rel;
    1663             : }
    1664             : 
    1665             : sql_exp *list_exps_uses_exp(list *exps, const char *rname, const char *name);
    1666             : 
    1667             : static sql_exp*
    1668       18948 : exp_uses_exp(sql_exp *e, const char *rname, const char *name)
    1669             : {
    1670       19232 :         sql_exp *res = NULL;
    1671             : 
    1672       19232 :         switch (e->type) {
    1673             :                 case e_psm:
    1674             :                         break;
    1675        1915 :                 case e_atom: {
    1676        1915 :                         if (e->f)
    1677           0 :                                 return list_exps_uses_exp(e->f, rname, name);
    1678             :                 } break;
    1679         283 :                 case e_convert:
    1680         283 :                         return exp_uses_exp(e->l, rname, name);
    1681       10512 :                 case e_column: {
    1682       10512 :                         if (e->l && rname && strcmp(e->l, rname) == 0 &&
    1683        2419 :                                 e->r && name && strcmp(e->r, name) == 0)
    1684             :                                 return e;
    1685        8263 :                         if (!e->l && !rname &&
    1686         244 :                                 e->r && name && strcmp(e->r, name) == 0)
    1687             :                                 return e;
    1688             :                 } break;
    1689        6121 :                 case e_func:
    1690             :                 case e_aggr: {
    1691        6121 :                         if (e->l)
    1692        4902 :                                 return list_exps_uses_exp(e->l, rname, name);
    1693             :                 }       break;
    1694         401 :                 case e_cmp: {
    1695         401 :                         if (e->flag == cmp_in || e->flag == cmp_notin) {
    1696           0 :                                 if ((res = exp_uses_exp(e->l, rname, name)))
    1697             :                                         return res;
    1698           0 :                                 return list_exps_uses_exp(e->r, rname, name);
    1699         401 :                         } else if (e->flag == cmp_or || e->flag == cmp_filter) {
    1700           2 :                                 if ((res = list_exps_uses_exp(e->l, rname, name)))
    1701             :                                         return res;
    1702           2 :                                 return list_exps_uses_exp(e->r, rname, name);
    1703             :                         } else {
    1704         399 :                                 if ((res = exp_uses_exp(e->l, rname, name)))
    1705             :                                         return res;
    1706         397 :                                 if ((res = exp_uses_exp(e->r, rname, name)))
    1707             :                                         return res;
    1708         391 :                                 if (e->f)
    1709             :                                         return exp_uses_exp(e->f, rname, name);
    1710             :                         }
    1711             :                 } break;
    1712             :         }
    1713             :         return NULL;
    1714             : }
    1715             : 
    1716             : sql_exp *
    1717        9880 : list_exps_uses_exp(list *exps, const char *rname, const char *name)
    1718             : {
    1719        9880 :         sql_exp *res = NULL;
    1720             : 
    1721        9880 :         if (!exps)
    1722             :                 return NULL;
    1723       28032 :         for (node *n = exps->h; n && !res; n = n->next) {
    1724       18152 :                 sql_exp *e = n->data;
    1725       18152 :                 res = exp_uses_exp(e, rname, name);
    1726             :         }
    1727             :         return res;
    1728             : }
    1729             : 
    1730             : /* find in the list of expression an expression which uses e */
    1731             : sql_exp *
    1732         803 : exps_uses_exp(list *exps, sql_exp *e)
    1733             : {
    1734         803 :         return list_exps_uses_exp(exps, exp_relname(e), exp_name(e));
    1735             : }
    1736             : /*
    1737             :  * Rewrite aggregations over munion all.
    1738             :  *      groupby ([ union all (a, b, c) ], [gbe], [ count, sum ] )
    1739             :  *
    1740             :  * into
    1741             :  *      groupby ([ union all( groupby( a, [gbe], [ count, sum] ),
    1742             :  *                            groupby( b, [gbe], [ count, sum] ),
    1743             :  *                            groupby( c, [gbe], [ count, sum] ) ],
    1744             :  *                       [gbe], [sum, sum] )
    1745             :  */
    1746             : static inline sql_rel *
    1747        1056 : rel_push_aggr_down_n_arry(visitor *v, sql_rel *rel)
    1748             : {
    1749        1056 :         sql_rel *g = rel;
    1750        1056 :         sql_rel *u = rel->l, *ou = u;
    1751        1056 :         sql_rel *r = NULL;
    1752        1056 :         list *rgbe = NULL, *gbe = NULL, *exps = NULL;
    1753        1056 :         node *n, *m;
    1754             : 
    1755             :         // TODO why?
    1756        1056 :         if (u->op == op_project && !need_distinct(u))
    1757         197 :                 u = u->l;
    1758             : 
    1759             :         /* make sure we don't create group by on group by's */
    1760        2146 :         for (node *n = ((list*)u->l)->h; n; n = n->next) {
    1761        1651 :                 r = n->data;
    1762        1651 :                 if (r->op == op_groupby)
    1763             :                         return rel;
    1764             :         }
    1765             : 
    1766             :         /* distinct should be done over the full result */
    1767        1299 :         for (n = g->exps->h; n; n = n->next) {
    1768         910 :                 sql_exp *e = n->data;
    1769         910 :                 sql_subfunc *af = e->f;
    1770             : 
    1771         910 :                 if (e->type == e_atom ||
    1772         910 :                         e->type == e_func ||
    1773         456 :                    (e->type == e_aggr &&
    1774         456 :                    ((strcmp(af->func->base.name, "sum") &&
    1775         319 :                          strcmp(af->func->base.name, "count") &&
    1776         171 :                          strcmp(af->func->base.name, "min") &&
    1777         456 :                          strcmp(af->func->base.name, "max")) ||
    1778             :                    need_distinct(e))))
    1779             :                         return rel;
    1780             :         }
    1781             : 
    1782         389 :         list *nl = sa_list(v->sql->sa);
    1783        1213 :         for (node *n = ((list*)u->l)->h; n; n = n->next) {
    1784         824 :                 r = rel_dup(n->data);
    1785             :                 //n->data = NULL; /* clean list as we steal the relation r, stealing is needed else (with multiple references) double project cleanup fails */
    1786         824 :                 if (!is_project(r->op))
    1787          47 :                         r = rel_project(v->sql->sa, r,
    1788             :                                             rel_projections(v->sql, r, NULL, 1, 1));
    1789         824 :                 rel_rename_exps(v->sql, u->exps, r->exps);
    1790         824 :                 if (u != ou) {
    1791         359 :                         bool isproject = is_project(r->op);
    1792         359 :                         r = rel_project(v->sql->sa, r, NULL);
    1793         359 :                         r->exps = exps_copy(v->sql, ou->exps);
    1794         359 :                         rel_rename_exps(v->sql, ou->exps, r->exps);
    1795         359 :                         set_processed(r);
    1796         359 :                         if (isproject)
    1797         359 :                                 r = rel_push_project_down_(v, r); /* cleanup any double projects */
    1798             :                 }
    1799         824 :                 if (g->r && list_length(g->r) > 0) {
    1800         574 :                         list *gbe = g->r;
    1801         574 :                         rgbe = exps_copy(v->sql, gbe);
    1802             :                 }
    1803         824 :                 r = rel_groupby(v->sql, r, NULL);
    1804         824 :                 r->r = rgbe;
    1805         824 :                 r->nrcols = g->nrcols;
    1806         824 :                 r->card = g->card;
    1807         824 :                 r->exps = exps_copy(v->sql, g->exps);
    1808         824 :                 r->nrcols = list_length(r->exps);
    1809         824 :                 set_processed(r);
    1810             : 
    1811         824 :                 assert(r);
    1812         824 :                 append(nl, r);
    1813             :         }
    1814             : 
    1815             :         /* group by on primary keys which define the partitioning scheme
    1816             :          * don't need a finalizing group by */
    1817             :         /* how to check if a partition is based on some primary key ?
    1818             :          * */
    1819         389 :         if (!list_empty(rel->r)) {
    1820         714 :                 for (node *n = ((list*)rel->r)->h; n; n = n->next) {
    1821         446 :                         sql_exp *e = n->data;
    1822         446 :                         sql_column *c = NULL;
    1823             : 
    1824         446 :                         if ((c = exp_is_pkey(rel, e)) && partition_find_part(v->sql->session->tr, c->t, NULL)) {
    1825             :                                 /* check if key is partition key */
    1826           0 :                                 v->changes++;
    1827           0 :                                 return rel_inplace_setop_n_ary(v->sql, rel, nl, op_munion,
    1828             :                                                                                            rel_projections(v->sql, rel, NULL, 1, 1));
    1829             :                         }
    1830             :                 }
    1831             :         }
    1832             : 
    1833         389 :         if (!list_empty(rel->r)) {
    1834         268 :                 list *ogbe = rel->r;
    1835             : 
    1836         268 :                 gbe = new_exp_list(v->sql->sa);
    1837         714 :                 for (n = ogbe->h; n; n = n->next) {
    1838         446 :                         sql_exp *e = n->data, *ne;
    1839             : 
    1840             :                         /* group by in aggregation list */
    1841         446 :                         ne = exps_uses_exp( rel->exps, e);
    1842         446 :                         if (ne) {
    1843         440 :                                 sql_rel *first_munion_rel = nl->h->data;
    1844         440 :                                 ne = list_find_exp(first_munion_rel->exps, ne);
    1845             :                         }
    1846         440 :                         if (!ne) {
    1847             :                                 /* e only in the u1,u2,...un->r (group by list) */
    1848          24 :                                 for (node *n = nl->h; n; n = n->next) {
    1849          16 :                                         ne = exp_ref(v->sql, e);
    1850          16 :                                         list_append(((sql_rel*)n->data)->exps, ne);
    1851             :                                 }
    1852             :                         }
    1853           8 :                         assert(ne);
    1854         446 :                         ne = exp_ref(v->sql, ne);
    1855         446 :                         append(gbe, ne);
    1856             :                 }
    1857             :         }
    1858             : 
    1859         389 :         u = rel_setop_n_ary(v->sql->sa, nl, op_munion);
    1860         389 :         rel_setop_n_ary_set_exps(v->sql, u,
    1861         389 :                                          rel_projections(v->sql, nl->h->data, NULL, 1, 1), false);
    1862         389 :         set_processed(u);
    1863             : 
    1864         389 :         exps = new_exp_list(v->sql->sa);
    1865        1190 :         for (n = u->exps->h, m = rel->exps->h; n && m; n = n->next, m = m->next) {
    1866         801 :                 sql_exp *ne, *e = n->data, *oa = m->data;
    1867             : 
    1868         801 :                 if (oa->type == e_aggr) {
    1869         350 :                         sql_subfunc *f = oa->f;
    1870         350 :                         int cnt = exp_aggr_is_count(oa);
    1871         350 :                         sql_subfunc *a = sql_bind_func(v->sql, "sys", (cnt)?"sum":f->func->base.name, exp_subtype(e), NULL, F_AGGR, true, true);
    1872             : 
    1873         350 :                         assert(a);
    1874             :                         /* munion of aggr result may have nils
    1875             :                          * because sum/count of empty set */
    1876         350 :                         set_has_nil(e);
    1877         350 :                         e = exp_ref(v->sql, e);
    1878         350 :                         ne = exp_aggr1(v->sql->sa, e, a, need_distinct(e), 1, e->card, 1);
    1879             :                 } else {
    1880         451 :                         ne = exp_copy(v->sql, oa);
    1881             :                 }
    1882         801 :                 exp_setalias(ne, oa->alias.label, exp_find_rel_name(oa), exp_name(oa));
    1883         801 :                 append(exps, ne);
    1884             :         }
    1885         389 :         v->changes++;
    1886         389 :         return rel_inplace_groupby(rel, u, gbe, exps);
    1887             : }
    1888             : 
    1889             : /*
    1890             :  * Rewrite aggregations over union all.
    1891             :  *      groupby ([ union all (a, b) ], [gbe], [ count, sum ] )
    1892             :  *
    1893             :  * into
    1894             :  *      groupby ( [ union all( groupby( a, [gbe], [ count, sum] ), [ groupby( b, [gbe], [ count, sum] )) , [gbe], [sum, sum] )
    1895             :  */
    1896             : static inline sql_rel *
    1897       93717 : rel_push_aggr_down(visitor *v, sql_rel *rel)
    1898             : {
    1899       93717 :         if (rel->op == op_groupby && rel->l) {
    1900       93648 :                 sql_rel *u = rel->l, *ou = u;
    1901       93648 :                 sql_rel *g = rel;
    1902       93648 :                 sql_rel *ul = u->l;
    1903       93648 :                 sql_rel *ur = u->r;
    1904       93648 :                 node *n, *m;
    1905       93648 :                 list *lgbe = NULL, *rgbe = NULL, *gbe = NULL, *exps = NULL;
    1906             : 
    1907       93648 :                 if (u->op == op_project && !need_distinct(u))
    1908       40401 :                         u = u->l;
    1909             : 
    1910       93648 :                 if (!u || !(is_union(u->op) || is_munion(u->op)) || need_distinct(u) || is_single(u) || !u->exps || rel_is_ref(u))
    1911             :                         return rel;
    1912             : 
    1913        1056 :                 if (is_munion(u->op))
    1914        1056 :                         return rel_push_aggr_down_n_arry(v, rel);
    1915             : 
    1916           0 :                 ul = u->l;
    1917           0 :                 ur = u->r;
    1918             : 
    1919             :                 /* make sure we don't create group by on group by's */
    1920           0 :                 if (ul->op == op_groupby || ur->op == op_groupby)
    1921             :                         return rel;
    1922             : 
    1923             :                 /* distinct should be done over the full result */
    1924           0 :                 for (n = g->exps->h; n; n = n->next) {
    1925           0 :                         sql_exp *e = n->data;
    1926           0 :                         sql_subfunc *af = e->f;
    1927             : 
    1928           0 :                         if (e->type == e_atom ||
    1929           0 :                             e->type == e_func ||
    1930           0 :                            (e->type == e_aggr &&
    1931           0 :                            ((strcmp(af->func->base.name, "sum") &&
    1932           0 :                              strcmp(af->func->base.name, "count") &&
    1933           0 :                              strcmp(af->func->base.name, "min") &&
    1934           0 :                              strcmp(af->func->base.name, "max")) ||
    1935             :                            need_distinct(e))))
    1936             :                                 return rel;
    1937             :                 }
    1938             : 
    1939           0 :                 ul = rel_dup(ul);
    1940           0 :                 ur = rel_dup(ur);
    1941           0 :                 if (!is_project(ul->op))
    1942           0 :                         ul = rel_project(v->sql->sa, ul,
    1943             :                                 rel_projections(v->sql, ul, NULL, 1, 1));
    1944           0 :                 if (!is_project(ur->op))
    1945           0 :                         ur = rel_project(v->sql->sa, ur,
    1946             :                                 rel_projections(v->sql, ur, NULL, 1, 1));
    1947           0 :                 rel_rename_exps(v->sql, u->exps, ul->exps);
    1948           0 :                 rel_rename_exps(v->sql, u->exps, ur->exps);
    1949           0 :                 if (u != ou) {
    1950           0 :                         ul = rel_project(v->sql->sa, ul, NULL);
    1951           0 :                         ul->exps = exps_copy(v->sql, ou->exps);
    1952           0 :                         rel_rename_exps(v->sql, ou->exps, ul->exps);
    1953           0 :                         set_processed(ul);
    1954           0 :                         ur = rel_project(v->sql->sa, ur, NULL);
    1955           0 :                         ur->exps = exps_copy(v->sql, ou->exps);
    1956           0 :                         rel_rename_exps(v->sql, ou->exps, ur->exps);
    1957           0 :                         set_processed(ur);
    1958             :                 }
    1959             : 
    1960           0 :                 if (g->r && list_length(g->r) > 0) {
    1961           0 :                         list *gbe = g->r;
    1962             : 
    1963           0 :                         lgbe = exps_copy(v->sql, gbe);
    1964           0 :                         rgbe = exps_copy(v->sql, gbe);
    1965             :                 }
    1966           0 :                 ul = rel_groupby(v->sql, ul, NULL);
    1967           0 :                 ul->r = lgbe;
    1968           0 :                 ul->nrcols = g->nrcols;
    1969           0 :                 ul->card = g->card;
    1970           0 :                 ul->exps = exps_copy(v->sql, g->exps);
    1971           0 :                 ul->nrcols = list_length(ul->exps);
    1972           0 :                 set_processed(ul);
    1973             : 
    1974           0 :                 ur = rel_groupby(v->sql, ur, NULL);
    1975           0 :                 ur->r = rgbe;
    1976           0 :                 ur->nrcols = g->nrcols;
    1977           0 :                 ur->card = g->card;
    1978           0 :                 ur->exps = exps_copy(v->sql, g->exps);
    1979           0 :                 ur->nrcols = list_length(ur->exps);
    1980           0 :                 set_processed(ur);
    1981             : 
    1982             :                 /* group by on primary keys which define the partitioning scheme
    1983             :                  * don't need a finalizing group by */
    1984             :                 /* how to check if a partition is based on some primary key ?
    1985             :                  * */
    1986           0 :                 if (!list_empty(rel->r)) {
    1987           0 :                         for (node *n = ((list*)rel->r)->h; n; n = n->next) {
    1988           0 :                                 sql_exp *e = n->data;
    1989           0 :                                 sql_column *c = NULL;
    1990             : 
    1991           0 :                                 if ((c = exp_is_pkey(rel, e)) && partition_find_part(v->sql->session->tr, c->t, NULL)) {
    1992             :                                         /* check if key is partition key */
    1993           0 :                                         v->changes++;
    1994           0 :                                         return rel_inplace_setop(v->sql, rel, ul, ur, op_union,
    1995             :                                                                                          rel_projections(v->sql, rel, NULL, 1, 1));
    1996             :                                 }
    1997             :                         }
    1998             :                 }
    1999             : 
    2000           0 :                 if (!list_empty(rel->r)) {
    2001           0 :                         list *ogbe = rel->r;
    2002             : 
    2003           0 :                         gbe = new_exp_list(v->sql->sa);
    2004           0 :                         for (n = ogbe->h; n; n = n->next) {
    2005           0 :                                 sql_exp *e = n->data, *ne;
    2006             : 
    2007             :                                 /* group by in aggregation list */
    2008           0 :                                 ne = exps_uses_exp( rel->exps, e);
    2009           0 :                                 if (ne)
    2010           0 :                                         ne = list_find_exp( ul->exps, ne);
    2011           0 :                                 if (!ne) {
    2012             :                                         /* e only in the ul/ur->r (group by list) */
    2013           0 :                                         ne = exp_ref(v->sql, e);
    2014           0 :                                         list_append(ul->exps, ne);
    2015           0 :                                         ne = exp_ref(v->sql, e);
    2016           0 :                                         list_append(ur->exps, ne);
    2017             :                                 }
    2018           0 :                                 assert(ne);
    2019           0 :                                 ne = exp_ref(v->sql, ne);
    2020           0 :                                 append(gbe, ne);
    2021             :                         }
    2022             :                 }
    2023             : 
    2024           0 :                 u = rel_setop(v->sql->sa, ul, ur, op_union);
    2025           0 :                 rel_setop_set_exps(v->sql, u, rel_projections(v->sql, ul, NULL, 1, 1), false);
    2026           0 :                 set_processed(u);
    2027             : 
    2028           0 :                 exps = new_exp_list(v->sql->sa);
    2029           0 :                 for (n = u->exps->h, m = rel->exps->h; n && m; n = n->next, m = m->next) {
    2030           0 :                         sql_exp *ne, *e = n->data, *oa = m->data;
    2031             : 
    2032           0 :                         if (oa->type == e_aggr) {
    2033           0 :                                 sql_subfunc *f = oa->f;
    2034           0 :                                 int cnt = exp_aggr_is_count(oa);
    2035           0 :                                 sql_subfunc *a = sql_bind_func(v->sql, "sys", (cnt)?"sum":f->func->base.name, exp_subtype(e), NULL, F_AGGR, true, true);
    2036             : 
    2037           0 :                                 assert(a);
    2038             :                                 /* union of aggr result may have nils
    2039             :                                  * because sum/count of empty set */
    2040           0 :                                 set_has_nil(e);
    2041           0 :                                 e = exp_ref(v->sql, e);
    2042           0 :                                 ne = exp_aggr1(v->sql->sa, e, a, need_distinct(e), 1, e->card, 1);
    2043             :                         } else {
    2044           0 :                                 ne = exp_copy(v->sql, oa);
    2045             :                         }
    2046           0 :                         exp_setalias(ne, oa->alias.label, exp_find_rel_name(oa), exp_name(oa));
    2047           0 :                         append(exps, ne);
    2048             :                 }
    2049           0 :                 v->changes++;
    2050           0 :                 return rel_inplace_groupby( rel, u, gbe, exps);
    2051             :         }
    2052             :         return rel;
    2053             : }
    2054             : 
    2055             : /*
    2056             :  * More general
    2057             :  *      groupby(
    2058             :  *       [ outer ] join(
    2059             :  *          project(
    2060             :  *            table(A) [ c1, c2, .. ]
    2061             :  *          ) [ c1, c2, identity(c2) as I, .. ],
    2062             :  *          table(B) [ c1, c2, .. ]
    2063             :  *        ) [ A.c1 = B.c1 ]
    2064             :  *      ) [ I ] [ a1, a2, .. ]
    2065             :  *
    2066             :  * ->
    2067             :  *
    2068             :  *      [ outer ] join(
    2069             :  *        project(
    2070             :  *          table(A) [ c1, c2, .. ]
    2071             :  *        ) [ c1, c2, .. ],
    2072             :  *        groupby (
    2073             :  *          table(B) [ c1, c2, .. ]
    2074             :  *        ) [ B.c1 ] [ a1, a2, .. ]
    2075             :  *      ) [ A.c1 = B.c1 ]
    2076             :  */
    2077             : static sql_rel *
    2078       46050 : gen_push_groupby_down(mvc *sql, sql_rel *rel, int *changes)
    2079             : {
    2080       46050 :         sql_rel *j = rel->l;
    2081       46050 :         list *gbe = rel->r;
    2082             : 
    2083       46050 :         if (rel->op == op_groupby && list_length(gbe) == 1 && j->op == op_join){
    2084        6442 :                 sql_rel *jl = j->l, *jr = j->r, *cr, *cl;
    2085        6442 :                 sql_exp *gb = gbe->h->data, *e;
    2086        6442 :                 node *n;
    2087        6442 :                 int left = 1;
    2088        6442 :                 list *aggrs, *aliases, *gbe;
    2089             : 
    2090        6442 :                 if (!is_identity(gb, jl) && !is_identity(gb, jr))
    2091             :                         return rel;
    2092           6 :                 if (jl->op == op_project &&
    2093           4 :                     (e = list_find_exp( jl->exps, gb)) != NULL &&
    2094           2 :                      find_prop(e->p, PROP_HASHCOL) != NULL) {
    2095             :                         left = 0;
    2096             :                         cr = jr;
    2097             :                         cl = jl;
    2098           4 :                 } else if (jr->op == op_project &&
    2099           8 :                     (e = list_find_exp( jr->exps, gb)) != NULL &&
    2100           4 :                      find_prop(e->p, PROP_HASHCOL) != NULL) {
    2101             :                         left = 1;
    2102             :                         cr = jl;
    2103             :                         cl = jr;
    2104             :                 } else {
    2105           0 :                         return rel;
    2106             :                 }
    2107             : 
    2108           6 :                 if ((left && is_base(jl->op)) || (!left && is_base(jr->op))||
    2109           0 :                     (left && is_select(jl->op)) || (!left && is_select(jr->op))
    2110           0 :                     || rel_is_join_on_pkey(j, false))
    2111           6 :                         return rel;
    2112             : 
    2113             :                 /* only add aggr (based on left/right), and repeat the group by column */
    2114           0 :                 aggrs = sa_list(sql->sa);
    2115           0 :                 aliases = sa_list(sql->sa);
    2116           0 :                 if (rel->exps) for (n = rel->exps->h; n; n = n->next) {
    2117           0 :                         sql_exp *ce = n->data;
    2118             : 
    2119           0 :                         if (exp_is_atom(ce))
    2120           0 :                                 list_append(aliases, ce);
    2121           0 :                         else if (ce->type == e_column) {
    2122           0 :                                 if (rel_has_exp(cl, ce, false) == 0) /* collect aliases outside groupby */
    2123           0 :                                         list_append(aliases, ce);
    2124             :                                 else
    2125           0 :                                         list_append(aggrs, ce);
    2126           0 :                         } else if (ce->type == e_aggr) {
    2127           0 :                                 list *args = ce->l;
    2128             : 
    2129             :                                 /* check args are part of left/right */
    2130           0 :                                 if (!list_empty(args) && rel_has_exps(cl, args, false) == 0)
    2131             :                                         return rel;
    2132           0 :                                 if (rel->op != op_join && exp_aggr_is_count(ce))
    2133           0 :                                         ce->p = prop_create(sql->sa, PROP_COUNT, ce->p);
    2134           0 :                                 list_append(aggrs, ce);
    2135             :                         }
    2136             :                 }
    2137             :                 /* TODO move any column expressions (aliases) into the project list */
    2138             : 
    2139             :                 /* find gb in left or right and should be unique */
    2140           0 :                 gbe = sa_list(sql->sa);
    2141             :                 /* push groupby to right, group on join exps */
    2142           0 :                 if (j->exps) for (n = j->exps->h; n; n = n->next) {
    2143           0 :                         sql_exp *ce = n->data, *l = ce->l, *r = ce->r, *e;
    2144             : 
    2145             :                         /* get left/right hand of e_cmp */
    2146           0 :                         assert(ce->type == e_cmp);
    2147           0 :                         if (ce->flag == cmp_equal && is_alias(l->type) && is_alias(r->type) &&
    2148           0 :                                 (((e = rel_find_exp(cr, l)) && rel_find_exp(cl, r)) ||
    2149           0 :                                  ((e = rel_find_exp(cr, r)) && rel_find_exp(cl, l)))) {
    2150           0 :                                 e = exp_ref(sql, e);
    2151           0 :                                 list_append(gbe, e);
    2152             :                         } else {
    2153           0 :                                 return rel;
    2154             :                         }
    2155             :                 }
    2156           0 :                 if (!left)
    2157           0 :                         cr = j->r = rel_groupby(sql, cr, gbe);
    2158             :                 else
    2159           0 :                         cr = j->l = rel_groupby(sql, cr, gbe);
    2160           0 :                 cr->exps = list_merge(cr->exps, aggrs, (fdup)NULL);
    2161           0 :                 set_processed(cr);
    2162           0 :                 if (!is_project(cl->op))
    2163           0 :                         cl = rel_project(sql->sa, cl,
    2164             :                                 rel_projections(sql, cl, NULL, 1, 1));
    2165           0 :                 cl->exps = list_merge(cl->exps, aliases, (fdup)NULL);
    2166           0 :                 set_processed(cl);
    2167           0 :                 if (!left)
    2168           0 :                         j->l = cl;
    2169             :                 else
    2170           0 :                         j->r = cl;
    2171           0 :                 rel -> l = NULL;
    2172           0 :                 rel_destroy(rel);
    2173             : 
    2174           0 :                 if (list_empty(cr->exps) && list_empty(j->exps)) { /* remove crossproduct */
    2175           0 :                         sql_rel *r = cl;
    2176           0 :                         if (!left)
    2177           0 :                                 j->l = NULL;
    2178             :                         else
    2179           0 :                                 j->r = NULL;
    2180           0 :                         rel_destroy(j);
    2181           0 :                         j = r;
    2182             :                 }
    2183           0 :                 (*changes)++;
    2184           0 :                 return j;
    2185             :         }
    2186             :         return rel;
    2187             : }
    2188             : 
    2189             : /*
    2190             :  * Rewrite group(project(join(A,Dict)[a.i==dict.i])[...dict.n])[dict.n][ ... dict.n ]
    2191             :  * into
    2192             :  *      project(join(groupby (A)[a.i],[a.i]), Dict)[a.i==dict.i])[dict.n]
    2193             :  *
    2194             :  */
    2195             : static inline sql_rel *
    2196       93717 : rel_push_groupby_down(visitor *v, sql_rel *rel)
    2197             : {
    2198       93717 :         sql_rel *p = rel->l;
    2199       93717 :         list *gbe = rel->r;
    2200             : 
    2201       93717 :         if (rel->op == op_groupby && gbe && p && is_join(p->op))
    2202       10012 :                 return gen_push_groupby_down(v->sql, rel, &v->changes);
    2203       83705 :         if (rel->op == op_groupby && gbe && p && p->op == op_project) {
    2204       36038 :                 sql_rel *j = p->l;
    2205       36038 :                 sql_rel *jl, *jr;
    2206       36038 :                 node *n;
    2207             : 
    2208       36038 :                 if (!j || j->op != op_join || list_length(j->exps) != 1)
    2209       33975 :                         return gen_push_groupby_down(v->sql, rel, &v->changes);
    2210        2063 :                 jl = j->l;
    2211        2063 :                 jr = j->r;
    2212             : 
    2213             :                 /* check if jr is a dict with index and var still used */
    2214        2063 :                 if (jr->op != op_basetable || jr->l || !jr->r || list_length(jr->exps) != 2)
    2215        2063 :                         return gen_push_groupby_down(v->sql, rel, &v->changes);
    2216             : 
    2217             :                 /* check if group by is done on dict column */
    2218           0 :                 for(n = gbe->h; n; n = n->next) {
    2219           0 :                         sql_exp *ge = n->data, *pe = NULL, *e = NULL;
    2220             : 
    2221             :                         /* find group by exp in project, then in dict */
    2222           0 :                         pe = rel_find_exp(p, ge);
    2223           0 :                         if (pe) /* find project exp in right hand of join, ie dict */
    2224           0 :                                 e = rel_find_exp(jr, pe);
    2225           0 :                         if (pe && e) {  /* Rewrite: join with dict after the group by */
    2226           0 :                                 list *pexps = rel_projections(v->sql, rel, NULL, 1, 1), *npexps;
    2227           0 :                                 node *m;
    2228           0 :                                 sql_exp *ne = j->exps->h->data; /* join exp */
    2229           0 :                                 p->l = jl;   /* Project now only on the left side of the join */
    2230             : 
    2231           0 :                                 ne = ne->l;  /* The left side of the compare is the index of the left */
    2232             : 
    2233             :                                 /* find ge reference in new projection list */
    2234           0 :                                 npexps = sa_list(v->sql->sa);
    2235           0 :                                 for (m = pexps->h; m; m = m->next) {
    2236           0 :                                         sql_exp *a = m->data;
    2237             : 
    2238           0 :                                         if (exp_refers(ge, a)) {
    2239           0 :                                                 sql_exp *sc = jr->exps->t->data;
    2240           0 :                                                 sql_exp *e = exp_ref(v->sql, sc);
    2241           0 :                                                 if (exp_name(a))
    2242           0 :                                                         exp_prop_alias(v->sql->sa, e, a);
    2243             :                                                 a = e;
    2244             :                                         }
    2245           0 :                                         append(npexps, a);
    2246             :                                 }
    2247             : 
    2248             :                                 /* find ge in aggr list */
    2249           0 :                                 for (m = rel->exps->h; m; m = m->next) {
    2250           0 :                                         sql_exp *a = m->data;
    2251             : 
    2252           0 :                                         if (exp_match_exp(a, ge) || exp_refers(ge, a)) {
    2253           0 :                                                 a = exp_ref(v->sql, ne);
    2254           0 :                                                 if (exp_name(ne))
    2255           0 :                                                         exp_prop_alias(v->sql->sa, a, ne);
    2256           0 :                                                 m->data = a;
    2257             :                                         }
    2258             :                                 }
    2259             : 
    2260             :                                 /* change alias pe, ie project out the index  */
    2261           0 :                                 pe->l = (void*)exp_relname(ne);
    2262           0 :                                 pe->r = (void*)exp_name(ne);
    2263           0 :                                 if (exp_name(ne))
    2264           0 :                                         exp_prop_alias(v->sql->sa, pe, ne);
    2265             : 
    2266             :                                 /* change alias ge */
    2267           0 :                                 ge->l = (void*)exp_relname(pe);
    2268           0 :                                 ge->r = (void*)exp_name(pe);
    2269           0 :                                 if (exp_name(pe))
    2270           0 :                                         exp_prop_alias(v->sql->sa, ge, pe);
    2271             : 
    2272             :                                 /* zap both project and groupby name hash tables (as we changed names above) */
    2273           0 :                                 list_hash_clear(rel->exps);
    2274           0 :                                 list_hash_clear((list*)rel->r);
    2275           0 :                                 list_hash_clear(p->exps);
    2276             : 
    2277             :                                 /* add join */
    2278           0 :                                 j->l = rel;
    2279           0 :                                 rel = rel_project(v->sql->sa, j, npexps);
    2280           0 :                                 v->changes++;
    2281             :                         }
    2282             :                 }
    2283             :         }
    2284             :         return rel;
    2285             : }
    2286             : 
    2287             : /* reduce group by expressions based on pkey info
    2288             :  *
    2289             :  * The reduced group by and (derived) aggr expressions are restored via
    2290             :  * extra (new) aggregate columns.
    2291             :  */
    2292             : static inline sql_rel *
    2293       93717 : rel_reduce_groupby_exps(visitor *v, sql_rel *rel)
    2294             : {
    2295       93717 :         list *gbe = rel->r;
    2296             : 
    2297       93717 :         if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel) && list_length(gbe)) {
    2298       45989 :                 node *n, *m;
    2299       45989 :                 int k, j, i, ngbe = list_length(gbe);
    2300       45988 :                 int8_t *scores = SA_NEW_ARRAY(v->sql->ta, int8_t, ngbe);
    2301       45989 :                 sql_column *c;
    2302       45989 :                 sql_table **tbls = SA_NEW_ARRAY(v->sql->ta, sql_table*, ngbe);
    2303       45989 :                 sql_rel **bts = SA_NEW_ARRAY(v->sql->ta, sql_rel*, ngbe), *bt = NULL;
    2304             : 
    2305       45989 :                 gbe = rel->r;
    2306      120209 :                 for (k = 0, i = 0, n = gbe->h; n; n = n->next, k++) {
    2307       74221 :                         sql_exp *e = n->data;
    2308             : 
    2309       74221 :                         c = exp_find_column_(rel, e, -2, &bt);
    2310       74220 :                         if (c) {
    2311       83813 :                                 for(j = 0; j < i; j++)
    2312       31641 :                                         if (c->t == tbls[j] && bts[j] == bt)
    2313             :                                                 break;
    2314       67650 :                                 tbls[j] = c->t;
    2315       67650 :                                 bts[j] = bt;
    2316       67650 :                                 i += (j == i);
    2317             :                         }
    2318             :                 }
    2319       45988 :                 if (i) { /* forall tables find pkey and
    2320             :                                 remove useless other columns */
    2321             :                         /* TODO also remove group by columns which are related to
    2322             :                          * the other columns using a foreign-key join (n->1), ie 1
    2323             :                          * on the to be removed side.
    2324             :                          */
    2325       95344 :                         for(j = 0; j < i; j++) {
    2326       52160 :                                 int l, nr = 0, cnr = 0;
    2327             : 
    2328       52160 :                                 k = list_length(gbe);
    2329       52160 :                                 memset(scores, 0, list_length(gbe));
    2330       52161 :                                 if (tbls[j]->pkey) {
    2331       26298 :                                         for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
    2332       20103 :                                                 fcmp cmp = (fcmp)&kc_column_cmp;
    2333       20103 :                                                 sql_exp *e = n->data;
    2334             : 
    2335       20103 :                                                 c = exp_find_column_(rel, e, -2, &bt);
    2336       28677 :                                                 if (c && c->t == tbls[j] && bts[j] == bt &&
    2337        8574 :                                                     list_find(tbls[j]->pkey->k.columns, c, cmp) != NULL) {
    2338         708 :                                                         scores[l] = 1;
    2339         708 :                                                         nr ++;
    2340       17924 :                                                 } else if (c && c->t == tbls[j] && bts[j] == bt) {
    2341             :                                                         /* Okay we can cleanup a group by column */
    2342        7866 :                                                         scores[l] = -1;
    2343        7866 :                                                         cnr ++;
    2344             :                                                 }
    2345             :                                         }
    2346             :                                 }
    2347        6195 :                                 if (nr) {
    2348         699 :                                         int all = (list_length(tbls[j]->pkey->k.columns) == nr);
    2349         699 :                                         sql_kc *kc = tbls[j]->pkey->k.columns->h->data;
    2350             : 
    2351         699 :                                         c = kc->c;
    2352        1926 :                                         for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
    2353        1227 :                                                 sql_exp *e = n->data;
    2354             : 
    2355             :                                                 /* pkey based group by */
    2356        1376 :                                                 if (scores[l] == 1 && ((all ||
    2357             :                                                    /* first of key */
    2358         833 :                                                    (c == exp_find_column(rel, e, -2))) && !find_prop(e->p, PROP_HASHCOL)))
    2359          13 :                                                         e->p = prop_create(v->sql->sa, PROP_HASHCOL, e->p);
    2360             :                                         }
    2361        2702 :                                         for (m = rel->exps->h; m; m = m->next ){
    2362        2003 :                                                 sql_exp *e = m->data;
    2363             : 
    2364        7340 :                                                 for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
    2365        5350 :                                                         sql_exp *gb = n->data;
    2366             : 
    2367             :                                                         /* pkey based group by */
    2368        5350 :                                                         if (scores[l] == 1 && exp_match_exp(e,gb) && find_prop(gb->p, PROP_HASHCOL) && !find_prop(e->p, PROP_HASHCOL)) {
    2369          13 :                                                                 e->p = prop_create(v->sql->sa, PROP_HASHCOL, e->p);
    2370          13 :                                                                 break;
    2371             :                                                         }
    2372             : 
    2373             :                                                 }
    2374             :                                         }
    2375             :                                 }
    2376       52161 :                                 if (cnr && nr && list_length(tbls[j]->pkey->k.columns) == nr) {
    2377         108 :                                         list *ngbe = new_exp_list(v->sql->sa);
    2378             : 
    2379         392 :                                         for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
    2380         284 :                                                 sql_exp *e = n->data;
    2381             : 
    2382             :                                                 /* keep the group by columns which form a primary key
    2383             :                                                  * of this table. And those unrelated to this table. */
    2384         284 :                                                 if (scores[l] != -1)
    2385         139 :                                                         append(ngbe, e);
    2386             :                                         }
    2387         108 :                                         rel->r = ngbe;
    2388             :                                         /* rewrite gbe and aggr, in the aggr list */
    2389         108 :                                         if (0)
    2390             :                                         for (m = rel->exps->h; m; m = m->next ){
    2391             :                                                 sql_exp *e = m->data;
    2392             :                                                 int fnd = 0;
    2393             : 
    2394             :                                                 for (l = 0, n = gbe->h; l < k && n && !fnd; l++, n = n->next) {
    2395             :                                                         sql_exp *gb = n->data;
    2396             : 
    2397             :                                                         if (scores[l] == -1 && exp_refers(gb, e)) {
    2398             :                                                                 /*
    2399             :                                                                 sql_exp *rs = exp_column(v->sql->sa, gb->l?gb->l:exp_relname(gb), gb->r?gb->r:exp_name(gb), exp_subtype(gb), rel->card, has_nil(gb), is_unique(gb), is_intern(gb));
    2400             :                                                                 exp_setalias(rs, e->alias.label, exp_find_rel_name(e), exp_name(e));
    2401             :                                                                 e = rs;
    2402             :                                                                 */
    2403             :                                                                 assert(e->alias.label == e->nid);
    2404             :                                                                 fnd = 1;
    2405             :                                                         }
    2406             :                                                 }
    2407             :                                                 m->data = e;
    2408             :                                         }
    2409             :                                         /* new reduced aggr expression list */
    2410         108 :                                         assert(list_length(rel->exps)>0);
    2411             :                                         /* only one reduction at a time */
    2412         108 :                                         list_hash_clear(rel->exps);
    2413         108 :                                         v->changes++;
    2414         108 :                                         return rel;
    2415             :                                 }
    2416       52053 :                                 gbe = rel->r;
    2417             :                         }
    2418             :                 }
    2419             :         }
    2420             :         /* remove constants from group by list */
    2421       93609 :         if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel)) {
    2422       46388 :                 int i;
    2423       46388 :                 node *n;
    2424             : 
    2425      120324 :                 for (i = 0, n = gbe->h; n; n = n->next) {
    2426       73936 :                         sql_exp *e = n->data;
    2427             : 
    2428       73936 :                         if (exp_is_atom(e))
    2429          26 :                                 i++;
    2430             :                 }
    2431       46388 :                 if (i) {
    2432          26 :                         list *ngbe = new_exp_list(v->sql->sa);
    2433          26 :                         list *dgbe = new_exp_list(v->sql->sa);
    2434             : 
    2435          53 :                         for (n = gbe->h; n; n = n->next) {
    2436          27 :                                 sql_exp *e = n->data;
    2437             : 
    2438          27 :                                 if (!exp_is_atom(e))
    2439           1 :                                         append(ngbe, e);
    2440             :                                 /* we need at least one gbe */
    2441          26 :                                 else if (!n->next && list_empty(ngbe))
    2442          25 :                                         append(ngbe, e);
    2443             :                                 else
    2444           1 :                                         append(dgbe, e);
    2445             :                         }
    2446          26 :                         rel->r = ngbe;
    2447          26 :                         if (!list_empty(dgbe)) {
    2448             :                                 /* use atom's directly in the aggr expr list */
    2449             : 
    2450           3 :                                 for (n = rel->exps->h; n; n = n->next) {
    2451           2 :                                         sql_exp *e = n->data, *ne = NULL;
    2452             : 
    2453           2 :                                         if (e->type == e_column) {
    2454           2 :                                                 if (e->nid)
    2455           2 :                                                         ne = exps_bind_nid(dgbe, e->nid);
    2456           2 :                                                 if (ne) {
    2457           1 :                                                         ne = exp_copy(v->sql, ne);
    2458           1 :                                                         exp_prop_alias(v->sql->sa, ne, e);
    2459           1 :                                                         e = ne;
    2460             :                                                 }
    2461             :                                         }
    2462           2 :                                         n->data = e;
    2463             :                                 }
    2464           1 :                                 list_hash_clear(rel->exps);
    2465           1 :                                 v->changes++;
    2466             :                         }
    2467             :                 }
    2468             :         }
    2469             :         return rel;
    2470             : }
    2471             : 
    2472             : /* if all arguments to a distinct aggregate are unique, remove 'distinct' property */
    2473             : static inline sql_rel *
    2474       93717 : rel_distinct_aggregate_on_unique_values(visitor *v, sql_rel *rel)
    2475             : {
    2476       93717 :         if (is_groupby(rel->op) && !list_empty(rel->exps)) {
    2477      290401 :                 for (node *n = rel->exps->h; n; n = n->next) {
    2478      196760 :                         sql_exp *exp = (sql_exp*) n->data;
    2479             : 
    2480      196760 :                         if (exp->type == e_aggr && need_distinct(exp)) {
    2481         689 :                                 bool all_unique = true;
    2482         689 :                                 list *l = exp->l;
    2483             : 
    2484        1378 :                                 for (node *m = l->h; m && all_unique; m = m->next) {
    2485         689 :                                         sql_exp *arg = (sql_exp*) m->data;
    2486             : 
    2487         823 :                                         all_unique &= arg->type == e_column && is_unique(arg) && (!is_semantics(exp) || !has_nil(arg));
    2488             :                                 }
    2489         689 :                                 if (!all_unique && exps_card(l) > CARD_ATOM)
    2490         394 :                                         all_unique = exps_unique(v->sql, rel, l) && (!is_semantics(exp) || !have_nil(l));
    2491             :                                 if (all_unique) {
    2492         134 :                                         set_nodistinct(exp);
    2493         134 :                                         v->changes++;
    2494             :                                 }
    2495             :                         }
    2496             :                 }
    2497             :         }
    2498       93716 :         return rel;
    2499             : }
    2500             : 
    2501             : static inline sql_rel *
    2502       93716 : rel_remove_const_aggr(visitor *v, sql_rel *rel)
    2503             : {
    2504       93716 :         if (!rel)
    2505             :                 return rel;
    2506       93716 :         if (rel && is_groupby(rel->op) && list_length(rel->exps) >= 1 && !rel_is_ref(rel)) {
    2507       70293 :                 int needed = 0;
    2508      216595 :                 for (node *n = rel->exps->h; n; n = n->next) {
    2509      146302 :                         sql_exp *exp = (sql_exp*) n->data;
    2510             : 
    2511      146302 :                         if (exp_is_atom(exp) && exp->type != e_aggr)
    2512          55 :                                 needed++;
    2513             :                 }
    2514       70293 :                 if (needed) {
    2515          53 :                         if (!list_empty(rel->r)) {
    2516           3 :                                 int atoms = 0;
    2517             :                                 /* corner case, all grouping columns are atoms */
    2518           6 :                                 for (node *n = ((list*)rel->r)->h; n; n = n->next) {
    2519           3 :                                         sql_exp *exp = (sql_exp*) n->data;
    2520             : 
    2521           3 :                                         if (exp_is_atom(exp))
    2522           0 :                                                 atoms++;
    2523             :                                 }
    2524           3 :                                 if (atoms == list_length(rel->r)) {
    2525           0 :                                         list *nexps = sa_list(v->sql->sa);
    2526           0 :                                         for (node *n = rel->exps->h; n; ) {
    2527           0 :                                                 node *next = n->next;
    2528           0 :                                                 sql_exp *e = (sql_exp*) n->data;
    2529             : 
    2530             :                                                 /* remove references to constant group by columns */
    2531           0 :                                                 if (e->type == e_column) {
    2532           0 :                                                         sql_exp *found = NULL;
    2533           0 :                                                         found = exps_bind_nid(rel->r, e->nid);
    2534           0 :                                                         if (found) {
    2535           0 :                                                                 list_append(nexps, found);
    2536           0 :                                                                 list_remove_node(rel->exps, NULL, n);
    2537             :                                                         }
    2538             :                                                 }
    2539             :                                                 n = next;
    2540             :                                         }
    2541           0 :                                         rel->r = NULL; /* transform it into a global aggregate */
    2542           0 :                                         rel->exps = list_merge(nexps, rel->exps, (fdup) NULL); /* add grouping columns back as projections */
    2543             :                                         /* global aggregates may return 1 row, so filter it based on the count */
    2544           0 :                                         sql_subfunc *cf = sql_bind_func(v->sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
    2545           0 :                                         sql_exp *count = exp_aggr(v->sql->sa, NULL, cf, 0, 1, CARD_ATOM, 0);
    2546           0 :                                         count = rel_groupby_add_aggr(v->sql, rel, count);
    2547           0 :                                         sql_exp *cp = exp_compare(v->sql->sa, exp_ref(v->sql, count), exp_atom(v->sql->sa, atom_int(v->sql->sa, exp_subtype(count), 0)), cmp_notequal);
    2548           0 :                                         rel = rel_select(v->sql->sa, rel, cp);
    2549           0 :                                         set_processed(rel);
    2550           0 :                                         return rel;
    2551             :                                 }
    2552          50 :                         } else if (list_length(rel->exps) == needed) { /* all are const */
    2553          48 :                                 sql_rel *ll = rel->l;
    2554          48 :                                 rel->op = op_project;
    2555             :                                 /* TODO check if l->l == const, else change that */
    2556          48 :                                 if (ll && ll->l) {
    2557          44 :                                         rel_destroy(ll);
    2558          44 :                                         rel->l = rel_project_exp(v->sql, exp_atom_bool(v->sql->sa, 1));
    2559             :                                 }
    2560          48 :                                 return rel;
    2561             :                         }
    2562           5 :                         sql_rel *nrel = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
    2563          17 :                         for (node *n = nrel->exps->h; n; n = n->next) {
    2564          12 :                                 sql_exp *exp = (sql_exp*) n->data;
    2565          12 :                                 if (exp->type == e_column) {
    2566          12 :                                         sql_exp *e = rel_find_exp(rel, exp);
    2567             : 
    2568          12 :                                         if (e && exp_is_atom(e) && e->type == e_atom) {
    2569           7 :                                                 sql_exp *ne = exp_copy(v->sql, e);
    2570           7 :                                                 assert(ne->alias.label);
    2571           7 :                                                 exp_setalias(ne, ne->alias.label, exp_find_rel_name(exp), exp_name(exp));
    2572           7 :                                                 n->data = ne;
    2573           7 :                                                 v->changes++;
    2574             :                                         }
    2575             :                                 }
    2576             :                         }
    2577           5 :                         list *nl = sa_list(v->sql->sa);
    2578          17 :                         for (node *n = rel->exps->h; n; n = n->next) {
    2579          12 :                                 sql_exp *exp = (sql_exp*) n->data;
    2580             : 
    2581          12 :                                 if (!exp_is_atom(exp) || exp->type != e_atom)
    2582           5 :                                         append(nl, exp);
    2583             :                         }
    2584           5 :                         rel->exps = nl;
    2585           5 :                         return nrel;
    2586             :                 }
    2587             :         }
    2588             :         return rel;
    2589             : }
    2590             : 
    2591             : #if 0
    2592             : static sql_rel *
    2593             : rel_groupby_distinct2(visitor *v, sql_rel *rel)
    2594             : {
    2595             :         list *ngbes = sa_list(v->sql->sa), *gbes, *naggrs = sa_list(v->sql->sa), *aggrs = sa_list(v->sql->sa);
    2596             :         sql_rel *l;
    2597             :         node *n;
    2598             : 
    2599             :         gbes = rel->r;
    2600             :         if (!gbes)
    2601             :                 return rel;
    2602             : 
    2603             :         /* check if each aggr is, rewritable (max,min,sum,count)
    2604             :          *                        and only has one argument */
    2605             :         for (n = rel->exps->h; n; n = n->next) {
    2606             :                 sql_exp *e = n->data;
    2607             :                 sql_subfunc *af = e->f;
    2608             : 
    2609             :                 if (e->type == e_aggr &&
    2610             :                    (strcmp(af->func->base.name, "sum") &&
    2611             :                      strcmp(af->func->base.name, "count") &&
    2612             :                      strcmp(af->func->base.name, "min") &&
    2613             :                      strcmp(af->func->base.name, "max")))
    2614             :                         return rel;
    2615             :         }
    2616             : 
    2617             :         for (n = gbes->h; n; n = n->next) {
    2618             :                 sql_exp *e = n->data;
    2619             : 
    2620             :                 e = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
    2621             :                 append(ngbes, e);
    2622             :         }
    2623             : 
    2624             :         /* 1 for each aggr(distinct v) add the attribute expression v to gbes and aggrs list
    2625             :          * 2 for each aggr(z) add aggr_phase2('z') to the naggrs list
    2626             :          * 3 for each group by col, add also to the naggrs list
    2627             :          * */
    2628             :         for (n = rel->exps->h; n; n = n->next) {
    2629             :                 sql_exp *e = n->data;
    2630             : 
    2631             :                 if (e->type == e_aggr && need_distinct(e)) { /* 1 */
    2632             :                         /* need column expression */
    2633             :                         list *args = e->l;
    2634             :                         sql_exp *v = args->h->data;
    2635             :                         append(gbes, v);
    2636             :                         if (!exp_name(v))
    2637             :                                 exp_label(v->sql->sa, v, ++v->sql->label);
    2638             :                         v = exp_column(v->sql->sa, exp_find_rel_name(v), exp_name(v), exp_subtype(v), v->card, has_nil(v), is_unique(v), is_intern(v));
    2639             :                         append(aggrs, v);
    2640             :                         v = exp_aggr1(v->sql->sa, v, e->f, need_distinct(e), 1, e->card, 1);
    2641             :                         exp_setalias(v, e->alias.label, exp_find_rel_name(e), exp_name(e));
    2642             :                         append(naggrs, v);
    2643             :                 } else if (e->type == e_aggr && !need_distinct(e)) {
    2644             :                         sql_exp *v;
    2645             :                         sql_subfunc *f = e->f;
    2646             :                         int cnt = exp_aggr_is_count(e);
    2647             :                         sql_subfunc *a = sql_bind_func(v->sql, "sys", (cnt)?"sum":f->func->base.name, exp_subtype(e), NULL, F_AGGR, true, true);
    2648             : 
    2649             :                         append(aggrs, e);
    2650             :                         if (!exp_name(e))
    2651             :                                 exp_label(v->sql->sa, e, ++v->sql->label);
    2652             :                         set_has_nil(e);
    2653             :                         v = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
    2654             :                         v = exp_aggr1(v->sql->sa, v, a, 0, 1, e->card, 1);
    2655             :                         if (cnt)
    2656             :                                 set_zero_if_empty(v);
    2657             :                         exp_setalias(v, e->alias.label, exp_find_rel_name(e), exp_name(e));
    2658             :                         append(naggrs, v);
    2659             :                 } else { /* group by col */
    2660             :                         if (list_find_exp(gbes, e) || !list_find_exp(naggrs, e)) {
    2661             :                                 append(aggrs, e);
    2662             : 
    2663             :                                 e = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
    2664             :                         }
    2665             :                         append(naggrs, e);
    2666             :                 }
    2667             :         }
    2668             : 
    2669             :         l = rel->l = rel_groupby(v->sql, rel->l, gbes);
    2670             :         l->exps = aggrs;
    2671             :         rel->r = ngbes;
    2672             :         rel->exps = naggrs;
    2673             :         v->changes++;
    2674             :         return rel;
    2675             : }
    2676             : #endif
    2677             : 
    2678             : /* Rewrite group by expressions with distinct
    2679             :  *
    2680             :  * ie select a, count(distinct b) from c where ... groupby a;
    2681             :  * No other aggregations should be present
    2682             :  *
    2683             :  * Rewrite the more general case, good for parallel execution
    2684             :  *
    2685             :  * groupby(R) [e,f] [ aggr1 a distinct, aggr2 b distinct, aggr3 c, aggr4 d]
    2686             :  *
    2687             :  * into
    2688             :  *
    2689             :  * groupby(
    2690             :  *      groupby(R) [e,f,a,b] [ a, b, aggr3 c, aggr4 d]
    2691             :  * ) [e,f]( aggr1 a distinct, aggr2 b distinct, aggr3_phase2 c, aggr4_phase2 d)
    2692             :  */
    2693             : static inline sql_rel *
    2694       93716 : rel_groupby_distinct(visitor *v, sql_rel *rel)
    2695             : {
    2696       93716 :         node *n;
    2697             : 
    2698       93716 :         if (is_groupby(rel->op)) {
    2699       93647 :                 sql_rel *l = rel->l;
    2700       93647 :                 if (!l || is_groupby(l->op))
    2701             :                         return rel;
    2702             :         }
    2703       93582 :         if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel)) {
    2704       46369 :                 int nr = 0, anr = 0;
    2705       46369 :                 list *gbe, *ngbe, *arg, *exps, *nexps;
    2706       46369 :                 sql_exp *distinct = NULL, *darg, *found;
    2707       46369 :                 sql_rel *l = NULL;
    2708             : 
    2709      167576 :                 for (n=rel->exps->h; n && nr <= 2; n = n->next) {
    2710      121207 :                         sql_exp *e = n->data;
    2711      121207 :                         if (need_distinct(e)) {
    2712         393 :                                 distinct = n->data;
    2713         393 :                                 nr++;
    2714             :                         }
    2715      121207 :                         anr += is_aggr(e->type);
    2716             :                 }
    2717       46369 :                 if (nr < 1 || distinct->type != e_aggr)
    2718             :                         return rel;
    2719         386 :                 if (nr > 1 || anr > nr)
    2720             :                         return rel;//rel_groupby_distinct2(v, rel);
    2721         117 :                 arg = distinct->l;
    2722         117 :                 if (list_length(arg) != 1 || list_length(rel->r) + nr != list_length(rel->exps))
    2723           2 :                         return rel;
    2724             : 
    2725         115 :                 gbe = rel->r;
    2726         115 :                 ngbe = sa_list(v->sql->sa);
    2727         115 :                 exps = sa_list(v->sql->sa);
    2728         115 :                 nexps = sa_list(v->sql->sa);
    2729         362 :                 for (n=rel->exps->h; n; n = n->next) {
    2730         247 :                         sql_exp *e = n->data;
    2731         247 :                         if (e != distinct) {
    2732         132 :                                 if (e->type == e_aggr) { /* copy the arguments to the aggregate */
    2733           0 :                                         list *args = e->l;
    2734           0 :                                         if (args) {
    2735           0 :                                                 for (node *n = args->h ; n ; n = n->next) {
    2736           0 :                                                         sql_exp *e = n->data;
    2737           0 :                                                         list_append(ngbe, exp_copy(v->sql, e));
    2738           0 :                                                         list_append(exps, exp_copy(v->sql, e));
    2739             :                                                 }
    2740             :                                         }
    2741             :                                 } else {
    2742         132 :                                         e = exp_ref(v->sql, e);
    2743         132 :                                         append(ngbe, e);
    2744         132 :                                         append(exps, e);
    2745             :                                 }
    2746         132 :                                 if (e->type == e_aggr) /* aggregates must be copied */
    2747           0 :                                         e = exp_copy(v->sql, e);
    2748             :                                 else
    2749         132 :                                         e = exp_ref(v->sql, e);
    2750         132 :                                 append(nexps, e);
    2751             :                         }
    2752             :                 }
    2753             : 
    2754         115 :                 darg = arg->h->data;
    2755         115 :                 if ((found = exps_find_exp(exps, darg)) == NULL) { /* not already in the groups projection list */
    2756         112 :                         if ((found = exps_find_exp(gbe, darg))) { /* first find if the aggregate argument already exists in the grouping list */
    2757           0 :                                 darg = exp_ref(v->sql, found);
    2758             :                         } else {
    2759         112 :                                 list_append(gbe, darg = exp_copy(v->sql, darg));
    2760         112 :                                 exp_label(v->sql->sa, darg, ++v->sql->label);
    2761         112 :                                 darg = exp_ref(v->sql, darg);
    2762             :                         }
    2763         112 :                         list_append(exps, darg);
    2764         112 :                         darg = exp_ref(v->sql, darg);
    2765             :                 } else {
    2766           3 :                         darg = exp_ref(v->sql, found);
    2767             :                 }
    2768         115 :                 arg->h->data = darg;
    2769         115 :                 l = rel->l = rel_groupby(v->sql, rel->l, gbe);
    2770         115 :                 l->exps = exps;
    2771         115 :                 set_processed(l);
    2772         115 :                 rel->r = ngbe;
    2773         115 :                 rel->exps = nexps;
    2774         115 :                 set_nodistinct(distinct);
    2775         115 :                 append(nexps, distinct);
    2776         115 :                 v->changes++;
    2777             :         }
    2778             :         return rel;
    2779             : }
    2780             : 
    2781             : /*
    2782             :  * Push Count inside crossjoin down, and multiply the results
    2783             :  *
    2784             :  *     project (                                project(
    2785             :  *          group by (                               crossproduct (
    2786             :  *              crossproduct(                             project (
    2787             :  *                   L,                  =>                    group by (
    2788             :  *                   R                                              L
    2789             :  *              ) [ ] [ count NOT NULL ]                       ) [ ] [ count NOT NULL ]
    2790             :  *          )                                             ),
    2791             :  *     ) [ NOT NULL ]                                     project (
    2792             :  *                                                              group by (
    2793             :  *                                                                  R
    2794             :  *                                                              ) [ ] [ count NOT NULL ]
    2795             :  *                                                        )
    2796             :  *                                                   ) [ sql_mul(.., .. NOT NULL) ]
    2797             :  *                                              )
    2798             :  */
    2799             : static inline sql_rel *
    2800       93717 : rel_push_count_down(visitor *v, sql_rel *rel)
    2801             : {
    2802       93717 :         sql_rel *r = rel->l;
    2803             : 
    2804       93717 :         if (is_groupby(rel->op) && !rel_is_ref(rel) && list_empty(rel->r) &&
    2805       24269 :                 r && !r->exps && r->op == op_join && !(rel_is_ref(r)) &&
    2806             :                 /* currently only single count aggregation is handled, no other projects or aggregation */
    2807          50 :                 list_length(rel->exps) == 1 && exp_aggr_is_count(rel->exps->h->data)) {
    2808          12 :                 sql_exp *nce, *oce, *cnt1 = NULL, *cnt2 = NULL;
    2809          12 :                 sql_rel *gbl = NULL, *gbr = NULL;       /* Group By */
    2810          12 :                 sql_rel *cp = NULL;                                     /* Cross Product */
    2811          12 :                 sql_rel *srel;
    2812             : 
    2813          12 :                 oce = rel->exps->h->data;
    2814          12 :                 if (oce->l) /* we only handle COUNT(*) */
    2815             :                         return rel;
    2816             : 
    2817          10 :                 srel = r->l;
    2818             :                 {
    2819          10 :                         sql_subfunc *cf = sql_bind_func(v->sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
    2820          10 :                         sql_exp *e = exp_aggr(v->sql->sa, NULL, cf, need_distinct(oce), need_no_nil(oce), oce->card, 0);
    2821             : 
    2822          10 :                         exp_label(v->sql->sa, e, ++v->sql->label);
    2823          10 :                         cnt1 = exp_ref(v->sql, e);
    2824          10 :                         gbl = rel_groupby(v->sql, rel_dup(srel), NULL);
    2825          10 :                         set_processed(gbl);
    2826          10 :                         rel_groupby_add_aggr(v->sql, gbl, e);
    2827             :                 }
    2828             : 
    2829          10 :                 srel = r->r;
    2830             :                 {
    2831          10 :                         sql_subfunc *cf = sql_bind_func(v->sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
    2832          10 :                         sql_exp *e = exp_aggr(v->sql->sa, NULL, cf, need_distinct(oce), need_no_nil(oce), oce->card, 0);
    2833             : 
    2834          10 :                         exp_label(v->sql->sa, e, ++v->sql->label);
    2835          10 :                         cnt2 = exp_ref(v->sql, e);
    2836          10 :                         gbr = rel_groupby(v->sql, rel_dup(srel), NULL);
    2837          10 :                         set_processed(gbr);
    2838          10 :                         rel_groupby_add_aggr(v->sql, gbr, e);
    2839             :                 }
    2840             : 
    2841          10 :                 cp = rel_crossproduct(v->sql->sa, gbl, gbr, op_join);
    2842          10 :                 set_processed(cp);
    2843             : 
    2844          10 :                 if (!(nce = rel_binop_(v->sql, NULL, cnt1, cnt2, "sys", "sql_mul", card_value, true))) {
    2845           0 :                         v->sql->session->status = 0;
    2846           0 :                         v->sql->errstr[0] = '\0';
    2847           0 :                         return rel; /* error, fallback to original expression */
    2848             :                 }
    2849             :                 /* because of remote plans, make sure "sql_mul" returns bigint. The cardinality is atomic, so no major performance penalty */
    2850          10 :                 if (subtype_cmp(exp_subtype(oce), exp_subtype(nce)) != 0)
    2851          10 :                         nce = exp_convert(v->sql, nce, exp_subtype(nce), exp_subtype(oce));
    2852          10 :                 if (exp_name(oce))
    2853          10 :                         exp_prop_alias(v->sql->sa, nce, oce);
    2854             : 
    2855          10 :                 rel_destroy(rel);
    2856          10 :                 rel = rel_project(v->sql->sa, cp, append(new_exp_list(v->sql->sa), nce));
    2857          10 :                 set_processed(rel);
    2858             : 
    2859          10 :                 v->changes++;
    2860             :         }
    2861             : 
    2862             :         return rel;
    2863             : }
    2864             : 
    2865             : static inline sql_rel *
    2866       40482 : rel_basecount(visitor *v, sql_rel *rel)
    2867             : {
    2868       50794 :         if (is_groupby(rel->op) && !rel_is_ref(rel) && rel->l && list_empty(rel->r) &&
    2869       19589 :                 list_length(rel->exps) == 1 && exp_aggr_is_count(rel->exps->h->data)) {
    2870        4444 :                 sql_rel *bt = rel->l;
    2871        4444 :                 sql_exp *e = rel->exps->h->data;
    2872        4444 :                 if (is_basetable(bt->op) && list_empty(e->l)) { /* count(*) */
    2873             :                         /* change into select cnt('schema','table') */
    2874         668 :                         sql_table *t = bt->l;
    2875             :                         /* I need to get the declared table's frame number to make this work correctly for those */
    2876         668 :                         if (!isTable(t) || isDeclaredTable(t))
    2877             :                                 return rel;
    2878         589 :                         sql_subfunc *cf = sql_bind_func(v->sql, "sys", "cnt", sql_bind_localtype("str"), sql_bind_localtype("str"), F_FUNC, true, true);
    2879         589 :                         list *exps = sa_list(v->sql->sa);
    2880         589 :                         append(exps, exp_atom_str(v->sql->sa, t->s->base.name, sql_bind_localtype("str")));
    2881         589 :                         append(exps, exp_atom_str(v->sql->sa, t->base.name, sql_bind_localtype("str")));
    2882         589 :                         sql_exp *ne = exp_op(v->sql->sa, exps, cf);
    2883             : 
    2884         589 :                         ne = exp_propagate(v->sql->sa, ne, e);
    2885         589 :                         exp_setalias(ne, e->alias.label, exp_find_rel_name(e), exp_name(e));
    2886         589 :                         rel_destroy(rel);
    2887         589 :                         rel = rel_project(v->sql->sa, NULL, append(sa_list(v->sql->sa), ne));
    2888         589 :                         v->changes++;
    2889             :                 }
    2890             :         }
    2891             :         return rel;
    2892             : }
    2893             : 
    2894             : static inline sql_rel *
    2895       40482 : rel_simplify_count(visitor *v, sql_rel *rel)
    2896             : {
    2897       40482 :         if (is_groupby(rel->op) && !list_empty(rel->exps)) {
    2898       40401 :                 mvc *sql = v->sql;
    2899       40401 :                 int ncountstar = 0;
    2900             : 
    2901             :                 /* Convert count(no null) into count(*) */
    2902      125604 :                 for (node *n = rel->exps->h; n ; n = n->next) {
    2903       85203 :                         sql_exp *e = n->data;
    2904             : 
    2905       85203 :                         if (exp_aggr_is_count(e) && !need_distinct(e)) {
    2906       13849 :                                 if (list_length(e->l) == 0) {
    2907       11089 :                                         ncountstar++;
    2908        2760 :                                 } else if (list_length(e->l) == 1 && !has_nil((sql_exp*)((list*)e->l)->h->data)) {
    2909        2600 :                                         sql_subfunc *cf = sql_bind_func(sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
    2910        2600 :                                         sql_exp *ne = exp_aggr(sql->sa, NULL, cf, 0, 0, e->card, 0);
    2911        2600 :                                         if (exp_name(e))
    2912        2600 :                                                 exp_prop_alias(sql->sa, ne, e);
    2913        2600 :                                         n->data = ne;
    2914        2600 :                                         ncountstar++;
    2915        2600 :                                         v->changes++;
    2916             :                                 }
    2917             :                         }
    2918             :                 }
    2919             :                 /* With multiple count(*), use exp_ref to reduce the number of calls to this aggregate */
    2920       40401 :                 if (ncountstar > 1) {
    2921          56 :                         sql_exp *count_star = NULL;
    2922          56 :                         sql_rel *nrel = rel_project(v->sql->sa, rel, NULL);
    2923          56 :                         list *aexps = sa_list(v->sql->sa), *nexps = sa_list(v->sql->sa);
    2924          56 :                         nrel->exps = nexps;
    2925         247 :                         for (node *n = rel->exps->h; n ; n = n->next) {
    2926         191 :                                 sql_exp *e = n->data;
    2927             : 
    2928         191 :                                 if (exp_aggr_is_count(e) && !need_distinct(e) && list_length(e->l) == 0) {
    2929         126 :                                         if (!count_star) {
    2930          56 :                                                 count_star = e;
    2931          56 :                                                 append(aexps, e);
    2932          56 :                                                 append(nexps, exp_ref(sql, e));
    2933             :                                         } else {
    2934          70 :                                                 sql_exp *ne = exp_ref(sql, count_star);
    2935             : 
    2936          70 :                                                 if (exp_name(e))
    2937          70 :                                                         exp_prop_alias(sql->sa, ne, e);
    2938          70 :                                                 v->changes++;
    2939          70 :                                                 append(nexps, ne);
    2940             :                                         }
    2941             :                                 } else {
    2942          65 :                                         append(aexps, e);
    2943          65 :                                         append(nexps, exp_ref(sql, e));
    2944             :                                 }
    2945             :                         }
    2946          56 :                         rel->exps = aexps;
    2947          56 :                         return nrel;
    2948             :                 }
    2949             :         }
    2950             :         return rel;
    2951             : }
    2952             : 
    2953             : static sql_rel *
    2954       40482 : rel_groupjoin(visitor *v, sql_rel *rel)
    2955             : {
    2956       40482 :         if (!rel || rel_is_ref(rel) || !is_groupby(rel->op) || list_empty(rel->r))
    2957       14506 :                 return rel;
    2958             : 
    2959       25976 :         sql_rel *j = rel->l;
    2960       25976 :         if (!j || rel_is_ref(j) /*|| !is_left(j->op)*/ || j->op != op_join || list_length(rel->exps) > 1 /* only join because left joins aren't optimized jet (TODO), only length 1 as implementation of groupjoins is missing */ || !list_empty(rel->attr))
    2961       25255 :                 return rel;
    2962             :         /* check group by exps == equi join exps */
    2963         721 :         list *gbes = rel->r;
    2964         721 :         if (list_length(gbes) != list_length(j->exps))
    2965             :                 return rel;
    2966         620 :         int nr = 0;
    2967        1237 :         for(node *n = gbes->h; n; n = n->next) {
    2968         620 :                 sql_exp *gbe = n->data;
    2969        1237 :                 for(node *m = j->exps->h; m; m = m->next) {
    2970         620 :                         sql_exp *je = m->data;
    2971         620 :                         if (je->type != e_cmp || je->flag != cmp_equal)
    2972             :                                 return rel;
    2973             :                         /* check if its a join exp (ie not a selection) */
    2974         621 :                         if (!( (!rel_has_exp(j->l, je->l, false) && !rel_has_exp(j->r, je->r, false)) ||
    2975           8 :                                    (!rel_has_exp(j->l, je->r, false) && !rel_has_exp(j->r, je->l, false))))
    2976           0 :                                 return rel;
    2977         617 :                         if (exp_match(je->l, gbe)) {
    2978          78 :                                 nr++;
    2979         539 :                         } else if (exp_match(je->r, gbe)) {
    2980          10 :                                 nr++;
    2981             :                         }
    2982             :                 }
    2983             :         }
    2984         617 :         if (nr == list_length(gbes)) {
    2985             :                 // printf("#group by converted\n");
    2986          88 :                 j = rel_dup(j);
    2987          88 :                 j->attr = rel->exps;
    2988          88 :                 v->changes++;
    2989          88 :                 rel_destroy(rel);
    2990          88 :                 return j;
    2991             :         }
    2992             :         return rel;
    2993             : }
    2994             : 
    2995             : /* select k1 from bla where k1 = const -> select const from bla where k1 = const */
    2996             : static sql_rel *
    2997     3639847 : rel_project_select_exp(visitor *v, sql_rel *rel)
    2998             : {
    2999     3639847 :         if (is_simple_project(rel->op) && rel->exps && rel->l) {
    3000     1179458 :                 sql_rel *l = rel->l;
    3001     1179458 :                 if (is_select(l->op) && l->exps) {
    3002     1197534 :                         for(node *n = rel->exps->h; n; n = n->next) {
    3003      891513 :                                 sql_exp *col = n->data;
    3004      891513 :                                 if (col->type == e_column) {
    3005     1599535 :                                         for(node *m = l->exps->h; m; m = m->next) {
    3006      961443 :                                                 sql_exp *cmp = m->data;
    3007      961443 :                                                 if (cmp->type == e_cmp && cmp->flag == cmp_equal && !is_anti(cmp) && !is_semantics(cmp) && exp_is_atom(cmp->r)) {
    3008      608879 :                                                         sql_exp *l = cmp->l;
    3009      608879 :                                                         if(l->type == e_column && col->nid == l->nid) {
    3010             :                                                                 /* replace column with the constant */
    3011       12159 :                                                                 sql_exp *e = n->data = exp_copy(v->sql, cmp->r);
    3012       12159 :                                                                 exp_setalias(e, col->alias.label, exp_relname(col), exp_name(col));
    3013       12159 :                                                                 exp_propagate(v->sql->sa, e, col);
    3014       12159 :                                                                 list_hash_clear(rel->exps);
    3015             :                                                         }
    3016             :                                                 }
    3017             :                                         }
    3018             :                                 }
    3019             :                         }
    3020             :                 }
    3021             :         }
    3022     3639846 :         return rel;
    3023             : }
    3024             : 
    3025             : static sql_rel *
    3026     3639843 : rel_optimize_projections_(visitor *v, sql_rel *rel)
    3027             : {
    3028     3639843 :         rel = rel_project_cse(v, rel);
    3029     3639844 :         rel = rel_project_select_exp(v, rel);
    3030             : 
    3031     3639850 :         if (!rel || !is_groupby(rel->op))
    3032             :                 return rel;
    3033             : 
    3034       93717 :         rel = rel_remove_const_aggr(v, rel);
    3035       93716 :         if (v->value_based_opt) {
    3036       40482 :                 rel = rel_simplify_sum(v, rel);
    3037       40482 :                 rel = rel_simplify_groupby_columns(v, rel);
    3038             :         }
    3039       93716 :         rel = rel_groupby_cse(v, rel);
    3040       93717 :         rel = rel_push_aggr_down(v, rel);
    3041       93717 :         rel = rel_push_groupby_down(v, rel);
    3042       93717 :         rel = rel_reduce_groupby_exps(v, rel);
    3043       93717 :         rel = rel_distinct_aggregate_on_unique_values(v, rel);
    3044       93716 :         rel = rel_groupby_distinct(v, rel);
    3045       93717 :         rel = rel_push_count_down(v, rel);
    3046             :         /* only when value_based_opt is on, ie not for dependency resolution */
    3047       93717 :         if (v->value_based_opt) {
    3048       40482 :                 rel = rel_simplify_count(v, rel);
    3049       40482 :                 rel = rel_basecount(v, rel);
    3050             : 
    3051       40482 :                 rel = rel_groupjoin(v, rel);
    3052             :         }
    3053             :         return rel;
    3054             : }
    3055             : 
    3056             : static sql_rel *
    3057      171683 : rel_optimize_projections(visitor *v, global_props *gp, sql_rel *rel)
    3058             : {
    3059      171683 :         (void) gp;
    3060      171683 :         return rel_visitor_topdown(v, rel, &rel_optimize_projections_);
    3061             : }
    3062             : 
    3063             : run_optimizer
    3064      612287 : bind_optimize_projections(visitor *v, global_props *gp)
    3065             : {
    3066      612287 :         int flag = v->sql->sql_optimizer;
    3067      612091 :         return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_project] || gp->cnt[op_union] || gp->cnt[op_munion]
    3068     1224378 :                    || gp->cnt[op_inter] || gp->cnt[op_except]) && (flag & optimize_projections) ? rel_optimize_projections : NULL;
    3069             : }
    3070             : 
    3071             : 
    3072             : static inline sql_rel *
    3073     2584258 : rel_push_project_down_union(visitor *v, sql_rel *rel)
    3074             : {
    3075             :         /* first remove distinct if already unique */
    3076     2584258 :         if (rel->op == op_project && need_distinct(rel) && rel->exps && exps_unique(v->sql, rel, rel->exps) && !have_nil(rel->exps)) {
    3077         109 :                 set_nodistinct(rel);
    3078         109 :                 if (exps_card(rel->exps) <= CARD_ATOM && rel->card > CARD_ATOM) { /* if the projection just contains constants, then no topN is needed */
    3079           2 :                         sql_rel *nl = rel->l = rel_topn(v->sql->sa, rel->l, append(sa_list(v->sql->sa), exp_atom_lng(v->sql->sa, 1)));
    3080           2 :                         set_processed(nl);
    3081             :                 }
    3082         109 :                 v->changes++;
    3083             :         }
    3084             : 
    3085     2584258 :         if (rel->op == op_project && rel->l && rel->exps && list_empty(rel->r)) {
    3086      847573 :                 int need_distinct = need_distinct(rel);
    3087      847573 :                 sql_rel *u = rel->l;
    3088      847573 :                 sql_rel *p = rel;
    3089             : 
    3090      847573 :                 if (!u || !(is_union(u->op) || is_munion(u->op)) || need_distinct(u) || !u->exps || rel_is_ref(u) || project_unsafe(rel, false))
    3091      823072 :                         return rel;
    3092             : 
    3093       24501 :                 sql_rel *r;
    3094             : 
    3095             :                 /* don't push project down union of single values */
    3096       73686 :                 for (node *n = ((list*)u->l)->h; n; n = n->next) {
    3097       49207 :                         r = n->data;
    3098             :                         // TODO: does this check make sense?
    3099       49207 :                         if (is_project(r->op) && !r->l)
    3100             :                                 return rel;
    3101             :                 }
    3102             : 
    3103       73664 :                 for (node *n = ((list*)u->l)->h; n; n = n->next) {
    3104       49185 :                         r = rel_dup(n->data);
    3105             : 
    3106             :                         /* introduce projection around each operand if needed */
    3107       49185 :                         if (!is_project(r->op))
    3108         750 :                                 r = rel_project(v->sql->sa, r,
    3109             :                                                 rel_projections(v->sql, r, NULL, 1, 1));
    3110             :                         /* check if we need distinct */
    3111       98370 :                         need_distinct &=
    3112       49185 :                                 (!exps_unique(v->sql, r, r->exps) || have_nil(r->exps));
    3113       49185 :                         rel_rename_exps(v->sql, u->exps, r->exps);
    3114             : 
    3115       49185 :                         rel_destroy(n->data);
    3116       49185 :                         n->data = r;
    3117             :                 }
    3118             : 
    3119             :                 /* once we have checked for need_distinct in every rel we can
    3120             :                  * introduce the projects under the munion which are gonna be
    3121             :                  * copies of the single project above munion */
    3122       73664 :                 for (node *n = ((list*)u->l)->h; n; n = n->next) {
    3123       49185 :                         r = rel_dup(n->data);
    3124             : 
    3125       49185 :                         r = rel_project(v->sql->sa, r, NULL);
    3126       49185 :                         if (need_distinct)
    3127           0 :                                 set_distinct(r);
    3128       49185 :                         r->exps = exps_copy(v->sql, p->exps);
    3129       49185 :                         set_processed(r);
    3130             : 
    3131       49185 :                         rel_destroy(n->data);
    3132       49185 :                         n->data = r;
    3133             :                 }
    3134             : 
    3135             :                 /* turn the project-munion on top into munion. incr operand
    3136             :                  * rels count to make sure that they are not deleted by the
    3137             :                  * subsequent rel_inplace_setop_n_ary */
    3138       73664 :                 for (node *n = ((list*)u->l)->h; n; n = n->next)
    3139       49185 :                         rel_dup(n->data);
    3140       24479 :                 rel = rel_inplace_setop_n_ary(v->sql, rel, u->l, op_munion,
    3141             :                                 rel_projections(v->sql, rel, NULL, 1, 1));
    3142       24479 :                 if (need_distinct)
    3143           0 :                         set_distinct(rel);
    3144       24479 :                 if (is_single(u))
    3145           2 :                         set_single(rel);
    3146             : 
    3147       24479 :                 v->changes++;
    3148             : 
    3149             :                 /* if any operand has two project above then squash them */
    3150       73664 :                 for (node *n = ((list*)u->l)->h; n; n = n->next) {
    3151       49185 :                         r = rel_dup(n->data);
    3152       49185 :                         r = rel_merge_projects_(v, r);
    3153       49185 :                         rel_destroy(n->data);
    3154       49185 :                         n->data = r;
    3155             :                 }
    3156             : 
    3157             :                 return rel;
    3158             :         }
    3159             :         return rel;
    3160             : }
    3161             : 
    3162             : static inline sql_rel *
    3163     2584258 : rel_merge_unions(visitor *v, sql_rel *rel)
    3164             : {
    3165             :         /* stacked munion flattening e.g.
    3166             :          * munion( munion(a, b, c), munion(d, e)) => munion(a,b,c,d,e)
    3167             :          */
    3168     2584258 :         if (rel && is_munion(rel->op)) {
    3169       72816 :                 list *l = rel->l;
    3170      479618 :                 for(node *n = l->h; n; ) {
    3171      333986 :                         node *next = n->next;
    3172      333986 :                         sql_rel *oc = n->data;
    3173      333986 :                         sql_rel *c = oc;
    3174             : 
    3175             :                         /* account for any group-bys pushed down between stacked munions */
    3176      333986 :                         if (oc->op == op_groupby)
    3177        2357 :                                 c = oc->l;
    3178             : 
    3179      333986 :                         if (is_munion(c->op)) {
    3180        9021 :                                 c = rel_dup(c);
    3181        9021 :                                 list_remove_node(l, NULL, n);
    3182        9021 :                                 l = list_merge(l, c->l, (fdup)NULL);
    3183        9021 :                                 c->l = NULL;
    3184        9021 :                                 rel_destroy(oc);
    3185        9021 :                                 if (!next)
    3186           3 :                                         next = l->h;
    3187        9021 :                                 v->changes++;
    3188             :                         }
    3189             :                         n = next;
    3190             :                 }
    3191       72816 :                 rel->l = l;
    3192             :         }
    3193     2584258 :         return rel;
    3194             : }
    3195             : 
    3196             : 
    3197             : /*
    3198             :  * Push (semi)joins down unions, this is basically for merge tables, where
    3199             :  * we know that the fk-indices are split over two clustered merge tables.
    3200             :  */
    3201             : static inline sql_rel *
    3202     2584258 : rel_push_join_down_munion(visitor *v, sql_rel *rel)
    3203             : {
    3204     2584258 :         if ((is_join(rel->op) && !is_outerjoin(rel->op) && !is_single(rel)) || is_semi(rel->op)) {
    3205      319590 :                 sql_rel *l = rel->l, *r = rel->r, *ol = l, *or = r;
    3206      319590 :                 list *exps = rel->exps, *attr = rel->attr;
    3207      319590 :                 sql_exp *je = NULL;
    3208             : 
    3209             :                 /* we would like to optimize in place reference rels which point
    3210             :                  * to replica tables and let the replica optimizer handle those
    3211             :                  * later. otherwise we miss the push join down optimization due
    3212             :                  * to the rel_is_ref bailout
    3213             :                  */
    3214      319590 :                 if (rel_is_ref(l) && is_basetable(l->op) && l->l && isReplicaTable((sql_table*)l->l)) {
    3215           1 :                         rel->l = rel_copy(v->sql, l, true);
    3216           1 :                         rel_destroy(l);
    3217             :                 }
    3218      319590 :                 if (rel_is_ref(r) && is_basetable(r->op) && r->l && isReplicaTable((sql_table*)r->l)) {
    3219           4 :                         rel->r = rel_copy(v->sql, r, true);
    3220           4 :                         rel_destroy(r);
    3221             :                 }
    3222             : 
    3223             :                 // TODO: do we need to check if it's l/r are refs?
    3224      319590 :                 if (!l || !r || need_distinct(l) || need_distinct(r) || rel_is_ref(l) || rel_is_ref(r))
    3225             :                         return rel;
    3226      271481 :                 if (l->op == op_project)
    3227       22465 :                         l = l->l;
    3228      271481 :                 if (r->op == op_project)
    3229       25317 :                         r = r->l;
    3230             : 
    3231             :                 /* both sides only if we have a join index ASSUMING pkey-fkey are aligned */
    3232             :                 // TODO: we could also check if the join cols are (not) unique
    3233      271481 :                 bool aligned_pk_fk = true;
    3234      271481 :                 if (!l || !r || (is_munion(l->op) && is_munion(r->op) &&
    3235         134 :                         !(je = rel_is_join_on_pkey(rel, aligned_pk_fk))))
    3236         642 :                         return rel;
    3237             : 
    3238             :                 // TODO: why? bailout for union semijoin without pkey joins expressions
    3239      270839 :                 if (is_semi(rel->op) && is_munion(l->op) && !je)
    3240             :                         return rel;
    3241             : 
    3242             :                 /* if both sides are munions we assume that they will have the same number of children */
    3243      270101 :                 if (is_munion(l->op) && is_munion(r->op) && list_length(l->l) != list_length(r->l))
    3244             :                         return rel;
    3245             : 
    3246      270101 :                 if (is_munion(l->op) && !need_distinct(l) && !is_single(l) &&
    3247        4163 :                    !is_munion(r->op)){
    3248             :                         /* join(munion(a,b,c), d) -> munion(join(a,d), join(b,d), join(c,d)) */
    3249        4163 :                         list *js = sa_list(v->sql->sa);
    3250       12499 :                         for (node *n = ((list*)l->l)->h; n; n = n->next) {
    3251        8336 :                                 sql_rel *pc = rel_dup(n->data);
    3252        8336 :                                 if (!is_project(pc->op))
    3253          17 :                                         pc = rel_project(v->sql->sa, pc, rel_projections(v->sql, pc, NULL, 1, 1));
    3254        8336 :                                 rel_rename_exps(v->sql, l->exps, pc->exps);
    3255        8336 :                                 if (l != ol) {
    3256          12 :                                         pc = rel_project(v->sql->sa, pc, NULL);
    3257          12 :                                         pc->exps = exps_copy(v->sql, ol->exps);
    3258          12 :                                         set_processed(pc);
    3259             :                                 }
    3260        8336 :                                 pc = rel_crossproduct(v->sql->sa, pc, rel_dup(or), rel->op);
    3261        8336 :                                 pc->exps = exps_copy(v->sql, exps);
    3262        8336 :                                 pc->attr = exps_copy(v->sql, attr);
    3263        8336 :                                 set_processed(pc);
    3264        8336 :                                 pc = rel_project(v->sql->sa, pc, rel_projections(v->sql, pc, NULL, 1, 1));
    3265        8336 :                                 js = append(js, pc);
    3266             :                         }
    3267        4163 :                         v->changes++;
    3268        4163 :                         return rel_inplace_setop_n_ary(v->sql, rel, js, op_munion,
    3269             :                                                                rel_projections(v->sql, rel, NULL, 1, 1));
    3270      265938 :                 } else if (is_munion(l->op) && !need_distinct(l) && !is_single(l) &&
    3271           0 :                                is_munion(r->op) && !need_distinct(r) && !is_single(r) &&
    3272             :                                je) {
    3273             :                         /* join(munion(a,b,c), munion(d,e,f)) -> munion(join(a,d), join(b,e), join(c,f)) */
    3274           0 :                         list *cps = sa_list(v->sql->sa);
    3275             :                         /* create pairwise joins between left and right parts. assume eq num of parts (see earlier bailout) */
    3276           0 :                         for (node *n = ((list*)l->l)->h, *m=((list*)r->l)->h; n && m; n = n->next, m = m->next) {
    3277             :                                 /* left part */
    3278           0 :                                 sql_rel *lp = rel_dup(n->data);
    3279           0 :                                 if (!is_project(lp->op))
    3280           0 :                                         lp = rel_project(v->sql->sa, lp, rel_projections(v->sql, lp, NULL, 1, 1));
    3281           0 :                                 rel_rename_exps(v->sql, l->exps, lp->exps);
    3282           0 :                                 if (l != ol) {
    3283           0 :                                         lp = rel_project(v->sql->sa, lp, NULL);
    3284           0 :                                         lp->exps = exps_copy(v->sql, ol->exps);
    3285           0 :                                         set_processed(lp);
    3286             :                                 }
    3287             :                                 /* right part */
    3288           0 :                                 sql_rel *rp = rel_dup(m->data);
    3289           0 :                                 if (!is_project(rp->op))
    3290           0 :                                         rp = rel_project(v->sql->sa, rp, rel_projections(v->sql, rp, NULL, 1, 1));
    3291           0 :                                 rel_rename_exps(v->sql, r->exps, rp->exps);
    3292           0 :                                 if (r != or) {
    3293           0 :                                         rp = rel_project(v->sql->sa, rp, NULL);
    3294           0 :                                         rp->exps = exps_copy(v->sql, or->exps);
    3295           0 :                                         set_processed(rp);
    3296             :                                 }
    3297             :                                 /* combine them */
    3298           0 :                                 sql_rel *cp = rel_crossproduct(v->sql->sa, lp, rp, rel->op);
    3299           0 :                                 cp->exps = exps_copy(v->sql, exps);
    3300           0 :                                 cp->attr = exps_copy(v->sql, attr);
    3301           0 :                                 set_processed(cp);
    3302           0 :                                 cp = rel_project(v->sql->sa, cp, rel_projections(v->sql, cp, NULL, 1, 1));
    3303           0 :                                 cps = append(cps, cp);
    3304             :                         }
    3305           0 :                         v->changes++;
    3306           0 :                         return rel_inplace_setop_n_ary(v->sql, rel, cps, op_munion,
    3307             :                                                                                    rel_projections(v->sql, rel, NULL, 1, 1));
    3308      265938 :                 } else if (!is_munion(l->op) &&
    3309      265930 :                                 is_munion(r->op) && !need_distinct(r) && !is_single(r) &&
    3310        2006 :                                !is_semi(rel->op)) {
    3311             :                         /* join(a, munion(b,c,d)) -> munion(join(a,b), join(a,c), join(a,d)) */
    3312        1419 :                         list *js = sa_list(v->sql->sa);
    3313        4260 :                         for (node *n = ((list*)r->l)->h; n; n = n->next) {
    3314        2841 :                                 sql_rel *pc = rel_dup(n->data);
    3315        2841 :                                 if (!is_project(pc->op))
    3316          21 :                                         pc = rel_project(v->sql->sa, pc, rel_projections(v->sql, pc, NULL, 1, 1));
    3317        2841 :                                 rel_rename_exps(v->sql, r->exps, pc->exps);
    3318        2841 :                                 if (r != or) {
    3319          93 :                                         pc = rel_project(v->sql->sa, pc, NULL);
    3320          93 :                                         pc->exps = exps_copy(v->sql, or->exps);
    3321          93 :                                         set_processed(pc);
    3322             :                                 }
    3323        2841 :                                 pc = rel_crossproduct(v->sql->sa, rel_dup(ol), pc, rel->op);
    3324        2841 :                                 pc->exps = exps_copy(v->sql, exps);
    3325        2841 :                                 pc->attr = exps_copy(v->sql, attr);
    3326        2841 :                                 set_processed(pc);
    3327        2841 :                                 pc = rel_project(v->sql->sa, pc, rel_projections(v->sql, pc, NULL, 1, 1));
    3328        2841 :                                 js = append(js, pc);
    3329             :                         }
    3330        1419 :                         v->changes++;
    3331        1419 :                         return rel_inplace_setop_n_ary(v->sql, rel, js, op_munion,
    3332             :                                                                rel_projections(v->sql, rel, NULL, 1, 1));
    3333      264519 :                 } else if (!is_munion(l->op) &&
    3334      264511 :                                 is_munion(r->op) && !need_distinct(r) && !is_single(r) &&
    3335         587 :                                 is_semi(rel->op) && je) {
    3336             :                         /* {semi}join ( A1, munion (B, A2a, C, A2b)) [A1.partkey = A2.partkey] ->
    3337             :                          * {semi}join ( A1, munion (A2a, A2b))
    3338             :                          * (ie some parts of an n-th munion operand)
    3339             :                          *
    3340             :                          * How to detect that a relation isn't matching?
    3341             :                          *              partitioning is currently done only on pkey/fkey's
    3342             :                          *              ie only matching per part if join is on pkey/fkey (parts)
    3343             :                          *              and part numbers should match.
    3344             :                          * */
    3345           0 :                         int lpnr = rel_part_nr(l, je);
    3346           0 :                         if (lpnr < 0)
    3347             :                                 return rel;
    3348             : 
    3349           0 :                         list *ups = sa_list(v->sql->sa);
    3350           0 :                         for (node *n = ((list*)r->l)->h; n; n = n->next) {
    3351           0 :                                 if (rel_uses_part_nr(n->data, je, lpnr)) {
    3352           0 :                                         sql_rel *pc = rel_dup(n->data);
    3353           0 :                                         ups = append(ups, pc);
    3354             :                                 }
    3355             :                         }
    3356           0 :                         v->changes++;
    3357           0 :                         return rel_inplace_setop_n_ary(v->sql, r, ups, op_munion,
    3358             :                                                                       rel_projections(v->sql, rel, NULL, 1, 1));
    3359             :                 }
    3360             :         }
    3361             :         return rel;
    3362             : }
    3363             : 
    3364             : static sql_rel *
    3365     2584258 : rel_optimize_unions_topdown_(visitor *v, sql_rel *rel)
    3366             : {
    3367     2584258 :         rel = rel_push_project_down_union(v, rel);
    3368     2584258 :         rel = rel_merge_unions(v, rel);
    3369     2584258 :         rel = rel_push_join_down_munion(v, rel);
    3370     2584258 :         return rel;
    3371             : }
    3372             : 
    3373             : static sql_rel *
    3374       25763 : rel_optimize_unions_topdown(visitor *v, global_props *gp, sql_rel *rel)
    3375             : {
    3376       25763 :         (void) gp;
    3377       25763 :         return rel_visitor_topdown(v, rel, &rel_optimize_unions_topdown_);
    3378             : }
    3379             : 
    3380             : run_optimizer
    3381      612288 : bind_optimize_unions_topdown(visitor *v, global_props *gp)
    3382             : {
    3383      612288 :         (void) v;
    3384      612288 :         return gp->opt_level == 1 && gp->cnt[op_munion] ? rel_optimize_unions_topdown : NULL;
    3385             : }
    3386             : 
    3387             : 
    3388             : static sql_column *
    3389           0 : is_fk_column_of_pk(mvc *sql, sql_rel *rel, sql_column *pkc, sql_exp *e) /* test if e is a foreign key column for the pk on pkc */
    3390             : {
    3391           0 :         sql_trans *tr = sql->session->tr;
    3392           0 :         sql_column *c = exp_find_column(rel, e, -2);
    3393             : 
    3394           0 :         if (c) {
    3395           0 :                 sql_table *t = c->t;
    3396             : 
    3397           0 :                 for (node *n = ol_first_node(t->idxs); n; n = n->next) {
    3398           0 :                         sql_idx *li = n->data;
    3399             : 
    3400           0 :                         if (li->type == join_idx) {
    3401           0 :                                 for (node *m = li->columns->h ; m ; m = m->next) {
    3402           0 :                                         sql_kc *fkc = m->data;
    3403             : 
    3404           0 :                                         if (strcmp(fkc->c->base.name, c->base.name) == 0) { /* same fkey column */
    3405           0 :                                                 sql_key *fkey = (sql_key*)os_find_id(tr->cat->objects, tr, ((sql_fkey*)li->key)->rkey);
    3406             : 
    3407           0 :                                                 if (strcmp(fkey->t->base.name, pkc->t->base.name) == 0) { /* to same pk table */
    3408           0 :                                                         for (node *o = fkey->columns->h ; o ; o = n->next) {
    3409           0 :                                                                 sql_kc *kc = m->data;
    3410             : 
    3411           0 :                                                                 if (strcmp(kc->c->base.name, pkc->base.name) == 0) /* to same pk table column */
    3412             :                                                                         return c;
    3413             :                                                         }
    3414             :                                                 }
    3415             :                                         }
    3416             :                                 }
    3417             :                         }
    3418             :                 }
    3419             :         }
    3420             :         return NULL;
    3421             : }
    3422             : 
    3423             : static bool
    3424           0 : has_no_selectivity(mvc *sql, sql_rel *rel)
    3425             : {
    3426           0 :         if (!rel)
    3427             :                 return true;
    3428             : 
    3429           0 :         switch(rel->op){
    3430             :         case op_basetable:
    3431             :         case op_truncate:
    3432             :         case op_table:
    3433             :                 return true;
    3434           0 :         case op_topn:
    3435             :         case op_sample:
    3436             :         case op_project:
    3437             :         case op_groupby:
    3438           0 :                 return has_no_selectivity(sql, rel->l);
    3439             :         case op_ddl:
    3440             :         case op_insert:
    3441             :         case op_update:
    3442             :         case op_delete:
    3443             :         case op_merge:
    3444             :         case op_join:
    3445             :         case op_left:
    3446             :         case op_right:
    3447             :         case op_full:
    3448             :         case op_semi:
    3449             :         case op_anti:
    3450             :         case op_union:
    3451             :         case op_inter:
    3452             :         case op_except:
    3453             :         case op_munion:
    3454             :         case op_select:
    3455             :                 return false;
    3456             :         }
    3457             :         return true;
    3458             : }
    3459             : 
    3460             : static sql_rel *
    3461      701508 : rel_distinct_project2groupby_(visitor *v, sql_rel *rel)
    3462             : {
    3463      701508 :         sql_rel *l = rel->l;
    3464             : 
    3465             :         /* rewrite distinct project (table) [ constant ] -> project [ constant ] */
    3466      709228 :         if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
    3467        7720 :             exps_card(rel->exps) <= CARD_ATOM) {
    3468         471 :                 set_nodistinct(rel);
    3469         471 :                 if (rel->card > CARD_ATOM) { /* if the projection just contains constants, then no topN is needed */
    3470         193 :                         sql_rel *nl = rel->l = rel_topn(v->sql->sa, rel->l, append(sa_list(v->sql->sa), exp_atom_lng(v->sql->sa, 1)));
    3471         193 :                         set_processed(nl);
    3472             :                 }
    3473         471 :                 v->changes++;
    3474             :         }
    3475             : 
    3476             :         /* rewrite distinct project [ pk ] ( select ( table ) [ e op val ])
    3477             :          * into project [ pk ] ( select/semijoin ( table )  */
    3478      701508 :         if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
    3479        7461 :             (l->op == op_select || l->op == op_semi) && exps_unique(v->sql, rel, rel->exps) &&
    3480         212 :                 (!have_semantics(l->exps) || !have_nil(rel->exps))) {
    3481         212 :                 set_nodistinct(rel);
    3482         212 :                 v->changes++;
    3483             :         }
    3484             : 
    3485             :         /* rewrite distinct project ( join(p,f) [ p.pk = f.fk ] ) [ p.pk ]
    3486             :          *      into project( (semi)join(p,f) [ p.pk = f.fk ] ) [ p.pk ] */
    3487      701508 :         if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
    3488        7037 :             l && (is_select(l->op) || l->op == op_join) && rel_is_join_on_pkey(l, true) /* [ pk == fk ] */) {
    3489           0 :                 sql_exp *found = NULL, *pk = NULL, *fk = NULL;
    3490           0 :                 bool all_exps_atoms = true;
    3491           0 :                 sql_column *pkc = NULL;
    3492             : 
    3493           0 :                 for (node *m = l->exps->h ; m ; m = m->next) { /* find a primary key join */
    3494           0 :                         sql_exp *je = (sql_exp *) m->data;
    3495           0 :                         sql_exp *le = je->l, *re = je->r;
    3496             : 
    3497           0 :                         if (!find_prop(je->p, PROP_JOINIDX)) /* must be a pk-fk join expression */
    3498           0 :                                 continue;
    3499             : 
    3500           0 :                         if ((pkc = exp_is_pkey(l, le))) { /* le is the primary key */
    3501           0 :                                 all_exps_atoms = true;
    3502             : 
    3503           0 :                                 for (node *n = rel->exps->h; n && all_exps_atoms; n = n->next) {
    3504           0 :                                         sql_exp *e = (sql_exp *) n->data;
    3505             : 
    3506           0 :                                         if (exp_match(e, le) || exp_refers(e, le))
    3507             :                                                 found = e;
    3508           0 :                                         else if (e->card > CARD_ATOM)
    3509           0 :                                                 all_exps_atoms = false;
    3510             :                                 }
    3511             :                                 pk = le;
    3512             :                                 fk = re;
    3513             :                         }
    3514           0 :                         if (!found && (pkc = exp_is_pkey(l, re))) { /* re is the primary key */
    3515           0 :                                 all_exps_atoms = true;
    3516             : 
    3517           0 :                                 for (node *n = rel->exps->h; n && all_exps_atoms; n = n->next) {
    3518           0 :                                         sql_exp *e = (sql_exp *) n->data;
    3519             : 
    3520           0 :                                         if (exp_match(e, re) || exp_refers(e, re))
    3521             :                                                 found = e;
    3522           0 :                                         else if (e->card > CARD_ATOM)
    3523           0 :                                                 all_exps_atoms = false;
    3524             :                                 }
    3525             :                                 pk = re;
    3526             :                                 fk = le;
    3527             :                         }
    3528             :                 }
    3529             : 
    3530           0 :                 if (all_exps_atoms && found) { /* rel must have the same primary key on the projection list */
    3531             :                         /* if the foreign key has no selectivity, the join can be removed */
    3532           0 :                         if (!(rel_is_ref(l)) && ((rel_find_exp(l->l, fk) && is_fk_column_of_pk(v->sql, l->l, pkc, fk) && has_no_selectivity(v->sql, l->l)) ||
    3533           0 :                                 (l->r && rel_find_exp(l->r, fk) && is_fk_column_of_pk(v->sql, l->r, pkc, fk) && has_no_selectivity(v->sql, l->r)))) {
    3534           0 :                                 sql_rel *side = (rel_find_exp(l->l, pk) != NULL)?l->l:l->r;
    3535             : 
    3536           0 :                                 rel->l = rel_dup(side);
    3537           0 :                                 rel_destroy(l);
    3538           0 :                                 v->changes++;
    3539           0 :                                 set_nodistinct(rel);
    3540           0 :                                 return rel;
    3541             :                         }
    3542             :                         /* if the join has no multiple references it can be re-written into a semijoin */
    3543           0 :                         if (l->op == op_join && !(rel_is_ref(l)) && list_length(rel->exps) == 1) { /* other expressions may come from the other side */
    3544           0 :                                 if (l->r && rel_find_exp(l->r, pk)) {
    3545           0 :                                         sql_rel *temp = l->l;
    3546           0 :                                         l->l = l->r;
    3547           0 :                                         l->r = temp;
    3548             : 
    3549           0 :                                         l->op = op_semi;
    3550           0 :                                 } else if (rel_find_exp(l->l, pk)) {
    3551           0 :                                         l->op = op_semi;
    3552             :                                 }
    3553             :                         }
    3554           0 :                         v->changes++;
    3555           0 :                         set_nodistinct(rel);
    3556           0 :                         return rel;
    3557             :                 }
    3558             :         }
    3559             :         /* rewrite distinct project [ gbe ] ( select ( groupby [ gbe ] [ gbe, e ] )[ e op val ])
    3560             :          * into project [ gbe ] ( select ( group etc ) */
    3561      701508 :         if (rel->op == op_project && rel->l && !rel->r /* no order by */ &&
    3562        7037 :             need_distinct(rel) && l->op == op_select){
    3563        2573 :                 sql_rel *g = l->l;
    3564        2573 :                 if (is_groupby(g->op)) {
    3565           4 :                         list *used = sa_list(v->sql->sa);
    3566           4 :                         list *gbe = g->r;
    3567           4 :                         node *n;
    3568           4 :                         int fnd = 1;
    3569             : 
    3570           8 :                         for (n = rel->exps->h; n && fnd; n = n->next) {
    3571           4 :                                 sql_exp *e = n->data;
    3572             : 
    3573           4 :                                 if (e->card > CARD_ATOM) {
    3574             :                                         /* find e in gbe */
    3575           4 :                                         sql_exp *ne = list_find_exp(g->exps, e);
    3576             : 
    3577           4 :                                         if (ne)
    3578           3 :                                                 ne = list_find_exp( gbe, ne);
    3579           3 :                                         if (ne && !list_find_exp(used, ne)) {
    3580           2 :                                                 fnd++;
    3581           2 :                                                 list_append(used, ne);
    3582             :                                         }
    3583           3 :                                         if (!ne)
    3584             :                                                 fnd = 0;
    3585             :                                 }
    3586             :                         }
    3587           4 :                         if (fnd == (list_length(gbe)+1)) {
    3588           0 :                                 v->changes++;
    3589           0 :                                 set_nodistinct(rel);
    3590             :                         }
    3591             :                 }
    3592             :         }
    3593      701508 :         if (rel->op == op_project && rel->l &&
    3594        7037 :             need_distinct(rel) && exps_card(rel->exps) > CARD_ATOM) {
    3595        7037 :                 node *n;
    3596        7037 :                 list *exps = new_exp_list(v->sql->sa), *gbe = new_exp_list(v->sql->sa);
    3597        7037 :                 list *obe = rel->r; /* we need to read the ordering later */
    3598             : 
    3599        7037 :                 if (obe) {
    3600           0 :                         int fnd = 0;
    3601             : 
    3602           0 :                         for(n = obe->h; n && !fnd; n = n->next) {
    3603           0 :                                 sql_exp *e = n->data;
    3604             : 
    3605           0 :                                 if (e->type != e_column)
    3606             :                                         fnd = 1;
    3607           0 :                                 else if (exps_bind_nid(rel->exps, e->nid) == NULL)
    3608           0 :                                         fnd = 1;
    3609             :                         }
    3610           0 :                         if (fnd)
    3611             :                                 return rel;
    3612             :                 }
    3613        7037 :                 rel->l = rel_project(v->sql->sa, rel->l, rel->exps);
    3614             : 
    3615       21496 :                 for (n = rel->exps->h; n; n = n->next) {
    3616       14459 :                         sql_exp *e = n->data, *ne;
    3617             : 
    3618       14459 :                         set_nodistinct(e);
    3619       14459 :                         ne = exp_ref(v->sql, e);
    3620       14459 :                         if (e->card > CARD_ATOM && !list_find_exp(gbe, ne)) { /* no need to group by on constants, or the same column multiple times */
    3621       14088 :                                 append(gbe, ne);
    3622       14088 :                                 ne = exp_ref(v->sql, ne);
    3623             :                         }
    3624       14459 :                         append(exps, ne);
    3625             :                 }
    3626        7037 :                 rel->op = op_groupby;
    3627        7037 :                 rel->exps = exps;
    3628        7037 :                 rel->r = gbe;
    3629        7037 :                 set_nodistinct(rel);
    3630        7037 :                 if (obe) {
    3631             :                         /* add order again */
    3632           0 :                         rel = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
    3633           0 :                         rel->r = obe;
    3634             :                 }
    3635        7037 :                 v->changes++;
    3636             :         }
    3637             :         return rel;
    3638             : }
    3639             : 
    3640             : static sql_rel *
    3641        5577 : rel_distinct_project2groupby(visitor *v, global_props *gp, sql_rel *rel)
    3642             : {
    3643        5577 :         (void) gp;
    3644        5577 :         return rel_visitor_bottomup(v, rel, &rel_distinct_project2groupby_);
    3645             : }
    3646             : 
    3647             : run_optimizer
    3648      612284 : bind_distinct_project2groupby(visitor *v, global_props *gp)
    3649             : {
    3650      612284 :         int flag = v->sql->sql_optimizer;
    3651      541834 :         return gp->opt_cycle == 0 && gp->opt_level == 1 && gp->needs_distinct && gp->cnt[op_project] &&
    3652      617861 :                    (flag & distinct_project2groupby)? rel_distinct_project2groupby : NULL;
    3653             : }

Generated by: LCOV version 1.14