LCOV - code coverage report
Current view: top level - sql/server - rel_optimize_proj.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 1489 1970 75.6 %
Date: 2024-12-20 21:24:02 Functions: 74 80 92.5 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include "rel_optimizer_private.h"
      15             : #include "rel_basetable.h"
      16             : #include "rel_exp.h"
      17             : #include "rel_select.h"
      18             : #include "rel_rewriter.h"
      19             : 
      20             : static int
      21     3714463 : exp_is_rename(sql_exp *e)
      22             : {
      23     3714463 :         return (e->type == e_column);
      24             : }
      25             : 
      26             : static int
      27     2067049 : exp_is_useless_rename(sql_exp *e)
      28             : {
      29     2067049 :         return (e->type == e_column && e->alias.label == e->nid);
      30             : }
      31             : 
      32             : static list *
      33       20961 : rel_used_projections(mvc *sql, list *exps, list *users)
      34             : {
      35       20961 :         list *nexps = sa_list(sql->sa);
      36       20961 :         bool *used = SA_ZNEW_ARRAY(sql->ta, bool, list_length(exps));
      37       20962 :         int i = 0;
      38             : 
      39      136541 :         for(node *n = users->h; n; n = n->next) {
      40      115580 :                 sql_exp *e = n->data, *ne = NULL;
      41      115580 :                 assert(e->nid && exps_bind_nid(exps, e->nid));
      42      115580 :                 if (e->nid && (ne = exps_bind_nid(exps, e->nid))) {
      43      115580 :                         used[list_position(exps, ne)] = 1;
      44             :                 }
      45             :         }
      46      147233 :         for(node *n = exps->h; n; n = n->next, i++) {
      47      126271 :                 sql_exp *e = n->data;
      48      126271 :                 if (is_intern(e) || used[i])
      49      126134 :                         append(nexps, e);
      50             :         }
      51       20962 :         return nexps;
      52             : }
      53             : 
      54             : /* move projects down with the goal op removing them completely (ie push renames/reduced lists into basetable)
      55             :  * for some cases we can directly remove iff renames rename into same alias
      56             :  * */
      57             : static sql_rel *
      58     4014081 : rel_push_project_down_(visitor *v, sql_rel *rel)
      59             : {
      60             :         /* for now only push down renames */
      61     4014081 :         if (v->depth > 1 && is_simple_project(rel->op) && !need_distinct(rel) && !rel_is_ref(rel) && rel->l && !rel->r &&
      62     1169238 :                         v->parent &&
      63     1169230 :                         !is_modify(v->parent->op) && !is_topn(v->parent->op) && !is_sample(v->parent->op) &&
      64     1851950 :                         !is_ddl(v->parent->op) && !is_set(v->parent->op) && !is_munion(v->parent->op) &&
      65      719590 :                         list_check_prop_all(rel->exps, (prop_check_func)&exp_is_rename)) {
      66      418136 :                 sql_rel *l = rel->l;
      67             : 
      68      418136 :                 if (rel_is_ref(l))
      69             :                         return rel;
      70      387195 :                 if (is_basetable(l->op)) {
      71       40797 :                         if (list_check_prop_all(rel->exps, (prop_check_func)&exp_is_useless_rename)) {
      72             :                                 /* TODO reduce list (those in the project + internal) */
      73       20962 :                                 rel->l = NULL;
      74       20962 :                                 l->exps = rel_used_projections(v->sql, l->exps, rel->exps);
      75       20962 :                                 rel_destroy(rel);
      76       20961 :                                 v->changes++;
      77       20961 :                                 return l;
      78             :                         }
      79             :                         return rel;
      80      346398 :                 } else if (list_check_prop_all(rel->exps, (prop_check_func)&exp_is_useless_rename)) {
      81      139218 :                         if ((is_project(l->op) && list_length(l->exps) == list_length(rel->exps)) ||
      82      122331 :                                 ((v->parent && is_project(v->parent->op)) &&
      83       42417 :                                  (is_mset(l->op) || is_set(l->op) || is_select(l->op) || is_join(l->op) || is_semi(l->op) || is_topn(l->op) || is_sample(l->op)))) {
      84       58299 :                                 rel->l = NULL;
      85       58299 :                                 rel_destroy(rel);
      86       58299 :                                 v->changes++;
      87       58299 :                                 return l;
      88             :                         }
      89             :                 }
      90             :         }
      91             :         /* ToDo handle useful renames, ie new relation name and unique set of attribute names (could reduce set of * attributes) */
      92             :         /* handle both useless and useful with project [ group by ] */
      93             :         return rel;
      94             : }
      95             : 
      96             : static sql_rel *
      97      186409 : rel_push_project_down(visitor *v, global_props *gp, sql_rel *rel)
      98             : {
      99      186409 :         (void) gp;
     100      186409 :         return rel_visitor_bottomup(v, rel, &rel_push_project_down_);
     101             : }
     102             : 
     103             : run_optimizer
     104      631198 : bind_push_project_down(visitor *v, global_props *gp)
     105             : {
     106      631198 :         int flag = v->sql->sql_optimizer;
     107      631002 :         return gp->opt_level == 1 && (flag & push_project_down) &&
     108     1262200 :                    (gp->cnt[op_project] || gp->cnt[op_groupby]) ? rel_push_project_down : NULL;
     109             : }
     110             : 
     111             : 
     112             : static bool exp_shares_exps(sql_exp *e, list *shared, uint64_t *uses);
     113             : 
     114             : static bool
     115      113475 : exps_shares_exps(list *exps, list *shared, uint64_t *uses)
     116             : {
     117      113475 :         if (!exps || !shared)
     118             :                 return false;
     119      393563 :         for (node *n = exps->h; n; n = n->next) {
     120      280500 :                 sql_exp *e = n->data;
     121             : 
     122      280500 :                 if (exp_shares_exps(e, shared, uses))
     123             :                         return true;
     124             :         }
     125             :         return false;
     126             : }
     127             : 
     128             : static bool
     129     3208053 : exp_shares_exps(sql_exp *e, list *shared, uint64_t *uses)
     130             : {
     131     5880652 :         switch(e->type) {
     132          81 :         case e_cmp:
     133          81 :                 if (e->flag == cmp_or || e->flag == cmp_filter)
     134          58 :                         return exps_shares_exps(e->l, shared, uses) || exps_shares_exps(e->r, shared, uses);
     135          23 :                 else if (e->flag == cmp_in || e->flag == cmp_notin)
     136           0 :                         return exp_shares_exps(e->l, shared, uses) || exps_shares_exps(e->r, shared, uses);
     137             :                 else
     138          46 :                         return exp_shares_exps(e->l, shared, uses) || exp_shares_exps(e->r, shared, uses) || (e->f && exp_shares_exps(e->f, shared, uses));
     139      114085 :         case e_atom:
     140      114085 :                 if (e->f)
     141           0 :                         return exps_shares_exps(e->f, shared, uses);
     142             :                 return false;
     143     5366566 :         case e_column:
     144             :                 {
     145     5366566 :                         sql_exp *ne = NULL;
     146     5366566 :                         assert(e->nid);
     147     5366566 :                         if (e->nid)
     148     5366566 :                                 ne = exps_bind_nid(shared, e->nid);
     149     5366566 :                         if (!ne)
     150             :                                 return false;
     151     4101164 :                         if (ne->type != e_column) {
     152      587147 :                                 int i = list_position(shared, ne);
     153      587147 :                                 if (i < 0)
     154             :                                         return false;
     155      587147 :                                 uint64_t used = (uint64_t) 1 << i;
     156      587147 :                                 if (used & *uses)
     157             :                                         return true;
     158      586731 :                                 *uses |= used;
     159      586731 :                                 return false;
     160             :                         }
     161     3514017 :                         if (ne != e && (list_position(shared, e) < 0 || list_position(shared, e) > list_position(shared, ne)))
     162             :                                 /* maybe ne refers to a local complex exp */
     163     2386038 :                                 return exp_shares_exps(ne, shared, uses);
     164             :                         return false;
     165             :                 }
     166      286561 :         case e_convert:
     167      286561 :                 return exp_shares_exps(e->l, shared, uses);
     168      113359 :         case e_aggr:
     169             :         case e_func:
     170      113359 :                 return exps_shares_exps(e->l, shared, uses);
     171             :         case e_psm:
     172           0 :                 assert(0);  /* not in projection list */
     173             :         }
     174             :         return false;
     175             : }
     176             : 
     177             : static bool
     178      387608 : exps_share_expensive_exp(list *exps, list *shared )
     179             : {
     180      387608 :         uint64_t uses = 0;
     181             : 
     182      387608 :         if (list_empty(exps) || list_empty(shared))
     183           0 :                 return false;
     184      387608 :         if (list_length(shared) > 64)
     185             :                 return true;
     186     3313936 :         for (node *n = exps->h; n; n = n->next) {
     187     2927491 :                 sql_exp *e = n->data;
     188             : 
     189     2927491 :                 if (exp_shares_exps(e, shared, &uses))
     190             :                         return true;
     191             :         }
     192             :         return false;
     193             : }
     194             : 
     195             : static bool ambigious_ref( list *exps, sql_exp *e);
     196             : static bool
     197       87304 : ambigious_refs( list *exps, list *refs)
     198             : {
     199       87304 :         if (list_empty(refs))
     200             :                 return false;
     201      264783 :         for(node *n=refs->h; n; n = n->next) {
     202      198608 :                 if (ambigious_ref(exps, n->data))
     203             :                         return true;
     204             :         }
     205             :         return false;
     206             : }
     207             : 
     208             : static bool
     209     4384877 : ambigious_ref( list *exps, sql_exp *e)
     210             : {
     211     4384877 :         sql_exp *ne = NULL;
     212             : 
     213     4384877 :         if (e->type == e_column) {
     214     3790979 :                 assert(e->nid);
     215     3790979 :                 if (e->nid)
     216     3790979 :                         ne = exps_bind_nid(exps, e->nid);
     217     3790979 :                 if (ne && e != ne)
     218             :                         return true;
     219             :         }
     220     4375896 :         if (e->type == e_func)
     221       87304 :                 return ambigious_refs(exps, e->l);
     222             :         return false;
     223             : }
     224             : 
     225             : /* merge 2 projects into the lower one */
     226             : static sql_rel *
     227     4300432 : rel_merge_projects_(visitor *v, sql_rel *rel)
     228             : {
     229     4368362 :         list *exps = rel->exps;
     230     4368362 :         sql_rel *prj = rel->l;
     231     4368362 :         node *n;
     232             : 
     233     4368362 :         if (rel->op == op_project &&
     234     1757764 :             prj && prj->op == op_project && !(rel_is_ref(prj)) && list_empty(prj->r)) {
     235      671102 :                 int all = 1;
     236             : 
     237      671102 :                 if (project_unsafe(rel, false) || project_unsafe(prj, false) || exps_share_expensive_exp(rel->exps, prj->exps))
     238      284657 :                         return rel;
     239             : 
     240             :                 /* here we try to fix aliases */
     241      386445 :                 list *nexps = NULL;
     242             :                 /* for each exp check if we can rename it */
     243     2629856 :                 for (n = exps->h; n && all; n = n->next) {
     244     2252392 :                         sql_exp *e = n->data, *ne = NULL;
     245             : 
     246             :                         /* We do not handle expressions pointing back in the list */
     247     2252392 :                         if (ambigious_ref(exps, e)) {
     248             :                                 all = 0;
     249             :                                 break;
     250             :                         }
     251     2243411 :                         ne = exp_push_down_prj(v->sql, e, prj, prj->l);
     252             :                         /* check if the referred alias name isn't used twice */
     253     2243411 :                         if (ne && ambigious_ref(nexps, ne)) {
     254             :                                 all = 0;
     255             :                                 break;
     256             :                         }
     257     1933877 :                         if (ne) {
     258     1933877 :                                 if (exp_name(e))
     259     1927479 :                                         exp_prop_alias(v->sql->sa, ne, e);
     260     1933877 :                                 if (!nexps)
     261      377456 :                                         nexps = new_exp_list(v->sql->sa);
     262     1933877 :                                 list_append(nexps, ne);
     263             :                         } else {
     264             :                                 all = 0;
     265             :                         }
     266             :                 }
     267      386445 :                 if (all) {
     268       67930 :                         rel->exps = nexps;
     269             :                         /* we can now remove the intermediate project */
     270             :                         /* push order by expressions */
     271       67930 :                         if (!list_empty(rel->r)) {
     272           0 :                                 list *nr = new_exp_list(v->sql->sa), *res = rel->r;
     273           0 :                                 for (n = res->h; n; n = n->next) {
     274           0 :                                         sql_exp *e = n->data, *ne = NULL;
     275             : 
     276           0 :                                         ne = exp_push_down_prj(v->sql, e, prj, prj->l);
     277           0 :                                         if (ne) {
     278           0 :                                                 if (exp_name(e))
     279           0 :                                                         exp_prop_alias(v->sql->sa, ne, e);
     280           0 :                                                 list_append(nr, ne);
     281             :                                         } else {
     282             :                                                 all = 0;
     283             :                                         }
     284             :                                 }
     285           0 :                                 if (all) {
     286           0 :                                         rel->r = nr;
     287             :                                 } else {
     288             :                                         /* leave as is */
     289           0 :                                         rel->exps = exps;
     290           0 :                                         return rel;
     291             :                                 }
     292             :                         }
     293       67930 :                         rel->l = prj->l;
     294       67930 :                         prj->l = NULL;
     295       67930 :                         rel_destroy(prj);
     296       67930 :                         v->changes++;
     297       67930 :                         return rel_merge_projects_(v, rel);
     298             :                 } else {
     299             :                         /* leave as is */
     300      318515 :                         rel->exps = exps;
     301             :                 }
     302      318515 :                 return rel;
     303             :         }
     304             :         return rel;
     305             : }
     306             : 
     307             : static sql_rel *
     308      186409 : rel_merge_projects(visitor *v, global_props *gp, sql_rel *rel)
     309             : {
     310      186409 :         (void) gp;
     311      186409 :         return rel_visitor_bottomup(v, rel, &rel_merge_projects_);
     312             : }
     313             : 
     314             : run_optimizer
     315      631231 : bind_merge_projects(visitor *v, global_props *gp)
     316             : {
     317      631231 :         int flag = v->sql->sql_optimizer;
     318      631035 :         return gp->opt_level == 1 && (flag & merge_projects) &&
     319     1262258 :                    (gp->cnt[op_project] || gp->cnt[op_groupby]) ? rel_merge_projects : NULL;
     320             : }
     321             : 
     322             : 
     323             : static sql_exp *split_aggr_and_project(mvc *sql, list *aexps, sql_exp *e);
     324             : 
     325             : static void
     326           0 : list_split_aggr_and_project(mvc *sql, list *aexps, list *exps)
     327             : {
     328           0 :         if (list_empty(exps))
     329             :                 return ;
     330           0 :         for(node *n = exps->h; n; n = n->next)
     331           0 :                 n->data = split_aggr_and_project(sql, aexps, n->data);
     332             : }
     333             : 
     334             : static sql_exp *
     335           0 : split_aggr_and_project(mvc *sql, list *aexps, sql_exp *e)
     336             : {
     337           0 :         switch(e->type) {
     338           0 :         case e_aggr:
     339             :                 /* add to the aggrs */
     340           0 :                 if (!exp_name(e))
     341           0 :                         exp_label(sql->sa, e, ++sql->label);
     342           0 :                 list_append(aexps, e);
     343           0 :                 return exp_ref(sql, e);
     344             :         case e_cmp:
     345             :                 /* e_cmp's shouldn't exist in an aggr expression list */
     346           0 :                 assert(0);
     347             :         case e_convert:
     348           0 :                 e->l = split_aggr_and_project(sql, aexps, e->l);
     349           0 :                 return e;
     350           0 :         case e_func:
     351           0 :                 list_split_aggr_and_project(sql, aexps, e->l);
     352           0 :                 return e;
     353             :         case e_atom:
     354             :         case e_column: /* constants and columns shouldn't be rewritten */
     355             :         case e_psm:
     356             :                 return e;
     357             :         }
     358             :         return NULL;
     359             : }
     360             : 
     361             : /* exp_rename */
     362             : static sql_exp * exp_rename(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t);
     363             : 
     364             : static list *
     365       52930 : exps_rename(mvc *sql, list *l, sql_rel *f, sql_rel *t)
     366             : {
     367       52930 :         if (list_empty(l))
     368             :                 return l;
     369       46846 :         for (node *n=l->h; n; n=n->next)
     370       30087 :                 n->data = exp_rename(sql, n->data, f, t);
     371             :         return l;
     372             : }
     373             : 
     374             : /* exp_rename */
     375             : static sql_exp *
     376      180534 : exp_rename(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t)
     377             : {
     378      180534 :         sql_exp *ne = NULL;
     379             : 
     380      180534 :         assert(is_project(f->op));
     381             : 
     382      180534 :         switch(e->type) {
     383       80870 :         case e_column:
     384       80870 :                 assert(e->nid);
     385       80870 :                 ne = exps_bind_nid(f->exps, e->nid);
     386       80870 :                 if (!ne)
     387             :                         return e;
     388       53110 :                 sql_exp *oe = e;
     389       53110 :                 e = NULL;
     390       53110 :                 if (ne && ne->nid)
     391       52961 :                         e = rel_find_exp(t, ne);
     392       52961 :                 if (!e) {
     393         149 :                         sql->session->status = 0;
     394         149 :                         sql->errstr[0] = 0;
     395         149 :                         if (exp_is_atom(ne))
     396             :                                 return ne;
     397             :                         return oe;
     398             :                 }
     399       52961 :                 ne = exp_ref(sql, e);
     400       52961 :                 if (oe)
     401       52961 :                         exp_propagate(sql->sa, ne, oe);
     402       52961 :                 return ne;
     403       55689 :         case e_cmp:
     404       55689 :                 if (e->flag == cmp_or || e->flag == cmp_filter) {
     405        3768 :                         e->l = exps_rename(sql, e->l, f, t);
     406        3768 :                         e->r = exps_rename(sql, e->r, f, t);
     407       51921 :                 } else if (e->flag == cmp_in || e->flag == cmp_notin) {
     408        6406 :                         e->l = exp_rename(sql, e->l, f, t);
     409        6406 :                         e->r = exps_rename(sql, e->r, f, t);
     410             :                 } else {
     411       45515 :                         e->l = exp_rename(sql, e->l, f, t);
     412       45515 :                         e->r = exp_rename(sql, e->r, f, t);
     413       45515 :                         if (e->f)
     414         233 :                                 e->f = exp_rename(sql, e->f, f, t);
     415             :                 }
     416             :                 break;
     417        4987 :         case e_convert:
     418        4987 :                 e->l = exp_rename(sql, e->l, f, t);
     419        4987 :                 break;
     420        2832 :         case e_aggr:
     421             :         case e_func:
     422        2832 :                 e->l = exps_rename(sql, e->l, f, t);
     423        2832 :                 break;
     424       36156 :         case e_atom:
     425       36156 :                 e->f = exps_rename(sql, e->f, f, t);
     426       36156 :                 break;
     427             :         case e_psm:
     428             :                 break;
     429             :         }
     430             :         return e;
     431             : }
     432             : 
     433             : static int
     434     5874122 : exp_match_exp_cmp( sql_exp *e1, sql_exp *e2)
     435             : {
     436     5874122 :         if (exp_match_exp(e1,e2))
     437       60944 :                 return 0;
     438             :         return -1;
     439             : }
     440             : 
     441             : /* Pushing projects up the tree. Done very early in the optimizer.
     442             :  * Makes later steps easier.
     443             :  */
     444             : extern void _rel_print(mvc *sql, sql_rel *rel);
     445             : static sql_rel *
     446     3901154 : rel_push_project_up_(visitor *v, sql_rel *rel)
     447             : {
     448     3901154 :         if (is_simple_project(rel->op) && rel->l && !rel_is_ref(rel)) {
     449     1257325 :                 sql_rel *l = rel->l;
     450     1257325 :                 if (is_simple_project(l->op))
     451      323759 :                         return rel_merge_projects_(v, rel);
     452             :                 /* find equal column references, later cases are rewritten into references back to the first
     453             :                  * project () [ i.i L1, i.i L2 ] -> project() [ i.i L1, L1 L2 ] */
     454      933566 :                 if (list_length(rel->exps) > 1) {
     455      787566 :                         list *exps = rel->exps;
     456      787566 :                         node *n = exps->h;
     457      787566 :                         bool needed = false;
     458     4050257 :                         for(n = n->next; n && !needed; n = n->next) {
     459     3262692 :                                 sql_exp *e = n->data;
     460     3262692 :                                 if (e->type == e_column && !is_selfref(e)) {
     461    21322466 :                                         for(node *m = exps->h; m && m != n && !needed; m = m->next) {
     462    18546587 :                                                 sql_exp *h = m->data;
     463    18546587 :                                                 if (exp_match_exp(h,e))
     464        9745 :                                                         needed = true;
     465             :                                         }
     466             :                                 }
     467             :                         }
     468      787565 :                         if (needed) {
     469        9745 :                                 rel->exps = sa_list(v->sql->sa);
     470        9745 :                                 node *n = exps->h;
     471        9745 :                                 list_append(rel->exps, n->data);
     472      120561 :                                 for(n = n->next; n; n = n->next) {
     473      110816 :                                         sql_exp *e = n->data;
     474      110816 :                                         if (e->type == e_column && !is_selfref(e)) {
     475       69793 :                                                 node *m = list_find(rel->exps, e, (fcmp)&exp_match_exp_cmp);
     476       69793 :                                                 if (m) {
     477       12174 :                                                         sql_exp *me = m->data;
     478       12174 :                                                         if (me->alias.label != me->nid) {
     479         793 :                                                                 sql_exp *ne = exp_ref(v->sql, m->data);
     480         793 :                                                                 exp_setalias(ne, e->alias.label, exp_relname(e), exp_name(e));
     481         793 :                                                                 exp_propagate(v->sql->sa, ne, e);
     482         793 :                                                                 set_selfref(ne);
     483         793 :                                                                 e = ne;
     484             :                                                         }
     485             :                                                 }
     486             :                                         }
     487      110816 :                                         list_append(rel->exps, e);
     488             :                                 }
     489             :                         }
     490             :                 }
     491             :         }
     492             : 
     493             :         /* project/project cleanup is done later */
     494     3577394 :         if (is_join(rel->op) || is_select(rel->op)) {
     495     1150627 :                 node *n;
     496     1150627 :                 list *exps = NULL, *l_exps, *r_exps;
     497     1150627 :                 sql_rel *l = rel->l;
     498     1150627 :                 sql_rel *r = rel->r;
     499     1150627 :                 sql_rel *t;
     500     1150627 :                 int nlexps = 0, i = 0;
     501             : 
     502             :                 /* Don't rewrite refs, non projections or constant or
     503             :                    order by projections  */
     504     1150627 :                 if (!l || rel_is_ref(l) || is_topn(l->op) || is_sample(l->op) ||
     505     1063080 :                    (is_join(rel->op) && !list_empty(rel->attr)) ||
     506     1012522 :                    (is_join(rel->op) && (!r || rel_is_ref(r))) ||
     507      987103 :                    (is_left(rel->op) && (rel->flag&MERGE_LEFT) /* can't push projections above merge statements left joins */) ||
     508      987101 :                    (is_select(rel->op) && l->op != op_project) ||
     509      484167 :                    (is_join(rel->op) && ((l->op != op_project && r->op != op_project) || is_topn(r->op) || is_sample(r->op))) ||
     510      167961 :                   ((l->op == op_project && (!l->l || l->r || project_unsafe(l, is_select(rel->op)))) ||
     511       98432 :                    (is_join(rel->op) && (r->op == op_project && (!r->l || r->r || project_unsafe(r, false))))))
     512     1066235 :                         return rel;
     513             : 
     514       84392 :                 if (l->op == op_project && l->l) {
     515     1019369 :                         for (n = l->exps->h; n; n = n->next) {
     516      978676 :                                 sql_exp *e = n->data;
     517      978676 :                                 if (!(is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) &&
     518      977432 :                                         !(e->type == e_column && !has_label(e)))
     519             :                                                 return rel;
     520             :                         }
     521             :                 }
     522       62244 :                 if (is_join(rel->op) && r->op == op_project && r->l) {
     523       65351 :                         for (n = r->exps->h; n; n = n->next) {
     524       60135 :                                 sql_exp *e = n->data;
     525       60135 :                                 if (!(is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) &&
     526       59724 :                                         !(e->type == e_column && !has_label(e)))
     527             :                                                 return rel;
     528             :                         }
     529             :                 }
     530             : 
     531       43681 :                 if (l->op == op_project && l->l) {
     532             :                         /* Go through the list of project expressions.
     533             :                            Check if they can be pushed up, ie are they not
     534             :                            changing or introducing any columns used
     535             :                            by the upper operator. */
     536             : 
     537       39829 :                         exps = new_exp_list(v->sql->sa);
     538      904042 :                         for (n = l->exps->h; n; n = n->next) {
     539      864213 :                                 sql_exp *e = n->data;
     540             : 
     541             :                                 /* we cannot rewrite projection with atomic values from outer joins */
     542      864213 :                                 if (is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) {
     543         662 :                                         list_append(exps, e);
     544      863551 :                                 } else if (e->type == e_column) {
     545      863551 :                                         if (has_label(e))
     546             :                                                 return rel;
     547      863551 :                                         list_append(exps, e);
     548             :                                 } else {
     549             :                                         return rel;
     550             :                                 }
     551             :                         }
     552             :                 } else {
     553        3852 :                         exps = rel_projections(v->sql, l, NULL, 1, 1);
     554             :                 }
     555       43681 :                 nlexps = list_length(exps);
     556             :                 /* also handle right hand of join */
     557       43681 :                 if (is_join(rel->op) && r->op == op_project && r->l && list_empty(rel->attr)) {
     558             :                         /* Here we also check all expressions of r like above
     559             :                            but also we need to check for ambiguous names. */
     560             : 
     561       43628 :                         for (n = r->exps->h; n; n = n->next) {
     562       38662 :                                 sql_exp *e = n->data;
     563             : 
     564             :                                 /* we cannot rewrite projection with atomic values from outer joins */
     565       38662 :                                 if (is_column(e->type) && exp_is_atom(e) && !(is_left(rel->op) || is_full(rel->op))) {
     566         150 :                                         list_append(exps, e);
     567       38512 :                                 } else if (e->type == e_column) {
     568       38262 :                                         if (has_label(e))
     569             :                                                 return rel;
     570       38262 :                                         list_append(exps, e);
     571             :                                 } else {
     572             :                                         return rel;
     573             :                                 }
     574             :                         }
     575       38465 :                 } else if (is_join(rel->op) && list_empty(rel->attr)) {
     576       21097 :                         list *r_exps = rel_projections(v->sql, r, NULL, 1, 1);
     577       21097 :                         list_merge(exps, r_exps, (fdup)NULL);
     578             :                 }
     579       43431 :                 if (!list_empty(rel->attr))
     580           0 :                         append(exps, exp_ref(v->sql, rel->attr->h->data));
     581             :                 /* Here we should check for ambiguous names ? */
     582       43431 :                 if (is_join(rel->op) && r && list_empty(rel->attr)) {
     583       26063 :                         t = (l->op == op_project && l->l)?l->l:l;
     584       26063 :                         l_exps = rel_projections(v->sql, t, NULL, 1, 1);
     585             :                         /* conflict with old right expressions */
     586       26063 :                         r_exps = rel_projections(v->sql, r, NULL, 1, 1);
     587       26063 :                         if (rel->attr)
     588           0 :                                 append(r_exps, exp_ref(v->sql, rel->attr->h->data));
     589       26063 :                         t = (r->op == op_project && r->l)?r->l:r;
     590       26063 :                         r_exps = rel_projections(v->sql, t, NULL, 1, 1);
     591             :                         /* conflict with new right expressions */
     592      691738 :                         for(n = l_exps->h; n; n = n->next) {
     593      666043 :                                 sql_exp *e = n->data;
     594             : 
     595      666043 :                                 if (exp_is_atom(e))
     596           0 :                                         continue;
     597      666043 :                                 if (e->nid && exps_bind_nid(r_exps, e->nid))
     598             :                                         return rel;
     599      665675 :                                 if (e->alias.label && exps_bind_nid(r_exps, e->alias.label))
     600             :                                         return rel;
     601             :                         }
     602             :                         /* conflict with new left expressions */
     603      175819 :                         for(n = r_exps->h; n; n = n->next) {
     604      150124 :                                 sql_exp *e = n->data;
     605             : 
     606      150124 :                                 if (exp_is_atom(e))
     607           0 :                                         continue;
     608      150124 :                                 if (e->nid && exps_bind_nid(l_exps, e->nid))
     609             :                                         return rel;
     610      150124 :                                 if (e->alias.label && exps_bind_nid(l_exps, e->alias.label))
     611             :                                         return rel;
     612             :                         }
     613             :                 }
     614             : 
     615             :                 /* rename operator expressions */
     616       43063 :                 if (l->op == op_project) {
     617             :                         /* rewrite rel from rel->l into rel->l->l */
     618       39430 :                         if (rel->exps) {
     619       81298 :                                 for (n = rel->exps->h; n; n = n->next) {
     620       43255 :                                         sql_exp *e = n->data, *ne;
     621             : 
     622       43255 :                                         ne = exp_rename(v->sql, e, l, l->l);
     623       43255 :                                         assert(ne);
     624       43255 :                                         if (ne != e && exp_name(e))
     625           0 :                                                 exp_propagate(v->sql->sa, ne, e);
     626       43255 :                                         n->data = ne;
     627             :                                 }
     628             :                         }
     629       39430 :                         rel->l = l->l;
     630       39430 :                         l->l = NULL;
     631       39430 :                         rel_destroy(l);
     632             :                 }
     633       43063 :                 if (is_join(rel->op) && r->op == op_project && list_empty(rel->attr)) {
     634             :                         /* rewrite rel from rel->r into rel->r->l */
     635        4608 :                         if (rel->exps) {
     636        8488 :                                 for (n = rel->exps->h; n; n = n->next) {
     637        4536 :                                         sql_exp *e = n->data, *ne;
     638             : 
     639        4536 :                                         ne = exp_rename(v->sql, e, r, r->l);
     640        4536 :                                         assert(ne);
     641        4536 :                                         if (ne != e && exp_name(e))
     642           0 :                                                 exp_propagate(v->sql->sa, ne, e);
     643        4536 :                                         n->data = ne;
     644             :                                 }
     645             :                         }
     646        4608 :                         rel->r = r->l;
     647        4608 :                         r->l = NULL;
     648        4608 :                         rel_destroy(r);
     649             :                 }
     650             :                 /* Done, ie introduce new project */
     651       43063 :                 exps_fix_card(exps, rel->card);
     652             :                 /* Fix nil flag */
     653       43063 :                 if (!list_empty(exps)) {
     654      926484 :                         for (n = exps->h ; n && i < nlexps ; n = n->next, i++) {
     655      883421 :                                 sql_exp *e = n->data;
     656             : 
     657      883421 :                                 if (is_right(rel->op) || is_full(rel->op))
     658         449 :                                         set_has_nil(e);
     659      883421 :                                 set_not_unique(e);
     660             :                         }
     661      186550 :                         for (; n ; n = n->next) {
     662      143487 :                                 sql_exp *e = n->data;
     663             : 
     664      143487 :                                 if (is_left(rel->op) || is_full(rel->op))
     665       44540 :                                         set_has_nil(e);
     666      143487 :                                 set_not_unique(e);
     667             :                         }
     668             :                 }
     669       43063 :                 v->changes++;
     670       43063 :                 return rel_inplace_project(v->sql->sa, rel, NULL, exps);
     671             :         }
     672     2426767 :         if (is_groupby(rel->op) && !rel_is_ref(rel) && rel->exps && list_length(rel->exps) > 1) {
     673       41611 :                 node *n;
     674       41611 :                 int fnd = 0;
     675       41611 :                 list *aexps, *pexps;
     676             : 
     677             :                 /* check if some are expressions aren't e_aggr */
     678      163358 :                 for (n = rel->exps->h; n && !fnd; n = n->next) {
     679      121747 :                         sql_exp *e = n->data;
     680             : 
     681      121747 :                         if (e->type != e_aggr && e->type != e_column && e->type != e_atom && e->card > CARD_ATOM) {
     682      121747 :                                 fnd = 1;
     683             :                         }
     684             :                 }
     685             :                 /* only aggr, no rewrite needed */
     686       41611 :                 if (!fnd)
     687             :                         return rel;
     688             : 
     689           0 :                 aexps = sa_list(v->sql->sa);
     690           0 :                 pexps = sa_list(v->sql->sa);
     691           0 :                 for (n = rel->exps->h; n; n = n->next) {
     692           0 :                         sql_exp *e = n->data, *ne = NULL;
     693             : 
     694           0 :                         switch (e->type) {
     695           0 :                         case e_atom: /* move over to the projection */
     696           0 :                                 list_append(pexps, e);
     697           0 :                                 break;
     698           0 :                         case e_func:
     699           0 :                                 list_append(pexps, e);
     700           0 :                                 list_split_aggr_and_project(v->sql, aexps, e->l);
     701           0 :                                 break;
     702           0 :                         case e_convert:
     703           0 :                                 list_append(pexps, e);
     704           0 :                                 e->l = split_aggr_and_project(v->sql, aexps, e->l);
     705           0 :                                 break;
     706           0 :                         default: /* simple alias */
     707           0 :                                 list_append(aexps, e);
     708           0 :                                 ne = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
     709           0 :                                 assert(0);
     710             :                                 list_append(pexps, ne);
     711             :                                 break;
     712             :                         }
     713             :                 }
     714           0 :                 v->changes++;
     715           0 :                 rel->exps = aexps;
     716           0 :                 return rel_inplace_project( v->sql->sa, rel, NULL, pexps);
     717             :         }
     718             :         return rel;
     719             : }
     720             : 
     721             : static sql_rel *
     722      185701 : rel_push_project_up(visitor *v, global_props *gp, sql_rel *rel)
     723             : {
     724      185701 :         (void) gp;
     725      185701 :         return rel_visitor_bottomup(v, rel, &rel_push_project_up_);
     726             : }
     727             : 
     728             : run_optimizer
     729      631226 : bind_push_project_up(visitor *v, global_props *gp)
     730             : {
     731      631226 :         int flag = v->sql->sql_optimizer;
     732      631030 :         return gp->opt_level == 1 && (flag & push_project_up) &&
     733     1262256 :                    gp->cnt[op_project] ? rel_push_project_up : NULL;
     734             : }
     735             : 
     736             : 
     737             : static void split_exps(mvc *sql, list *exps, sql_rel *rel);
     738             : 
     739             : static int
     740     3154627 : exp_refers_cmp( sql_exp *e1, sql_exp *e2)
     741             : {
     742     3154627 :         if (exp_refers(e1,e2))
     743           0 :                 return 0;
     744             :         return -1;
     745             : }
     746             : 
     747             : sql_exp *
     748      133369 : add_exp_too_project(mvc *sql, sql_exp *e, sql_rel *rel)
     749             : {
     750      133369 :         node *n = list_find(rel->exps, e, (fcmp)&exp_match_exp_cmp);
     751             : 
     752             :         /* if not matching we may refer to an older expression */
     753      133369 :         if (!n)
     754       84599 :                 n = list_find(rel->exps, e, (fcmp)&exp_refers_cmp);
     755       84599 :         if (!n) {
     756       84599 :                 exp_label(sql->sa, e, ++sql->label);
     757       84599 :                 append(rel->exps, e);
     758             :         } else {
     759       48770 :                 sql_exp *ne = n->data;
     760             : 
     761       48770 :                 if (rel && rel->l) {
     762       48770 :                         if (ne && ne->alias.label && rel_find_nid(rel->l, ne->alias.label)) {
     763           0 :                                 exp_label(sql->sa, e, ++sql->label);
     764           0 :                                 append(rel->exps, e);
     765           0 :                                 ne = e;
     766             :                         }
     767             :                 }
     768             :                 e = ne;
     769             :         }
     770      133369 :         e = exp_ref(sql, e);
     771      133369 :         return e;
     772             : }
     773             : 
     774             : static void
     775      154593 : add_exps_too_project(mvc *sql, list *exps, sql_rel *rel)
     776             : {
     777      154593 :         node *n;
     778             : 
     779      154593 :         if (!exps)
     780             :                 return;
     781      478718 :         for(n=exps->h; n; n = n->next) {
     782      324125 :                 sql_exp *e = n->data;
     783             : 
     784      324125 :                 if (e->type != e_column && !exp_is_atom(e))
     785      133086 :                         n->data = add_exp_too_project(sql, e, rel);
     786             :         }
     787             : }
     788             : 
     789             : static sql_exp *
     790      509331 : split_exp(mvc *sql, sql_exp *e, sql_rel *rel)
     791             : {
     792      509331 :         if (exp_is_atom(e))
     793             :                 return e;
     794      378434 :         switch(e->type) {
     795             :         case e_column:
     796             :                 return e;
     797       16920 :         case e_convert:
     798       16920 :                 e->l = split_exp(sql, e->l, rel);
     799       16920 :                 return e;
     800      164410 :         case e_aggr:
     801             :         case e_func:
     802      164410 :                 if (!is_analytic(e) && !exp_has_sideeffect(e)) {
     803      162469 :                         sql_subfunc *f = e->f;
     804      162469 :                         if (e->type == e_func && !f->func->s && is_caselike_func(f) /*is_ifthenelse_func(f)*/) {
     805             :                                 return e;
     806             :                         } else {
     807      154593 :                                 split_exps(sql, e->l, rel);
     808      154593 :                                 add_exps_too_project(sql, e->l, rel);
     809             :                         }
     810             :                 }
     811             :                 return e;
     812         486 :         case e_cmp:
     813         486 :                 if (e->flag == cmp_or || e->flag == cmp_filter) {
     814         463 :                         split_exps(sql, e->l, rel);
     815         463 :                         split_exps(sql, e->r, rel);
     816          23 :                 } else if (e->flag == cmp_in || e->flag == cmp_notin) {
     817           0 :                         e->l = split_exp(sql, e->l, rel);
     818           0 :                         split_exps(sql, e->r, rel);
     819             :                 } else {
     820          23 :                         e->l = split_exp(sql, e->l, rel);
     821          23 :                         e->r = split_exp(sql, e->r, rel);
     822          23 :                         if (e->f)
     823          21 :                                 e->f = split_exp(sql, e->f, rel);
     824             :                 }
     825             :                 return e;
     826             :         case e_atom:
     827             :         case e_psm:
     828             :                 return e;
     829             :         }
     830             :         return e;
     831             : }
     832             : 
     833             : static void
     834      155526 : split_exps(mvc *sql, list *exps, sql_rel *rel)
     835             : {
     836      155526 :         if (list_empty(exps))
     837             :                 return;
     838      480598 :         for(node *n=exps->h; n; n = n->next){
     839      325072 :                 sql_exp *e = n->data;
     840             : 
     841      325072 :                 e = split_exp(sql, e, rel);
     842      325072 :                 n->data = e;
     843             :         }
     844             : }
     845             : 
     846             : sql_rel *
     847      639877 : rel_split_project_(visitor *v, sql_rel *rel, int top)
     848             : {
     849     1279760 :         if (mvc_highwater(v->sql))
     850             :                 return rel;
     851             : 
     852      639885 :         if (!rel)
     853             :                 return NULL;
     854      639885 :         if (is_project(rel->op) && list_length(rel->exps) && (is_groupby(rel->op) || rel->l) && !need_distinct(rel) && !is_single(rel)) {
     855      179294 :                 list *exps = rel->exps;
     856      179294 :                 node *n;
     857      179294 :                 int funcs = 0;
     858      179294 :                 sql_rel *nrel;
     859             : 
     860             :                 /* are there functions */
     861     1398047 :                 for (n=exps->h; n && !funcs; n = n->next) {
     862     1218758 :                         sql_exp *e = n->data;
     863             : 
     864     1218758 :                         funcs = exp_has_func(e);
     865             :                 }
     866             :                 /* introduce extra project */
     867      179289 :                 if (funcs && rel->op != op_project) {
     868           7 :                         nrel = rel_project(v->sql->sa, rel->l,
     869           7 :                                 rel_projections(v->sql, rel->l, NULL, 1, 1));
     870           7 :                         rel->l = nrel;
     871             :                         /* recursively split all functions and add those to the projection list */
     872           7 :                         split_exps(v->sql, rel->exps, nrel);
     873          14 :                         if (nrel->l && !(nrel->l = rel_split_project_(v, nrel->l, (is_topn(rel->op)||is_sample(rel->op))?top:0)))
     874             :                                 return NULL;
     875           7 :                         return rel;
     876      179282 :                 } else if (funcs && !top && list_empty(rel->r)) {
     877             :                         /* projects can have columns point back into the expression list, ie
     878             :                          * create a new list including the split expressions */
     879       17210 :                         node *n;
     880       17210 :                         list *exps = rel->exps;
     881             : 
     882       17210 :                         rel->exps = sa_list(v->sql->sa);
     883      184482 :                         for (n=exps->h; n; n = n->next)
     884      167272 :                                 append(rel->exps, split_exp(v->sql, n->data, rel));
     885      162072 :                 } else if (funcs && top && rel_is_ref(rel) && list_empty(rel->r)) {
     886             :                         /* inplace */
     887           9 :                         list *exps = rel_projections(v->sql, rel, NULL, 1, 1);
     888           9 :                         sql_rel *l = rel_project(v->sql->sa, rel->l, NULL);
     889           9 :                         rel->l = l;
     890           9 :                         l->exps = rel->exps;
     891           9 :                         set_processed(l);
     892           9 :                         rel->exps = exps;
     893             :                 }
     894             :         }
     895      639874 :         if (is_mset(rel->op) || is_set(rel->op) || is_basetable(rel->op))
     896             :                 return rel;
     897      419115 :         if (rel->l && (rel->op != op_table || rel->flag != TRIGGER_WRAPPER)) {
     898      439533 :                 rel->l = rel_split_project_(v, rel->l, (is_topn(rel->op)||is_sample(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0);
     899      408732 :                 if (!rel->l)
     900             :                         return NULL;
     901             :         }
     902      419122 :         if ((is_join(rel->op) || is_semi(rel->op)) && rel->r) {
     903      121442 :                 rel->r = rel_split_project_(v, rel->r, (is_topn(rel->op)||is_sample(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0);
     904      121442 :                 if (!rel->r)
     905             :                         return NULL;
     906             :         }
     907             :         return rel;
     908             : }
     909             : 
     910             : static sql_rel *
     911      109701 : rel_split_project(visitor *v, global_props *gp, sql_rel *rel)
     912             : {
     913      109701 :         (void) gp;
     914      109701 :         return rel_split_project_(v, rel, 1);
     915             : }
     916             : 
     917             : run_optimizer
     918      631187 : bind_split_project(visitor *v, global_props *gp)
     919             : {
     920      631187 :         int flag = v->sql->sql_optimizer;
     921      554265 :         return gp->opt_cycle == 0 && gp->opt_level == 1 && (flag & split_project) &&
     922     1185444 :                    (gp->cnt[op_project] || gp->cnt[op_groupby]) ? rel_split_project : NULL;
     923             : }
     924             : 
     925             : 
     926             : static sql_rel *
     927      185892 : rel_project_reduce_casts(visitor *v, global_props *gp, sql_rel *rel)
     928             : {
     929      185892 :         if (!rel)
     930             :                 return NULL;
     931      185892 :         (void) gp;
     932      185892 :         if (gp->opt_level == 1 && v->value_based_opt && is_simple_project(rel->op) && list_length(rel->exps)) {
     933       98976 :                 list *exps = rel->exps;
     934       98976 :                 node *n;
     935             : 
     936      514901 :                 for (n=exps->h; n; n = n->next) {
     937      415925 :                         sql_exp *e = n->data;
     938             : 
     939      415925 :                         if (e && e->type == e_func) {
     940       30657 :                                 sql_subfunc *f = e->f;
     941       30657 :                                 sql_subtype *res = f->res->h->data;
     942             : 
     943       30657 :                                 if (!f->func->s && !strcmp(f->func->base.name, "sql_mul") && res->scale > 0) {
     944           6 :                                         list *args = e->l;
     945           6 :                                         sql_exp *h = args->h->data;
     946           6 :                                         sql_exp *t = args->t->data;
     947           6 :                                         atom *ha = exp_value(v->sql, h), *ta = exp_value(v->sql, t);
     948             : 
     949           6 :                                         if (ha || ta) {
     950           5 :                                                 atom *a = ha ? ha : ta;
     951           5 :                                                 atom *na = reduce_scale(v->sql, a);
     952             : 
     953           5 :                                                 if (na && na != a) {
     954           2 :                                                         int rs = a->tpe.scale - na->tpe.scale;
     955           2 :                                                         res->scale -= rs;
     956           2 :                                                         if (ha) {
     957           2 :                                                                 h->r = NULL;
     958           2 :                                                                 h->l = na;
     959             :                                                         } else {
     960           0 :                                                                 t->r = NULL;
     961           0 :                                                                 t->l = na;
     962             :                                                         }
     963           2 :                                                         v->changes++;
     964             :                                                 }
     965             :                                         }
     966             :                                 }
     967             :                         }
     968             :                 }
     969             :         }
     970             :         return rel;
     971             : }
     972             : 
     973             : run_optimizer
     974      631338 : bind_project_reduce_casts(visitor *v, global_props *gp)
     975             : {
     976      631338 :         int flag = v->sql->sql_optimizer;
     977      631338 :         return gp->cnt[op_project] && (flag & project_reduce_casts) ? rel_project_reduce_casts : NULL;
     978             : }
     979             : 
     980             : static sql_column *
     981       99379 : exp_find_column_( sql_rel *rel, sql_exp *exp, int pnr, sql_rel **bt )
     982             : {
     983       99379 :         if (exp->type == e_column)
     984       99353 :                 return name_find_column(rel, exp->l, exp->r, pnr, bt);
     985             :         return NULL;
     986             : }
     987             : 
     988             : static int
     989           0 : rel_part_nr( sql_rel *rel, sql_exp *e )
     990             : {
     991           0 :         sql_column *c = NULL;
     992           0 :         sql_rel *bt = NULL;
     993           0 :         assert(e->type == e_cmp);
     994             : 
     995           0 :         c = exp_find_column_(rel, e->l, -1, &bt);
     996           0 :         if (!c)
     997           0 :                 c = exp_find_column_(rel, e->r, -1, &bt);
     998           0 :         if (!c && e->f)
     999           0 :                 c = exp_find_column_(rel, e->f, -1, &bt);
    1000           0 :         if (!c || !bt || !rel_base_get_mergetable(bt))
    1001           0 :                 return -1;
    1002           0 :         sql_table *pp = c->t;
    1003           0 :         sql_table *mt = rel_base_get_mergetable(bt);
    1004           0 :         return find_member_pos(mt->members, pp);
    1005             : }
    1006             : 
    1007             : static int
    1008           0 : rel_uses_part_nr( sql_rel *rel, sql_exp *e, int pnr )
    1009             : {
    1010           0 :         sql_column *c = NULL;
    1011           0 :         sql_rel *bt = NULL;
    1012           0 :         assert(e->type == e_cmp);
    1013             : 
    1014             :         /*
    1015             :          * following case fails.
    1016             :          *
    1017             :          * semijoin( A1, union [A1, A2] )
    1018             :          * The union will never return proper column (from A2).
    1019             :          * ie need different solution (probably pass pnr).
    1020             :          */
    1021           0 :         c = exp_find_column_(rel, e->l, pnr, &bt);
    1022           0 :         if (!c)
    1023           0 :                 c = exp_find_column_(rel, e->r, pnr, &bt);
    1024           0 :         if (c && bt && rel_base_get_mergetable(bt)) {
    1025           0 :                 sql_table *pp = c->t;
    1026           0 :                 sql_table *mt = rel_base_get_mergetable(bt);
    1027           0 :                 if (find_member_pos(mt->members, pp) == pnr)
    1028             :                         return 1;
    1029             :         }
    1030             :         /* for projects we may need to do a rename! */
    1031           0 :         if (is_project(rel->op) || is_topn(rel->op) || is_sample(rel->op))
    1032           0 :                 return rel_uses_part_nr( rel->l, e, pnr);
    1033             : 
    1034           0 :         if (is_munion(rel->op)) {
    1035             :                 list *l = rel->l;
    1036             :                 if (rel_uses_part_nr( l->h->data, e, pnr))
    1037             :                         return 1;
    1038           0 :         } else if (is_union(rel->op) || is_join(rel->op) || is_semi(rel->op)) {
    1039           0 :                 if (rel_uses_part_nr( rel->l, e, pnr))
    1040             :                         return 1;
    1041           0 :                 if (!is_semi(rel->op) && rel_uses_part_nr( rel->r, e, pnr))
    1042             :                         return 1;
    1043             :         }
    1044             :         return 0;
    1045             : }
    1046             : 
    1047             : static sql_column *
    1048        3030 : exp_is_pkey(sql_rel *rel, sql_exp *e)
    1049             : {
    1050        3030 :         if (find_prop(e->p, PROP_HASHCOL)) { /* aligned PKEY JOIN */
    1051         247 :                 fcmp cmp = (fcmp)&kc_column_cmp;
    1052         247 :                 sql_column *c = exp_find_column(rel, e, -2);
    1053             : 
    1054         247 :                 if (c && c->t->pkey && list_find(c->t->pkey->k.columns, c, cmp) != NULL)
    1055             :                         return c;
    1056             :         }
    1057             :         return NULL;
    1058             : }
    1059             : 
    1060             : static sql_exp *
    1061        3531 : rel_is_join_on_pkey(sql_rel *rel, bool pk_fk) /* pk_fk is used to verify is a join on pk-fk */
    1062             : {
    1063        3531 :         if (!rel || !rel->exps)
    1064             :                 return NULL;
    1065        4856 :         for (node *n = rel->exps->h; n; n = n->next) {
    1066        2576 :                 sql_exp *je = n->data;
    1067             : 
    1068        3892 :                 if (je->type == e_cmp && je->flag == cmp_equal &&
    1069        2632 :                         (exp_is_pkey(rel, je->l) || exp_is_pkey(rel, je->r)) &&
    1070         246 :                         (!pk_fk || find_prop(je->p, PROP_JOINIDX)))
    1071           0 :                         return je;
    1072             :         }
    1073             :         return NULL;
    1074             : }
    1075             : 
    1076             : static int
    1077           0 : exps_has_predicate( list *l )
    1078             : {
    1079           0 :         node *n;
    1080             : 
    1081           0 :         for( n = l->h; n; n = n->next){
    1082           0 :                 sql_exp *e = n->data;
    1083             : 
    1084           0 :                 if (e->card <= CARD_ATOM)
    1085             :                         return 1;
    1086             :         }
    1087             :         return 0;
    1088             : }
    1089             : 
    1090             : static sql_rel *
    1091         735 : rel_find_select( sql_rel *r)
    1092             : {
    1093        2416 :         while (!is_select(r->op) && r->l && is_project(r->op))
    1094             :                 r = r->l;
    1095         735 :         if (is_select(r->op))
    1096          15 :                 return r;
    1097             :         return NULL;
    1098             : }
    1099             : 
    1100             : static bool
    1101      144654 : rels_share_rel(list *l)
    1102             : {
    1103      144654 :         sql_rel *ref = NULL;
    1104      224815 :         for (node *n = l->h; n; n = n->next) {
    1105      224080 :                 sql_rel *r = n->data;
    1106      224080 :                 if(!ref) {
    1107      144654 :                         if ((ref = rel_find_ref(r)) == NULL)
    1108             :                                 return false;
    1109       79426 :                 } else if (ref != rel_find_ref(r)) {
    1110             :                                 return false;
    1111             :                 }
    1112             :         }
    1113             :         return true;
    1114             : }
    1115             : 
    1116             : static inline sql_rel *
    1117     2802223 : rel_merge_munion(visitor *v, sql_rel *rel)
    1118             : {
    1119     2802223 :         if (is_munion(rel->op) && rels_share_rel(rel->l)) {
    1120         735 :                 list *rels = rel->l, *nrels = NULL;
    1121         735 :                 sql_rel *cur = NULL, *curs = NULL;
    1122             : 
    1123             :                 /* Find selects and try to merge */
    1124         735 :                 for(node *n = rels->h; n; n = n->next) {
    1125         735 :                         sql_rel *r = n->data;
    1126         735 :                         sql_rel *s = rel_find_select(r);
    1127             : 
    1128         735 :                         if (!s)
    1129             :                                 return rel;
    1130          15 :                         if (cur) {
    1131           0 :                                 if (s != r->l || curs->l != s->l || !rel_is_ref(s->l) ||
    1132             :                                         /* for now only union(project*(select(R),project*(select(R))) */
    1133           0 :                                    !s->exps || !curs->exps || exps_has_predicate(s->exps) || exps_has_predicate(curs->exps)) {
    1134           0 :                                         if (nrels)
    1135           0 :                                                 append(nrels, r);
    1136             :                                         else
    1137             :                                                 return rel;
    1138             :                                 }
    1139             : 
    1140             :                                 /* merge, ie. add 'or exp' */
    1141           0 :                                 curs->exps = append(new_exp_list(v->sql->sa), exp_or(v->sql->sa, cur->exps, s->exps, 0));
    1142           0 :                                 if (!nrels) {
    1143           0 :                                         nrels = sa_list(v->sql->sa);
    1144           0 :                                         append(nrels, cur);
    1145             :                                 }
    1146             :                         } else {
    1147          15 :                                 if (s != r->l || !rel_is_ref(s->l)) {
    1148          15 :                                         if (nrels) {
    1149           0 :                                                 append(nrels, r);
    1150             :                                         } else {
    1151             :                                                 return rel;
    1152             :                                         }
    1153             :                                 }
    1154             :                                 cur = r;
    1155             :                                 curs = s;
    1156             :                         }
    1157             :                 }
    1158           0 :                 if (nrels) {
    1159           0 :                         v->changes++;
    1160           0 :                         rel->l = nrels;
    1161             :                 }
    1162           0 :                 return rel;
    1163             :         }
    1164             :         return rel;
    1165             : }
    1166             : 
    1167             : static sql_rel *
    1168     2802223 : rel_optimize_munions_bottomup_(visitor *v, sql_rel *rel)
    1169             : {
    1170     2802223 :         rel = rel_merge_munion(v, rel);
    1171     2802223 :         return rel;
    1172             : }
    1173             : 
    1174             : static sql_rel *
    1175       28972 : rel_optimize_munions_bottomup(visitor *v, global_props *gp, sql_rel *rel)
    1176             : {
    1177       28972 :         (void) gp;
    1178       28972 :         return rel_visitor_bottomup(v, rel, &rel_optimize_munions_bottomup_);
    1179             : }
    1180             : 
    1181             : run_optimizer
    1182      631166 : bind_optimize_unions_bottomup(visitor *v, global_props *gp)
    1183             : {
    1184      631166 :         int flag = v->sql->sql_optimizer;
    1185      630970 :         return gp->opt_level == 1 && gp->cnt[op_munion] && (flag & optimize_unions_bottomup)
    1186      631166 :                    ? rel_optimize_munions_bottomup : NULL;
    1187             : }
    1188             : 
    1189             : 
    1190             : static inline sql_rel *
    1191     3876880 : rel_project_cse(visitor *v, sql_rel *rel)
    1192             : {
    1193     3876880 :         if (is_project(rel->op) && rel->exps) {
    1194     1573791 :                 node *n, *m;
    1195     1573791 :                 list *nexps;
    1196     1573791 :                 int needed = 0;
    1197             : 
    1198     9388226 :                 for (n=rel->exps->h; n && !needed; n = n->next) {
    1199     7814435 :                         sql_exp *e1 = n->data;
    1200             : 
    1201     7814435 :                         if (e1->type != e_column && !exp_is_atom(e1) && exp_name(e1)) {
    1202    16537480 :                                 for (m=n->next; m && !needed; m = m->next){
    1203    15755565 :                                         sql_exp *e2 = m->data;
    1204             : 
    1205    15755565 :                                         if (exp_name(e2) && exp_match_exp(e1, e2))
    1206    15755565 :                                                 needed = 1;
    1207             :                                 }
    1208             :                         }
    1209             :                 }
    1210             : 
    1211     1573791 :                 if (!needed)
    1212             :                         return rel;
    1213             : 
    1214         739 :                 nexps = new_exp_list(v->sql->sa);
    1215        8371 :                 for (n=rel->exps->h; n; n = n->next) {
    1216        7632 :                         sql_exp *e1 = n->data;
    1217             : 
    1218        7632 :                         if (e1->type != e_column && !exp_is_atom(e1) && exp_name(e1)) {
    1219       33802 :                                 for (m=nexps->h; m; m = m->next){
    1220       29202 :                                         sql_exp *e2 = m->data;
    1221             : 
    1222       29202 :                                         if (exp_name(e2) && exp_match_exp(e1, e2)) {
    1223         779 :                                                 assert(e2->alias.label);
    1224         779 :                                                 sql_exp *ne = exp_ref(v->sql, e2);
    1225             : 
    1226         779 :                                                 ne = exp_propagate(v->sql->sa, ne, e1);
    1227         779 :                                                 assert(!e1->ref);
    1228         779 :                                                 exp_setalias(ne, e1->alias.label, exp_relname(e1), exp_name(e1));
    1229         779 :                                                 set_selfref(ne);
    1230         779 :                                                 assert(ne->nid);
    1231             :                                                 e1 = ne;
    1232             :                                                 break;
    1233             :                                         }
    1234             :                                 }
    1235             :                         }
    1236        7632 :                         append(nexps, e1);
    1237             :                 }
    1238         739 :                 rel->exps = nexps;
    1239             :         }
    1240             :         return rel;
    1241             : }
    1242             : 
    1243             : static int exp_is_const_op(sql_exp *exp, sql_exp *tope, sql_rel *expr);
    1244             : 
    1245             : static int
    1246          76 : exps_are_const_op(list *exps, sql_exp *tope, sql_rel *expr)
    1247             : {
    1248          76 :         int ok = 1;
    1249             : 
    1250          76 :         if (list_empty(exps))
    1251             :                 return 1;
    1252         152 :         for (node *n = exps->h; n && ok; n = n->next)
    1253          76 :                 ok &= exp_is_const_op(n->data, tope, expr);
    1254             :         return ok;
    1255             : }
    1256             : 
    1257             : static int
    1258         526 : exp_is_const_op(sql_exp *exp, sql_exp *tope, sql_rel *expr)
    1259             : {
    1260         666 :         switch (exp->type) {
    1261          30 :         case e_atom:
    1262          30 :                 return exp->f ? 0 : 1;
    1263           2 :         case e_convert:
    1264           2 :                 return exp_is_const_op(exp->l, tope, expr);
    1265          76 :         case e_func:
    1266             :         case e_aggr: {
    1267          76 :                 sql_subfunc *f = exp->f;
    1268          76 :                 if (f->func->side_effect || IS_ANALYTIC(f->func))
    1269             :                         return 0;
    1270          76 :                 return exps_are_const_op(exp->l, tope, expr);
    1271             :         }
    1272           0 :         case e_cmp:
    1273           0 :                 if (exp->flag == cmp_or || exp->flag == cmp_filter)
    1274           0 :                         return exps_are_const_op(exp->l, tope, expr) && exps_are_const_op(exp->r, tope, expr);
    1275           0 :                 if (exp->flag == cmp_in || exp->flag == cmp_notin)
    1276           0 :                         return exp_is_const_op(exp->l, tope, expr) && exps_are_const_op(exp->r, tope, expr);
    1277           0 :                 return exp_is_const_op(exp->l, tope, expr) && exp_is_const_op(exp->r, tope, expr) && (!exp->f || exp_is_const_op(exp->f, tope, expr));
    1278         558 :         case e_column: {
    1279         558 :                 if (is_simple_project(expr->op) || is_groupby(expr->op)) {
    1280             :                         /* in a simple projection, self-references may occur */
    1281         558 :                         sql_exp *nexp = exps_bind_nid(expr->exps, exp->nid);
    1282         558 :                         if (nexp && list_position(expr->exps, nexp) < list_position(expr->exps, tope))
    1283             :                                 return exp_is_const_op(nexp, exp, expr);
    1284             :                 }
    1285             :                 return 0;
    1286             :         }
    1287             :         default:
    1288             :                 return 0;
    1289             :         }
    1290             : }
    1291             : 
    1292             : static sql_exp *
    1293          25 : rel_groupby_add_count_star(mvc *sql, sql_rel *rel, sql_exp *count_star_exp, bool *count_added)
    1294             : {
    1295          25 :         if (count_star_exp)
    1296             :                 return count_star_exp;
    1297          18 :         if (!list_empty(rel->exps)) {
    1298          49 :                 for (node *n=rel->exps->h; n ; n = n->next) {
    1299          35 :                         sql_exp *e = n->data;
    1300             : 
    1301          35 :                         if (exp_aggr_is_count(e) && !need_distinct(e) && list_length(e->l) == 0)
    1302           4 :                                 return e;
    1303             :                 }
    1304             :         }
    1305          14 :         sql_subfunc *cf = sql_bind_func(sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
    1306          14 :         *count_added = true;
    1307          14 :         return rel_groupby_add_aggr(sql, rel, exp_aggr(sql->sa, NULL, cf, 0, 0, rel->card, 0));
    1308             : }
    1309             : 
    1310             : /* optimize sum(x + 12) into sum(x) + 12*count(*) */
    1311             : static inline sql_rel *
    1312       42797 : rel_simplify_sum(visitor *v, sql_rel *rel)
    1313             : {
    1314       42797 :         if (is_groupby(rel->op) && !list_empty(rel->exps)) {
    1315       42742 :                 sql_rel *upper = NULL, *groupby = rel, *l = groupby->l;
    1316       42742 :                 sql_exp *count_star_exp = NULL;
    1317       42742 :                 bool count_added = false;
    1318             : 
    1319      134598 :                 for (node *n=groupby->exps->h; n ; n = n->next) {
    1320       91856 :                         sql_exp *e = n->data;
    1321       91856 :                         list *el = e->l;
    1322       91856 :                         sql_subfunc *sf = e->f;
    1323             : 
    1324       91856 :                         if (e->type == e_aggr && !need_distinct(e) && sf->func->type == F_AGGR && !sf->func->s && !strcmp(sf->func->base.name, "sum")) {
    1325        8970 :                                 sql_rel *expr = groupby;
    1326        8970 :                                 sql_exp *exp = (sql_exp*) el->h->data, *oexp = exp;
    1327             : 
    1328        8970 :                                 while (is_numeric_upcast(exp))
    1329           0 :                                         exp = exp->l;
    1330             :                                 /* we want to find a +/- call, so expect them to appear only on simple projections */
    1331       19459 :                                 while (exp && exp->type == e_column && (is_simple_project(expr->op) || is_groupby(expr->op)) && expr->l) {
    1332       12153 :                                         sql_rel *nexpr = NULL;
    1333       12153 :                                         sql_exp *nexp = rel_find_exp_and_corresponding_rel(l, exp, false, &nexpr, NULL);
    1334             : 
    1335             :                                         /* break when it loops on the same relation */
    1336       12153 :                                         if (nexpr == expr && list_position(expr->exps, nexp) >= list_position(expr->exps, exp))
    1337             :                                                 break;
    1338       10489 :                                         expr = nexpr;
    1339       10489 :                                         exp = oexp = nexp;
    1340       10491 :                                         while (exp && is_numeric_upcast(exp))
    1341           2 :                                                 exp = exp->l;
    1342             :                                 }
    1343             : 
    1344        8970 :                                 list *expl = exp ? exp->l : NULL;
    1345       91856 :                                 sql_subfunc *expf = exp ? exp->f : NULL;
    1346             :                                 /* found a candidate function */
    1347        7374 :                                 if (exp && exp->type == e_func && expf->func->type == F_FUNC && !expf->func->s &&
    1348        1255 :                                         (!strcmp(expf->func->base.name, "sql_sub") || !strcmp(expf->func->base.name, "sql_add"))) {
    1349         225 :                                         sql_exp *e1 = (sql_exp*) expl->h->data, *e2 = (sql_exp*) expl->h->next->data;
    1350         225 :                                         int e1ok = exp_is_const_op(e1, oexp, expr), e2ok = exp_is_const_op(e2, oexp, expr);
    1351             : 
    1352         225 :                                         if ((!e1ok && e2ok) || (e1ok && !e2ok)) {
    1353          26 :                                                 sql_exp *ocol = e1ok ? e2 : e1, *constant = e1ok ? e1 : e2, *mul, *colref, *naggr, *newop, *col = ocol, *match;
    1354          26 :                                                 bool add_col = true, prepend = false;
    1355             : 
    1356             :                                                 /* if 'col' is a projection from the under relation, then use it */
    1357          26 :                                                 while (is_numeric_upcast(col))
    1358           0 :                                                         col = col->l;
    1359          26 :                                                 if (col->type == e_column) {
    1360          24 :                                                         sql_exp *colf = exps_find_exp(l->exps, col);
    1361             : 
    1362             :                                                         /* col is already found in the inner relation. Also look for a new reference for col, eg sql_add(col, 1), 1 as col */
    1363          24 :                                                         if (colf && list_position(l->exps, colf) < list_position(l->exps, oexp)) {
    1364             :                                                                 add_col = false;
    1365          11 :                                                         } else if (!colf && is_simple_project(l->op) && list_empty(l->r) && !rel_is_ref(l) && !need_distinct(l)) {
    1366             :                                                                 prepend = true;
    1367             :                                                                 add_col = false;
    1368           1 :                                                         } else if (!colf && (is_simple_project(l->op) || is_groupby(l->op)))  {
    1369             :                                                                 /* on these scenarios the new column expression will be ordered/(grouped for distinct) or create potential ambiguity (multiple ref), so skip */
    1370           1 :                                                                 continue;
    1371             :                                                         }
    1372           2 :                                                 } else if ((is_simple_project(l->op) && (!list_empty(l->r) || rel_is_ref(l) || need_distinct(l))) || is_groupby(l->op)) {
    1373           0 :                                                         continue;
    1374             :                                                 }
    1375             : 
    1376             :                                                 /* add count star */
    1377          25 :                                                 count_star_exp = rel_groupby_add_count_star(v->sql, groupby, count_star_exp, &count_added);
    1378             :                                                 /* multiply constant by count star */
    1379          25 :                                                 if (!(mul = rel_binop_(v->sql, NULL, constant, exp_ref(v->sql, count_star_exp), "sys", "sql_mul", card_value, true))) {
    1380           0 :                                                         v->sql->session->status = 0;
    1381           0 :                                                         v->sql->errstr[0] = '\0';
    1382           0 :                                                         continue;
    1383             :                                                 }
    1384          25 :                                                 if (!has_label(mul))
    1385          25 :                                                         exp_label(v->sql->sa, mul, ++v->sql->label);
    1386             : 
    1387          25 :                                                 colref = exp_ref(v->sql, ocol);
    1388          25 :                                                 if (add_col) /* if 'col' will be added, then make sure it has an unique label */
    1389           3 :                                                         exp_label(v->sql->sa, colref, ++v->sql->label);
    1390             : 
    1391             :                                                 /* 'oexp' contains the type for the input for the 'sum' aggregate */
    1392          25 :                                                 if (!(colref = exp_check_type(v->sql, exp_subtype(oexp), groupby, colref, type_equal))) {
    1393           0 :                                                         v->sql->session->status = 0;
    1394           0 :                                                         v->sql->errstr[0] = '\0';
    1395           0 :                                                         continue;
    1396             :                                                 }
    1397             :                                                 /* update sum to use the column side only */
    1398          25 :                                                 sql_subfunc *a = sql_bind_func(v->sql, "sys", "sum", exp_subtype(colref), NULL, F_AGGR, true, true);
    1399          25 :                                                 if (!a)
    1400           0 :                                                         continue;
    1401          25 :                                                 naggr = exp_aggr(v->sql->sa, list_append(sa_list(v->sql->sa), colref), a, need_distinct(e), need_no_nil(e), groupby->card, has_nil(e));
    1402          25 :                                                 if ((match = exps_any_match(groupby->exps, naggr)) && list_position(groupby->exps, match) < list_position(groupby->exps, e)) { /* found a matching aggregate, use it */
    1403           9 :                                                         naggr = exp_ref(v->sql, match);
    1404           9 :                                                         exp_label(v->sql->sa, naggr, ++v->sql->label);
    1405          16 :                                                 } else if (!has_label(naggr)) { /* otherwise use the new one */
    1406          16 :                                                         exp_label(v->sql->sa, naggr, ++v->sql->label);
    1407             :                                                 }
    1408             : 
    1409             :                                                 /* generate addition/subtraction. subtraction is not commutative, so keep original order! */
    1410          25 :                                                 if (!(newop = rel_binop_(v->sql, NULL, e1 == constant ? mul : exp_ref(v->sql, naggr), e1 == constant ? exp_ref(v->sql, naggr) : mul, "sys", expf->func->base.name, card_value, true))) {
    1411           0 :                                                         v->sql->session->status = 0;
    1412           0 :                                                         v->sql->errstr[0] = '\0';
    1413           0 :                                                         continue;
    1414             :                                                 }
    1415          25 :                                                 if (!(newop = exp_check_type(v->sql, exp_subtype(e), groupby, newop, type_equal))) {
    1416           0 :                                                         v->sql->session->status = 0;
    1417           0 :                                                         v->sql->errstr[0] = '\0';
    1418           0 :                                                         continue;
    1419             :                                                 }
    1420             : 
    1421             :                                                 /* a column reference can be prepended to the inner relation, add it after all the check type calls succeed */
    1422          25 :                                                 if (prepend)
    1423          10 :                                                         list_prepend(l->exps, col);
    1424             : 
    1425             :                                                 /* the new generate function calls are valid, update relations */
    1426             :                                                 /* we need a new relation for the multiplication and addition/subtraction */
    1427          25 :                                                 if (!upper) {
    1428             :                                                         /* be careful with relations with more than 1 reference, so do in-place replacement */
    1429          18 :                                                         list *projs = rel_projections(v->sql, rel, NULL, 1, 1);
    1430          18 :                                                         sql_rel *nrel = rel_groupby(v->sql, rel->l, NULL);
    1431          18 :                                                         nrel->exps = rel->exps;
    1432          18 :                                                         nrel->r = rel->r;
    1433          18 :                                                         nrel->card = rel->card;
    1434          18 :                                                         nrel->nrcols = list_length(rel->exps);
    1435          18 :                                                         set_processed(nrel);
    1436          18 :                                                         rel_dup(rel->l);
    1437          18 :                                                         upper = rel = rel_inplace_project(v->sql->sa, rel, nrel, projs);
    1438          18 :                                                         rel->card = exps_card(projs);
    1439          18 :                                                         groupby = nrel; /* update pointers :) */
    1440          18 :                                                         l = groupby->l;
    1441             :                                                 }
    1442          98 :                                                 for (node *n = upper->exps->h ; n ; ) {
    1443          73 :                                                         node *next = n->next;
    1444          73 :                                                         sql_exp *re = n->data;
    1445             : 
    1446             :                                                         /* remove the old reference to the aggregate because we will use the addition/subtraction instead,
    1447             :                                                            as well as the count star if count_added */
    1448          73 :                                                         if (exp_refers(e, re) || (count_added && exp_refers(count_star_exp, re)))
    1449          39 :                                                                 list_remove_node(upper->exps, NULL, n);
    1450             :                                                         n = next;
    1451             :                                                 }
    1452             : 
    1453             :                                                 /* update sum aggregate with new aggregate or reference to an existing one */
    1454          25 :                                                 n->data = naggr;
    1455          25 :                                                 list_hash_clear(groupby->exps);
    1456             : 
    1457             :                                                 /* add column reference with new label, if 'col' was not found */
    1458          25 :                                                 if (add_col) {
    1459           3 :                                                         if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l))
    1460           0 :                                                                 groupby->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
    1461           3 :                                                         list_append(l->exps, ocol);
    1462             :                                                 }
    1463             : 
    1464             :                                                 /* propagate alias and add new expression */
    1465          25 :                                                 exp_prop_alias(v->sql->sa, newop, e);
    1466          25 :                                                 list_append(upper->exps, newop);
    1467          25 :                                                 v->changes++;
    1468             :                                         }
    1469             :                                 }
    1470             :                         }
    1471             :                 }
    1472             :         }
    1473       42797 :         return rel;
    1474             : }
    1475             : 
    1476             : /* optimize group by x+1,(y-2)*3,2-z into group by x,y,z */
    1477             : static inline sql_rel *
    1478       42797 : rel_simplify_groupby_columns(visitor *v, sql_rel *rel)
    1479             : {
    1480       42797 :         if (is_groupby(rel->op) && !list_empty(rel->r)) {
    1481       32148 :                 sql_rel *l = rel->l;
    1482             : 
    1483       83304 :                 for (node *n=((list*)rel->r)->h; n ; n = n->next) {
    1484       51156 :                         sql_exp *e = n->data;
    1485       51156 :                         e->used = 0; /* we need to use this flag, clean it first */
    1486             :                 }
    1487       83304 :                 for (node *n=((list*)rel->r)->h; n ; n = n->next) {
    1488       51156 :                         sql_exp *e = n->data;
    1489             : 
    1490       51156 :                         if (e->type == e_column) {
    1491       51119 :                                 bool searching = true;
    1492       51119 :                                 sql_rel *efrel = NULL;
    1493       51119 :                                 sql_exp *exp = rel_find_exp_and_corresponding_rel(l, e, false, &efrel, NULL), *col = NULL, *tope = exp;
    1494             : 
    1495      102257 :                                 while (searching && !col) {
    1496       51138 :                                         sql_exp *exp_col = exp;
    1497             : 
    1498       51138 :                                         if (exp && is_numeric_upcast(exp))
    1499          54 :                                                 exp = exp->l;
    1500       51138 :                                         if (exp && exp->type == e_func) {
    1501         667 :                                                 list *el = exp->l;
    1502         667 :                                                 sql_subfunc *sf = exp->f;
    1503             :                                                 /* At the moment look only at injective math functions */
    1504         667 :                                                 if (sf->func->type == F_FUNC && !sf->func->s &&
    1505         654 :                                                         (!strcmp(sf->func->base.name, "sql_sub") || !strcmp(sf->func->base.name, "sql_add") || !strcmp(sf->func->base.name, "sql_mul"))) {
    1506         114 :                                                         sql_exp *e1 = (sql_exp*) el->h->data, *e2 = (sql_exp*) el->h->next->data;
    1507             :                                                         /* the optimization cannot be done if side-effect calls (e.g. rand()) are present */
    1508         207 :                                                         int e1ok = exp_is_atom(e1) && !exp_unsafe(e1, true, false) && !exp_has_sideeffect(e1), e2ok = exp_is_atom(e2) && !exp_unsafe(e2, true, false) && !exp_has_sideeffect(e2);
    1509             : 
    1510         114 :                                                         if ((!e1ok && e2ok) || (e1ok && !e2ok)) {
    1511          60 :                                                                 sql_exp *c = e1ok ? e2 : e1;
    1512          60 :                                                                 bool done = false;
    1513          60 :                                                                 exp_col = exps_find_exp(efrel->exps, c);
    1514          60 :                                                                 if (exp_col)
    1515          29 :                                                                         c = exp_col;
    1516             : 
    1517         122 :                                                                 while (!done) {
    1518          62 :                                                                         if (is_numeric_upcast(c))
    1519           0 :                                                                                 c = c->l;
    1520          62 :                                                                         if (c->type == e_column) {
    1521          43 :                                                                                 if (is_simple_project(efrel->op) || is_groupby(efrel->op)) {
    1522             :                                                                                         /* in a simple projection, self-references may occur */
    1523          43 :                                                                                         sql_exp *nc = exps_find_exp(efrel->exps, c);
    1524          43 :                                                                                         if (nc && list_position(efrel->exps, nc) < list_position(efrel->exps, exp_col)) {
    1525           2 :                                                                                                 exp_col = c;
    1526           2 :                                                                                                 c = nc;
    1527           2 :                                                                                                 continue;
    1528             :                                                                                         }
    1529             :                                                                                 }
    1530             :                                                                                 col = c; /* 'c' is a column reference from the left relation */
    1531             :                                                                                 done = true;
    1532             :                                                                         } else {
    1533             :                                                                                 exp = c; /* maybe a nested function call, let's continue searching */
    1534             :                                                                                 done = true;
    1535             :                                                                         }
    1536             :                                                                 }
    1537             :                                                         } else {
    1538             :                                                                 searching = false;
    1539             :                                                         }
    1540             :                                                 } else {
    1541             :                                                         searching = false;
    1542             :                                                 }
    1543             :                                         } else {
    1544             :                                                 searching = false;
    1545             :                                         }
    1546             :                                 }
    1547       51119 :                                 if (col) { /* a column reference was found */
    1548          41 :                                         const char *rname = exp_relname(e), *name = exp_name(e);
    1549             : 
    1550             :                                         /* the grouping column has an alias, we have to keep it */
    1551          41 :                                         if ((rname && name && (strcmp(rname, e->l) != 0 || strcmp(name, e->r) != 0)) || (!rname && name && strcmp(name, e->r) != 0)) {
    1552           2 :                                                 if (!has_label(e)) /* dangerous to merge, skip it */
    1553           0 :                                                         continue;
    1554           2 :                                                 if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l))
    1555           0 :                                                         rel->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
    1556           2 :                                                 list_append(l->exps, e);
    1557           2 :                                                 n->data = e = exp_ref(v->sql, e);
    1558           2 :                                                 list_hash_clear(rel->r);
    1559             :                                         }
    1560             : 
    1561          41 :                                         sql_exp *f = exps_find_exp(rel->r, col);
    1562             : 
    1563          41 :                                         if (f && list_position(rel->r, f) < list_position(rel->r, e)) { /* if already present, remove it */
    1564          10 :                                                 e->used = 1;
    1565             :                                         } else {
    1566             :                                                 /* Use an unique reference to the column found. If there's another grouping column label pointing into it,
    1567             :                                                    rel_groupby_cse will hopefully remove it */
    1568          31 :                                                 sql_exp *colf = exps_find_exp(l->exps, col);
    1569             : 
    1570             :                                                 /* col is already found in the inner relation. Also look for a new reference for col, eg sql_add(col, 1), 1 as col */
    1571          31 :                                                 if (colf && list_position(l->exps, colf) < list_position(l->exps, tope)) {
    1572           2 :                                                         n->data = exp_ref(v->sql, col);
    1573          25 :                                                 } else if (!colf && is_simple_project(l->op) && list_empty(l->r) && !rel_is_ref(l) && !need_distinct(l)) { /* trivial case, it can be added */
    1574          25 :                                                         sql_exp *ne = col;
    1575          25 :                                                         list_prepend(l->exps, ne);
    1576          25 :                                                         n->data = exp_ref(v->sql, ne);
    1577           0 :                                                 } else if (!colf && (is_simple_project(l->op) || is_groupby(l->op)))  {
    1578             :                                                         /* on these scenarios the new column expression will be ordered/(grouped for distinct) or create potential ambiguity (multiple ref), so skip */
    1579           0 :                                                         continue;
    1580             :                                                 } else {
    1581           4 :                                                         sql_exp *ne = exp_ref(v->sql, col);
    1582             : 
    1583           4 :                                                         if (colf) /* a col reference is already there, add a new label */
    1584           4 :                                                                 exp_label(v->sql->sa, ne, ++v->sql->label);
    1585           4 :                                                         if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l))
    1586           0 :                                                                 rel->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
    1587           4 :                                                         list_append(l->exps, ne);
    1588           4 :                                                         n->data = exp_ref(v->sql, ne);
    1589             :                                                 }
    1590          31 :                                                 list_hash_clear(rel->r);
    1591             :                                         }
    1592          41 :                                         v->changes++;
    1593             :                                 }
    1594             :                         }
    1595             :                 }
    1596       83304 :                 for (node *n=((list*)rel->r)->h; n ; ) {
    1597       51156 :                         node *next = n->next;
    1598       51156 :                         sql_exp *e = n->data;
    1599             : 
    1600       51156 :                         if (e->used) /* remove unnecessary grouping columns */
    1601          10 :                                 list_remove_node(rel->r, NULL, n);
    1602             :                         n = next;
    1603             :                 }
    1604             :         }
    1605       42797 :         return rel;
    1606             : }
    1607             : 
    1608             : /* remove identical grouping columns */
    1609             : static inline sql_rel *
    1610       97544 : rel_groupby_cse(visitor *v, sql_rel *rel)
    1611             : {
    1612       97544 :         if (is_groupby(rel->op) && !list_empty(rel->r)) {
    1613       71988 :                 sql_rel *l = rel->l;
    1614             : 
    1615             :                 /* for every group expression e1 */
    1616      200381 :                 for (node *n=((list*)rel->r)->h; n ; n = n->next) {
    1617      128393 :                         sql_exp *e1 = n->data;
    1618             :                         /* it's good to examine the same expression in the subrelation e.g. in case it's an alias */
    1619             :                         /* TODO maybe cover more cases? Here I only look at the left relation */
    1620      256786 :                         sql_exp *e1_sub = e1->type == e_column ?
    1621      128393 :                                             e1->nid ? exps_bind_nid(l->exps, e1->nid) : exps_find_exp(l->exps, e1) : NULL;
    1622             : 
    1623             :                         /* for every other group expression */
    1624      220978 :                         for (node *m=n->next; m; m = m->next) {
    1625       92585 :                                 sql_exp *e2 = m->data;
    1626      185170 :                                 sql_exp *e2_sub = e2->type == e_column ?
    1627       92585 :                                                 e2->nid ? exps_bind_nid(l->exps, e2->nid) : exps_find_exp(l->exps, e2) : NULL;
    1628             : 
    1629             :                                 /* check if the expression are the same */
    1630       92585 :                                 if (exp_match_exp(e1, e2) || exp_refers(e1, e2) || (e1_sub && e2_sub && (exp_match_exp(e1_sub, e2_sub) || exp_refers(e1_sub, e2_sub)))) {
    1631             : 
    1632             :                                         /* use e2 from rel->exps instead of e2 from the rel->r as it can have an alias from the higher rel */
    1633          41 :                                         sql_exp *e2_in_exps = exps_uses_nid(rel->exps, e2->alias.label);
    1634          41 :                                         assert(e2_in_exps);
    1635             : 
    1636             :                                         /* same as e2 */
    1637          41 :                                         sql_exp *e1_in_exps = exps_uses_nid(rel->exps, e1->alias.label);
    1638          41 :                                         if (!e1_in_exps)
    1639           6 :                                                 continue;
    1640             : 
    1641             :                                         /* write e2 as an e1 alias since the expressions are the same */
    1642          35 :                                         sql_exp* e2_as_e1_alias = exp_copy(v->sql, e1_in_exps);
    1643             :                                         /* NOTE: it is important to get the rname (exp->l) and name (exp->r) from e2 IN the exps
    1644             :                                          * (e2_in_exps), and not from e2, since it could carry an alias from the higher rel */
    1645          35 :                                         exp_setalias(e2_as_e1_alias, e2_as_e1_alias->alias.label, e2_in_exps->l, e2_in_exps->r);
    1646          35 :                                         e2_as_e1_alias->alias.label = e2->alias.label;
    1647             : 
    1648             :                                         /* replace e2 with e2_as_e1_alias in expressions list */
    1649          35 :                                         node *e2_exps_node = list_find(rel->exps, e2_in_exps, NULL);
    1650          35 :                                         list_append_before(rel->exps, e2_exps_node, e2_as_e1_alias);
    1651          35 :                                         list_remove_node(rel->exps, NULL, e2_exps_node);
    1652             : 
    1653             :                                         /* finally remove e2 from the groups' list (->r) since it's redundant */
    1654          35 :                                         node *e2_r_node = list_find(rel->r, e2, NULL);
    1655          35 :                                         list_remove_node(rel->r, NULL, e2_r_node);
    1656             : 
    1657          35 :                                         v->changes++;
    1658             :                                 }
    1659             :                         }
    1660             :                 }
    1661             :         }
    1662       97544 :         return rel;
    1663             : }
    1664             : 
    1665             : sql_exp *list_exps_uses_exp(list *exps, const char *rname, const char *name);
    1666             : 
    1667             : static sql_exp*
    1668       19483 : exp_uses_exp(sql_exp *e, const char *rname, const char *name)
    1669             : {
    1670       19780 :         sql_exp *res = NULL;
    1671             : 
    1672       19780 :         switch (e->type) {
    1673             :                 case e_psm:
    1674             :                         break;
    1675        1923 :                 case e_atom: {
    1676        1923 :                         if (e->f)
    1677           0 :                                 return list_exps_uses_exp(e->f, rname, name);
    1678             :                 } break;
    1679         296 :                 case e_convert:
    1680         296 :                         return exp_uses_exp(e->l, rname, name);
    1681       10958 :                 case e_column: {
    1682       10958 :                         if (e->l && rname && strcmp(e->l, rname) == 0 &&
    1683        2692 :                                 e->r && name && strcmp(e->r, name) == 0)
    1684             :                                 return e;
    1685        8462 :                         if (!e->l && !rname &&
    1686         244 :                                 e->r && name && strcmp(e->r, name) == 0)
    1687             :                                 return e;
    1688             :                 } break;
    1689        6189 :                 case e_func:
    1690             :                 case e_aggr: {
    1691        6189 :                         if (e->l)
    1692        4970 :                                 return list_exps_uses_exp(e->l, rname, name);
    1693             :                 }       break;
    1694         414 :                 case e_cmp: {
    1695         414 :                         if (e->flag == cmp_in || e->flag == cmp_notin) {
    1696           0 :                                 if ((res = exp_uses_exp(e->l, rname, name)))
    1697             :                                         return res;
    1698           0 :                                 return list_exps_uses_exp(e->r, rname, name);
    1699         414 :                         } else if (e->flag == cmp_or || e->flag == cmp_filter) {
    1700           2 :                                 if ((res = list_exps_uses_exp(e->l, rname, name)))
    1701             :                                         return res;
    1702           2 :                                 return list_exps_uses_exp(e->r, rname, name);
    1703             :                         } else {
    1704         412 :                                 if ((res = exp_uses_exp(e->l, rname, name)))
    1705             :                                         return res;
    1706         410 :                                 if ((res = exp_uses_exp(e->r, rname, name)))
    1707             :                                         return res;
    1708         404 :                                 if (e->f)
    1709             :                                         return exp_uses_exp(e->f, rname, name);
    1710             :                         }
    1711             :                 } break;
    1712             :         }
    1713             :         return NULL;
    1714             : }
    1715             : 
    1716             : sql_exp *
    1717       10245 : list_exps_uses_exp(list *exps, const char *rname, const char *name)
    1718             : {
    1719       10245 :         sql_exp *res = NULL;
    1720             : 
    1721       10245 :         if (!exps)
    1722             :                 return NULL;
    1723       28906 :         for (node *n = exps->h; n && !res; n = n->next) {
    1724       18661 :                 sql_exp *e = n->data;
    1725       18661 :                 res = exp_uses_exp(e, rname, name);
    1726             :         }
    1727             :         return res;
    1728             : }
    1729             : 
    1730             : /* find in the list of expression an expression which uses e */
    1731             : sql_exp *
    1732         995 : exps_uses_exp(list *exps, sql_exp *e)
    1733             : {
    1734         995 :         return list_exps_uses_exp(exps, exp_relname(e), exp_name(e));
    1735             : }
    1736             : /*
    1737             :  * Rewrite aggregations over munion all.
    1738             :  *      groupby ([ union all (a, b, c) ], [gbe], [ count, sum ] )
    1739             :  *
    1740             :  * into
    1741             :  *      groupby ([ union all( groupby( a, [gbe], [ count, sum] ),
    1742             :  *                            groupby( b, [gbe], [ count, sum] ),
    1743             :  *                            groupby( c, [gbe], [ count, sum] ) ],
    1744             :  *                       [gbe], [sum, sum] )
    1745             :  */
    1746             : static inline sql_rel *
    1747        1388 : rel_push_aggr_down_n_arry(visitor *v, sql_rel *rel)
    1748             : {
    1749        1388 :         sql_rel *g = rel;
    1750        1388 :         sql_rel *u = rel->l, *ou = u;
    1751        1388 :         sql_rel *r = NULL;
    1752        1388 :         list *rgbe = NULL, *gbe = NULL, *exps = NULL;
    1753        1388 :         node *n, *m;
    1754             : 
    1755             :         // TODO why?
    1756        1388 :         if (u->op == op_project && !need_distinct(u))
    1757         353 :                 u = u->l;
    1758             : 
    1759        1388 :         if (is_recursive(u))
    1760             :                 return rel;
    1761             : 
    1762             :         /* make sure we don't create group by on group by's */
    1763        2810 :         for (node *n = ((list*)u->l)->h; n; n = n->next) {
    1764        2147 :                 r = n->data;
    1765        2147 :                 if (r->op == op_groupby)
    1766             :                         return rel;
    1767             :         }
    1768             : 
    1769             :         /* distinct should be done over the full result */
    1770        1827 :         for (n = g->exps->h; n; n = n->next) {
    1771        1270 :                 sql_exp *e = n->data;
    1772        1270 :                 sql_subfunc *af = e->f;
    1773             : 
    1774        1270 :                 if (e->type == e_atom ||
    1775        1270 :                         e->type == e_func ||
    1776         624 :                    (e->type == e_aggr &&
    1777         624 :                    ((strcmp(af->func->base.name, "sum") &&
    1778         487 :                          strcmp(af->func->base.name, "count") &&
    1779         171 :                          strcmp(af->func->base.name, "min") &&
    1780         624 :                          strcmp(af->func->base.name, "max")) ||
    1781             :                    need_distinct(e))))
    1782             :                         return rel;
    1783             :         }
    1784             : 
    1785         557 :         list *nl = sa_list(v->sql->sa);
    1786        1717 :         for (node *n = ((list*)u->l)->h; n; n = n->next) {
    1787        1160 :                 r = rel_dup(n->data);
    1788             :                 //n->data = NULL; /* clean list as we steal the relation r, stealing is needed else (with multiple references) double project cleanup fails */
    1789        1160 :                 if (!is_project(r->op))
    1790          47 :                         r = rel_project(v->sql->sa, r,
    1791             :                                             rel_projections(v->sql, r, NULL, 1, 1));
    1792        1160 :                 rel_rename_exps(v->sql, u->exps, r->exps);
    1793        1160 :                 if (u != ou) {
    1794         663 :                         bool isproject = is_project(r->op);
    1795         663 :                         r = rel_project(v->sql->sa, r, NULL);
    1796         663 :                         r->exps = exps_copy(v->sql, ou->exps);
    1797         663 :                         rel_rename_exps(v->sql, ou->exps, r->exps);
    1798         663 :                         set_processed(r);
    1799         663 :                         if (isproject)
    1800         663 :                                 r = rel_push_project_down_(v, r); /* cleanup any double projects */
    1801             :                 }
    1802        1160 :                 if (g->r && list_length(g->r) > 0) {
    1803         910 :                         list *gbe = g->r;
    1804         910 :                         rgbe = exps_copy(v->sql, gbe);
    1805             :                 }
    1806        1160 :                 r = rel_groupby(v->sql, r, NULL);
    1807        1160 :                 r->r = rgbe;
    1808        1160 :                 r->nrcols = g->nrcols;
    1809        1160 :                 r->card = g->card;
    1810        1160 :                 r->exps = exps_copy(v->sql, g->exps);
    1811        1160 :                 r->nrcols = list_length(r->exps);
    1812        1160 :                 set_processed(r);
    1813             : 
    1814        1160 :                 assert(r);
    1815        1160 :                 append(nl, r);
    1816             :         }
    1817             : 
    1818             :         /* group by on primary keys which define the partitioning scheme
    1819             :          * don't need a finalizing group by */
    1820             :         /* how to check if a partition is based on some primary key ?
    1821             :          * */
    1822         557 :         if (!list_empty(rel->r)) {
    1823        1074 :                 for (node *n = ((list*)rel->r)->h; n; n = n->next) {
    1824         638 :                         sql_exp *e = n->data;
    1825         638 :                         sql_column *c = NULL;
    1826             : 
    1827         638 :                         if ((c = exp_is_pkey(rel, e)) && partition_find_part(v->sql->session->tr, c->t, NULL)) {
    1828             :                                 /* check if key is partition key */
    1829           0 :                                 v->changes++;
    1830           0 :                                 return rel_inplace_setop_n_ary(v->sql, rel, nl, op_munion,
    1831             :                                                                                            rel_projections(v->sql, rel, NULL, 1, 1));
    1832             :                         }
    1833             :                 }
    1834             :         }
    1835             : 
    1836         557 :         if (!list_empty(rel->r)) {
    1837         436 :                 list *ogbe = rel->r;
    1838             : 
    1839         436 :                 gbe = new_exp_list(v->sql->sa);
    1840        1074 :                 for (n = ogbe->h; n; n = n->next) {
    1841         638 :                         sql_exp *e = n->data, *ne;
    1842             : 
    1843             :                         /* group by in aggregation list */
    1844         638 :                         ne = exps_uses_exp( rel->exps, e);
    1845         638 :                         if (ne) {
    1846         632 :                                 sql_rel *first_munion_rel = nl->h->data;
    1847         632 :                                 ne = list_find_exp(first_munion_rel->exps, ne);
    1848             :                         }
    1849         632 :                         if (!ne) {
    1850             :                                 /* e only in the u1,u2,...un->r (group by list) */
    1851          24 :                                 for (node *n = nl->h; n; n = n->next) {
    1852          16 :                                         ne = exp_ref(v->sql, e);
    1853          16 :                                         list_append(((sql_rel*)n->data)->exps, ne);
    1854             :                                 }
    1855             :                         }
    1856           8 :                         assert(ne);
    1857         638 :                         ne = exp_ref(v->sql, ne);
    1858         638 :                         append(gbe, ne);
    1859             :                 }
    1860             :         }
    1861             : 
    1862         557 :         u = rel_setop_n_ary(v->sql->sa, nl, op_munion);
    1863         557 :         rel_setop_n_ary_set_exps(v->sql, u,
    1864         557 :                                          rel_projections(v->sql, nl->h->data, NULL, 1, 1), false);
    1865         557 :         set_processed(u);
    1866             : 
    1867         557 :         exps = new_exp_list(v->sql->sa);
    1868        1718 :         for (n = u->exps->h, m = rel->exps->h; n && m; n = n->next, m = m->next) {
    1869        1161 :                 sql_exp *ne, *e = n->data, *oa = m->data;
    1870             : 
    1871        1161 :                 if (oa->type == e_aggr) {
    1872         518 :                         sql_subfunc *f = oa->f;
    1873         518 :                         int cnt = exp_aggr_is_count(oa);
    1874         518 :                         sql_subfunc *a = sql_bind_func(v->sql, "sys", (cnt)?"sum":f->func->base.name, exp_subtype(e), NULL, F_AGGR, true, true);
    1875             : 
    1876         518 :                         assert(a);
    1877             :                         /* munion of aggr result may have nils
    1878             :                          * because sum/count of empty set */
    1879         518 :                         set_has_nil(e);
    1880         518 :                         e = exp_ref(v->sql, e);
    1881         518 :                         ne = exp_aggr1(v->sql->sa, e, a, need_distinct(e), 1, e->card, 1);
    1882             :                 } else {
    1883         643 :                         ne = exp_copy(v->sql, oa);
    1884             :                 }
    1885        1161 :                 exp_setalias(ne, oa->alias.label, exp_find_rel_name(oa), exp_name(oa));
    1886        1161 :                 append(exps, ne);
    1887             :         }
    1888         557 :         v->changes++;
    1889         557 :         return rel_inplace_groupby(rel, u, gbe, exps);
    1890             : }
    1891             : 
    1892             : /*
    1893             :  * Rewrite aggregations over union all.
    1894             :  *      groupby ([ union all (a, b) ], [gbe], [ count, sum ] )
    1895             :  *
    1896             :  * into
    1897             :  *      groupby ( [ union all( groupby( a, [gbe], [ count, sum] ), [ groupby( b, [gbe], [ count, sum] )) , [gbe], [sum, sum] )
    1898             :  */
    1899             : static inline sql_rel *
    1900       97544 : rel_push_aggr_down(visitor *v, sql_rel *rel)
    1901             : {
    1902       97544 :         if (rel->op == op_groupby && rel->l) {
    1903       97472 :                 sql_rel *u = rel->l, *ou = u;
    1904       97472 :                 sql_rel *g = rel;
    1905       97472 :                 sql_rel *ul = u->l;
    1906       97472 :                 sql_rel *ur = u->r;
    1907       97472 :                 node *n, *m;
    1908       97472 :                 list *lgbe = NULL, *rgbe = NULL, *gbe = NULL, *exps = NULL;
    1909             : 
    1910       97472 :                 if (u->op == op_project && !need_distinct(u))
    1911       42275 :                         u = u->l;
    1912             : 
    1913       97472 :                 if (!u || !(is_union(u->op) || is_munion(u->op)) || need_distinct(u) || is_single(u) || !u->exps || rel_is_ref(u))
    1914             :                         return rel;
    1915             : 
    1916        1388 :                 if (is_munion(u->op))
    1917        1388 :                         return rel_push_aggr_down_n_arry(v, rel);
    1918             : 
    1919           0 :                 ul = u->l;
    1920           0 :                 ur = u->r;
    1921             : 
    1922             :                 /* make sure we don't create group by on group by's */
    1923           0 :                 if (ul->op == op_groupby || ur->op == op_groupby)
    1924             :                         return rel;
    1925             : 
    1926             :                 /* distinct should be done over the full result */
    1927           0 :                 for (n = g->exps->h; n; n = n->next) {
    1928           0 :                         sql_exp *e = n->data;
    1929           0 :                         sql_subfunc *af = e->f;
    1930             : 
    1931           0 :                         if (e->type == e_atom ||
    1932           0 :                             e->type == e_func ||
    1933           0 :                            (e->type == e_aggr &&
    1934           0 :                            ((strcmp(af->func->base.name, "sum") &&
    1935           0 :                              strcmp(af->func->base.name, "count") &&
    1936           0 :                              strcmp(af->func->base.name, "min") &&
    1937           0 :                              strcmp(af->func->base.name, "max")) ||
    1938             :                            need_distinct(e))))
    1939             :                                 return rel;
    1940             :                 }
    1941             : 
    1942           0 :                 ul = rel_dup(ul);
    1943           0 :                 ur = rel_dup(ur);
    1944           0 :                 if (!is_project(ul->op))
    1945           0 :                         ul = rel_project(v->sql->sa, ul,
    1946             :                                 rel_projections(v->sql, ul, NULL, 1, 1));
    1947           0 :                 if (!is_project(ur->op))
    1948           0 :                         ur = rel_project(v->sql->sa, ur,
    1949             :                                 rel_projections(v->sql, ur, NULL, 1, 1));
    1950           0 :                 rel_rename_exps(v->sql, u->exps, ul->exps);
    1951           0 :                 rel_rename_exps(v->sql, u->exps, ur->exps);
    1952           0 :                 if (u != ou) {
    1953           0 :                         ul = rel_project(v->sql->sa, ul, NULL);
    1954           0 :                         ul->exps = exps_copy(v->sql, ou->exps);
    1955           0 :                         rel_rename_exps(v->sql, ou->exps, ul->exps);
    1956           0 :                         set_processed(ul);
    1957           0 :                         ur = rel_project(v->sql->sa, ur, NULL);
    1958           0 :                         ur->exps = exps_copy(v->sql, ou->exps);
    1959           0 :                         rel_rename_exps(v->sql, ou->exps, ur->exps);
    1960           0 :                         set_processed(ur);
    1961             :                 }
    1962             : 
    1963           0 :                 if (g->r && list_length(g->r) > 0) {
    1964           0 :                         list *gbe = g->r;
    1965             : 
    1966           0 :                         lgbe = exps_copy(v->sql, gbe);
    1967           0 :                         rgbe = exps_copy(v->sql, gbe);
    1968             :                 }
    1969           0 :                 ul = rel_groupby(v->sql, ul, NULL);
    1970           0 :                 ul->r = lgbe;
    1971           0 :                 ul->nrcols = g->nrcols;
    1972           0 :                 ul->card = g->card;
    1973           0 :                 ul->exps = exps_copy(v->sql, g->exps);
    1974           0 :                 ul->nrcols = list_length(ul->exps);
    1975           0 :                 set_processed(ul);
    1976             : 
    1977           0 :                 ur = rel_groupby(v->sql, ur, NULL);
    1978           0 :                 ur->r = rgbe;
    1979           0 :                 ur->nrcols = g->nrcols;
    1980           0 :                 ur->card = g->card;
    1981           0 :                 ur->exps = exps_copy(v->sql, g->exps);
    1982           0 :                 ur->nrcols = list_length(ur->exps);
    1983           0 :                 set_processed(ur);
    1984             : 
    1985             :                 /* group by on primary keys which define the partitioning scheme
    1986             :                  * don't need a finalizing group by */
    1987             :                 /* how to check if a partition is based on some primary key ?
    1988             :                  * */
    1989           0 :                 if (!list_empty(rel->r)) {
    1990           0 :                         for (node *n = ((list*)rel->r)->h; n; n = n->next) {
    1991           0 :                                 sql_exp *e = n->data;
    1992           0 :                                 sql_column *c = NULL;
    1993             : 
    1994           0 :                                 if ((c = exp_is_pkey(rel, e)) && partition_find_part(v->sql->session->tr, c->t, NULL)) {
    1995             :                                         /* check if key is partition key */
    1996           0 :                                         v->changes++;
    1997           0 :                                         return rel_inplace_setop(v->sql, rel, ul, ur, op_union,
    1998             :                                                                                          rel_projections(v->sql, rel, NULL, 1, 1));
    1999             :                                 }
    2000             :                         }
    2001             :                 }
    2002             : 
    2003           0 :                 if (!list_empty(rel->r)) {
    2004           0 :                         list *ogbe = rel->r;
    2005             : 
    2006           0 :                         gbe = new_exp_list(v->sql->sa);
    2007           0 :                         for (n = ogbe->h; n; n = n->next) {
    2008           0 :                                 sql_exp *e = n->data, *ne;
    2009             : 
    2010             :                                 /* group by in aggregation list */
    2011           0 :                                 ne = exps_uses_exp( rel->exps, e);
    2012           0 :                                 if (ne)
    2013           0 :                                         ne = list_find_exp( ul->exps, ne);
    2014           0 :                                 if (!ne) {
    2015             :                                         /* e only in the ul/ur->r (group by list) */
    2016           0 :                                         ne = exp_ref(v->sql, e);
    2017           0 :                                         list_append(ul->exps, ne);
    2018           0 :                                         ne = exp_ref(v->sql, e);
    2019           0 :                                         list_append(ur->exps, ne);
    2020             :                                 }
    2021           0 :                                 assert(ne);
    2022           0 :                                 ne = exp_ref(v->sql, ne);
    2023           0 :                                 append(gbe, ne);
    2024             :                         }
    2025             :                 }
    2026             : 
    2027           0 :                 u = rel_setop(v->sql->sa, ul, ur, op_union);
    2028           0 :                 rel_setop_set_exps(v->sql, u, rel_projections(v->sql, ul, NULL, 1, 1), false);
    2029           0 :                 set_processed(u);
    2030             : 
    2031           0 :                 exps = new_exp_list(v->sql->sa);
    2032           0 :                 for (n = u->exps->h, m = rel->exps->h; n && m; n = n->next, m = m->next) {
    2033           0 :                         sql_exp *ne, *e = n->data, *oa = m->data;
    2034             : 
    2035           0 :                         if (oa->type == e_aggr) {
    2036           0 :                                 sql_subfunc *f = oa->f;
    2037           0 :                                 int cnt = exp_aggr_is_count(oa);
    2038           0 :                                 sql_subfunc *a = sql_bind_func(v->sql, "sys", (cnt)?"sum":f->func->base.name, exp_subtype(e), NULL, F_AGGR, true, true);
    2039             : 
    2040           0 :                                 assert(a);
    2041             :                                 /* union of aggr result may have nils
    2042             :                                  * because sum/count of empty set */
    2043           0 :                                 set_has_nil(e);
    2044           0 :                                 e = exp_ref(v->sql, e);
    2045           0 :                                 ne = exp_aggr1(v->sql->sa, e, a, need_distinct(e), 1, e->card, 1);
    2046             :                         } else {
    2047           0 :                                 ne = exp_copy(v->sql, oa);
    2048             :                         }
    2049           0 :                         exp_setalias(ne, oa->alias.label, exp_find_rel_name(oa), exp_name(oa));
    2050           0 :                         append(exps, ne);
    2051             :                 }
    2052           0 :                 v->changes++;
    2053           0 :                 return rel_inplace_groupby( rel, u, gbe, exps);
    2054             :         }
    2055             :         return rel;
    2056             : }
    2057             : 
    2058             : /*
    2059             :  * More general
    2060             :  *      groupby(
    2061             :  *       [ outer ] join(
    2062             :  *          project(
    2063             :  *            table(A) [ c1, c2, .. ]
    2064             :  *          ) [ c1, c2, identity(c2) as I, .. ],
    2065             :  *          table(B) [ c1, c2, .. ]
    2066             :  *        ) [ A.c1 = B.c1 ]
    2067             :  *      ) [ I ] [ a1, a2, .. ]
    2068             :  *
    2069             :  * ->
    2070             :  *
    2071             :  *      [ outer ] join(
    2072             :  *        project(
    2073             :  *          table(A) [ c1, c2, .. ]
    2074             :  *        ) [ c1, c2, .. ],
    2075             :  *        groupby (
    2076             :  *          table(B) [ c1, c2, .. ]
    2077             :  *        ) [ B.c1 ] [ a1, a2, .. ]
    2078             :  *      ) [ A.c1 = B.c1 ]
    2079             :  */
    2080             : static sql_rel *
    2081       47874 : gen_push_groupby_down(mvc *sql, sql_rel *rel, int *changes)
    2082             : {
    2083       47874 :         sql_rel *j = rel->l;
    2084       47874 :         list *gbe = rel->r;
    2085             : 
    2086       47874 :         if (rel->op == op_groupby && list_length(gbe) == 1 && j->op == op_join){
    2087        6492 :                 sql_rel *jl = j->l, *jr = j->r, *cr, *cl;
    2088        6492 :                 sql_exp *gb = gbe->h->data, *e;
    2089        6492 :                 node *n;
    2090        6492 :                 int left = 1;
    2091        6492 :                 list *aggrs, *aliases, *gbe;
    2092             : 
    2093        6492 :                 if (!is_identity(gb, jl) && !is_identity(gb, jr))
    2094             :                         return rel;
    2095           6 :                 if (jl->op == op_project &&
    2096           4 :                     (e = list_find_exp( jl->exps, gb)) != NULL &&
    2097           2 :                      find_prop(e->p, PROP_HASHCOL) != NULL) {
    2098             :                         left = 0;
    2099             :                         cr = jr;
    2100             :                         cl = jl;
    2101           4 :                 } else if (jr->op == op_project &&
    2102           8 :                     (e = list_find_exp( jr->exps, gb)) != NULL &&
    2103           4 :                      find_prop(e->p, PROP_HASHCOL) != NULL) {
    2104             :                         left = 1;
    2105             :                         cr = jl;
    2106             :                         cl = jr;
    2107             :                 } else {
    2108           0 :                         return rel;
    2109             :                 }
    2110             : 
    2111           6 :                 if ((left && is_base(jl->op)) || (!left && is_base(jr->op))||
    2112           0 :                     (left && is_select(jl->op)) || (!left && is_select(jr->op))
    2113           0 :                     || rel_is_join_on_pkey(j, false))
    2114           6 :                         return rel;
    2115             : 
    2116             :                 /* only add aggr (based on left/right), and repeat the group by column */
    2117           0 :                 aggrs = sa_list(sql->sa);
    2118           0 :                 aliases = sa_list(sql->sa);
    2119           0 :                 if (rel->exps) for (n = rel->exps->h; n; n = n->next) {
    2120           0 :                         sql_exp *ce = n->data;
    2121             : 
    2122           0 :                         if (exp_is_atom(ce))
    2123           0 :                                 list_append(aliases, ce);
    2124           0 :                         else if (ce->type == e_column) {
    2125           0 :                                 if (rel_has_exp(cl, ce, false) == 0) /* collect aliases outside groupby */
    2126           0 :                                         list_append(aliases, ce);
    2127             :                                 else
    2128           0 :                                         list_append(aggrs, ce);
    2129           0 :                         } else if (ce->type == e_aggr) {
    2130           0 :                                 list *args = ce->l;
    2131             : 
    2132             :                                 /* check args are part of left/right */
    2133           0 :                                 if (!list_empty(args) && rel_has_exps(cl, args, false) == 0)
    2134             :                                         return rel;
    2135           0 :                                 if (rel->op != op_join && exp_aggr_is_count(ce))
    2136           0 :                                         ce->p = prop_create(sql->sa, PROP_COUNT, ce->p);
    2137           0 :                                 list_append(aggrs, ce);
    2138             :                         }
    2139             :                 }
    2140             :                 /* TODO move any column expressions (aliases) into the project list */
    2141             : 
    2142             :                 /* find gb in left or right and should be unique */
    2143           0 :                 gbe = sa_list(sql->sa);
    2144             :                 /* push groupby to right, group on join exps */
    2145           0 :                 if (j->exps) for (n = j->exps->h; n; n = n->next) {
    2146           0 :                         sql_exp *ce = n->data, *l = ce->l, *r = ce->r, *e;
    2147             : 
    2148             :                         /* get left/right hand of e_cmp */
    2149           0 :                         assert(ce->type == e_cmp);
    2150           0 :                         if (ce->flag == cmp_equal && is_alias(l->type) && is_alias(r->type) &&
    2151           0 :                                 (((e = rel_find_exp(cr, l)) && rel_find_exp(cl, r)) ||
    2152           0 :                                  ((e = rel_find_exp(cr, r)) && rel_find_exp(cl, l)))) {
    2153           0 :                                 e = exp_ref(sql, e);
    2154           0 :                                 list_append(gbe, e);
    2155             :                         } else {
    2156           0 :                                 return rel;
    2157             :                         }
    2158             :                 }
    2159           0 :                 if (!left)
    2160           0 :                         cr = j->r = rel_groupby(sql, cr, gbe);
    2161             :                 else
    2162           0 :                         cr = j->l = rel_groupby(sql, cr, gbe);
    2163           0 :                 cr->exps = list_merge(cr->exps, aggrs, (fdup)NULL);
    2164           0 :                 set_processed(cr);
    2165           0 :                 if (!is_project(cl->op))
    2166           0 :                         cl = rel_project(sql->sa, cl,
    2167             :                                 rel_projections(sql, cl, NULL, 1, 1));
    2168           0 :                 cl->exps = list_merge(cl->exps, aliases, (fdup)NULL);
    2169           0 :                 set_processed(cl);
    2170           0 :                 if (!left)
    2171           0 :                         j->l = cl;
    2172             :                 else
    2173           0 :                         j->r = cl;
    2174           0 :                 rel -> l = NULL;
    2175           0 :                 rel_destroy(rel);
    2176             : 
    2177           0 :                 if (list_empty(cr->exps) && list_empty(j->exps)) { /* remove crossproduct */
    2178           0 :                         sql_rel *r = cl;
    2179           0 :                         if (!left)
    2180           0 :                                 j->l = NULL;
    2181             :                         else
    2182           0 :                                 j->r = NULL;
    2183           0 :                         rel_destroy(j);
    2184           0 :                         j = r;
    2185             :                 }
    2186           0 :                 (*changes)++;
    2187           0 :                 return j;
    2188             :         }
    2189             :         return rel;
    2190             : }
    2191             : 
    2192             : /*
    2193             :  * Rewrite group(project(join(A,Dict)[a.i==dict.i])[...dict.n])[dict.n][ ... dict.n ]
    2194             :  * into
    2195             :  *      project(join(groupby (A)[a.i],[a.i]), Dict)[a.i==dict.i])[dict.n]
    2196             :  *
    2197             :  */
    2198             : static inline sql_rel *
    2199       97544 : rel_push_groupby_down(visitor *v, sql_rel *rel)
    2200             : {
    2201       97544 :         sql_rel *p = rel->l;
    2202       97544 :         list *gbe = rel->r;
    2203             : 
    2204       97544 :         if (rel->op == op_groupby && gbe && p && is_join(p->op))
    2205       10180 :                 return gen_push_groupby_down(v->sql, rel, &v->changes);
    2206       87364 :         if (rel->op == op_groupby && gbe && p && p->op == op_project) {
    2207       37694 :                 sql_rel *j = p->l;
    2208       37694 :                 sql_rel *jl, *jr;
    2209       37694 :                 node *n;
    2210             : 
    2211       37694 :                 if (!j || j->op != op_join || list_length(j->exps) != 1)
    2212       35545 :                         return gen_push_groupby_down(v->sql, rel, &v->changes);
    2213        2149 :                 jl = j->l;
    2214        2149 :                 jr = j->r;
    2215             : 
    2216             :                 /* check if jr is a dict with index and var still used */
    2217        2149 :                 if (jr->op != op_basetable || jr->l || !jr->r || list_length(jr->exps) != 2)
    2218        2149 :                         return gen_push_groupby_down(v->sql, rel, &v->changes);
    2219             : 
    2220             :                 /* check if group by is done on dict column */
    2221           0 :                 for(n = gbe->h; n; n = n->next) {
    2222           0 :                         sql_exp *ge = n->data, *pe = NULL, *e = NULL;
    2223             : 
    2224             :                         /* find group by exp in project, then in dict */
    2225           0 :                         pe = rel_find_exp(p, ge);
    2226           0 :                         if (pe) /* find project exp in right hand of join, ie dict */
    2227           0 :                                 e = rel_find_exp(jr, pe);
    2228           0 :                         if (pe && e) {  /* Rewrite: join with dict after the group by */
    2229           0 :                                 list *pexps = rel_projections(v->sql, rel, NULL, 1, 1), *npexps;
    2230           0 :                                 node *m;
    2231           0 :                                 sql_exp *ne = j->exps->h->data; /* join exp */
    2232           0 :                                 p->l = jl;   /* Project now only on the left side of the join */
    2233             : 
    2234           0 :                                 ne = ne->l;  /* The left side of the compare is the index of the left */
    2235             : 
    2236             :                                 /* find ge reference in new projection list */
    2237           0 :                                 npexps = sa_list(v->sql->sa);
    2238           0 :                                 for (m = pexps->h; m; m = m->next) {
    2239           0 :                                         sql_exp *a = m->data;
    2240             : 
    2241           0 :                                         if (exp_refers(ge, a)) {
    2242           0 :                                                 sql_exp *sc = jr->exps->t->data;
    2243           0 :                                                 sql_exp *e = exp_ref(v->sql, sc);
    2244           0 :                                                 if (exp_name(a))
    2245           0 :                                                         exp_prop_alias(v->sql->sa, e, a);
    2246             :                                                 a = e;
    2247             :                                         }
    2248           0 :                                         append(npexps, a);
    2249             :                                 }
    2250             : 
    2251             :                                 /* find ge in aggr list */
    2252           0 :                                 for (m = rel->exps->h; m; m = m->next) {
    2253           0 :                                         sql_exp *a = m->data;
    2254             : 
    2255           0 :                                         if (exp_match_exp(a, ge) || exp_refers(ge, a)) {
    2256           0 :                                                 a = exp_ref(v->sql, ne);
    2257           0 :                                                 if (exp_name(ne))
    2258           0 :                                                         exp_prop_alias(v->sql->sa, a, ne);
    2259           0 :                                                 m->data = a;
    2260             :                                         }
    2261             :                                 }
    2262             : 
    2263             :                                 /* change alias pe, ie project out the index  */
    2264           0 :                                 pe->l = (void*)exp_relname(ne);
    2265           0 :                                 pe->r = (void*)exp_name(ne);
    2266           0 :                                 if (exp_name(ne))
    2267           0 :                                         exp_prop_alias(v->sql->sa, pe, ne);
    2268             : 
    2269             :                                 /* change alias ge */
    2270           0 :                                 ge->l = (void*)exp_relname(pe);
    2271           0 :                                 ge->r = (void*)exp_name(pe);
    2272           0 :                                 if (exp_name(pe))
    2273           0 :                                         exp_prop_alias(v->sql->sa, ge, pe);
    2274             : 
    2275             :                                 /* zap both project and groupby name hash tables (as we changed names above) */
    2276           0 :                                 list_hash_clear(rel->exps);
    2277           0 :                                 list_hash_clear((list*)rel->r);
    2278           0 :                                 list_hash_clear(p->exps);
    2279             : 
    2280             :                                 /* add join */
    2281           0 :                                 j->l = rel;
    2282           0 :                                 rel = rel_project(v->sql->sa, j, npexps);
    2283           0 :                                 v->changes++;
    2284             :                         }
    2285             :                 }
    2286             :         }
    2287             :         return rel;
    2288             : }
    2289             : 
    2290             : /* reduce group by expressions based on pkey info
    2291             :  *
    2292             :  * The reduced group by and (derived) aggr expressions are restored via
    2293             :  * extra (new) aggregate columns.
    2294             :  */
    2295             : static inline sql_rel *
    2296       97544 : rel_reduce_groupby_exps(visitor *v, sql_rel *rel)
    2297             : {
    2298       97544 :         list *gbe = rel->r;
    2299             : 
    2300       97544 :         if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel) && list_length(gbe)) {
    2301       48584 :                 node *n, *m;
    2302       48584 :                 int k, j, i, ngbe = list_length(gbe);
    2303       48584 :                 int8_t *scores = SA_NEW_ARRAY(v->sql->ta, int8_t, ngbe);
    2304       48585 :                 sql_column *c;
    2305       48585 :                 sql_table **tbls = SA_NEW_ARRAY(v->sql->ta, sql_table*, ngbe);
    2306       48585 :                 sql_rel **bts = SA_NEW_ARRAY(v->sql->ta, sql_rel*, ngbe), *bt = NULL;
    2307             : 
    2308       48585 :                 gbe = rel->r;
    2309      127067 :                 for (k = 0, i = 0, n = gbe->h; n; n = n->next, k++) {
    2310       78483 :                         sql_exp *e = n->data;
    2311             : 
    2312       78483 :                         c = exp_find_column_(rel, e, -2, &bt);
    2313       78482 :                         if (c) {
    2314       88082 :                                 for(j = 0; j < i; j++)
    2315       33423 :                                         if (c->t == tbls[j] && bts[j] == bt)
    2316             :                                                 break;
    2317       70911 :                                 tbls[j] = c->t;
    2318       70911 :                                 bts[j] = bt;
    2319       70911 :                                 i += (j == i);
    2320             :                         }
    2321             :                 }
    2322       48584 :                 if (i) { /* forall tables find pkey and
    2323             :                                 remove useless other columns */
    2324             :                         /* TODO also remove group by columns which are related to
    2325             :                          * the other columns using a foreign-key join (n->1), ie 1
    2326             :                          * on the to be removed side.
    2327             :                          */
    2328       99716 :                         for(j = 0; j < i; j++) {
    2329       54646 :                                 int l, nr = 0, cnr = 0;
    2330             : 
    2331       54646 :                                 k = list_length(gbe);
    2332       54647 :                                 memset(scores, 0, list_length(gbe));
    2333       54646 :                                 if (tbls[j]->pkey) {
    2334       27420 :                                         for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
    2335       20895 :                                                 fcmp cmp = (fcmp)&kc_column_cmp;
    2336       20895 :                                                 sql_exp *e = n->data;
    2337             : 
    2338       20895 :                                                 c = exp_find_column_(rel, e, -2, &bt);
    2339       29799 :                                                 if (c && c->t == tbls[j] && bts[j] == bt &&
    2340        8904 :                                                     list_find(tbls[j]->pkey->k.columns, c, cmp) != NULL) {
    2341         796 :                                                         scores[l] = 1;
    2342         796 :                                                         nr ++;
    2343       18556 :                                                 } else if (c && c->t == tbls[j] && bts[j] == bt) {
    2344             :                                                         /* Okay we can cleanup a group by column */
    2345        8108 :                                                         scores[l] = -1;
    2346        8108 :                                                         cnr ++;
    2347             :                                                 }
    2348             :                                         }
    2349             :                                 }
    2350        6525 :                                 if (nr) {
    2351         787 :                                         int all = (list_length(tbls[j]->pkey->k.columns) == nr);
    2352         787 :                                         sql_kc *kc = tbls[j]->pkey->k.columns->h->data;
    2353             : 
    2354         787 :                                         c = kc->c;
    2355        2102 :                                         for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
    2356        1315 :                                                 sql_exp *e = n->data;
    2357             : 
    2358             :                                                 /* pkey based group by */
    2359        1464 :                                                 if (scores[l] == 1 && ((all ||
    2360             :                                                    /* first of key */
    2361         921 :                                                    (c == exp_find_column(rel, e, -2))) && !find_prop(e->p, PROP_HASHCOL)))
    2362          13 :                                                         e->p = prop_create(v->sql->sa, PROP_HASHCOL, e->p);
    2363             :                                         }
    2364        2966 :                                         for (m = rel->exps->h; m; m = m->next ){
    2365        2179 :                                                 sql_exp *e = m->data;
    2366             : 
    2367        7692 :                                                 for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
    2368        5526 :                                                         sql_exp *gb = n->data;
    2369             : 
    2370             :                                                         /* pkey based group by */
    2371        5526 :                                                         if (scores[l] == 1 && exp_match_exp(e,gb) && find_prop(gb->p, PROP_HASHCOL) && !find_prop(e->p, PROP_HASHCOL)) {
    2372          13 :                                                                 e->p = prop_create(v->sql->sa, PROP_HASHCOL, e->p);
    2373          13 :                                                                 break;
    2374             :                                                         }
    2375             : 
    2376             :                                                 }
    2377             :                                         }
    2378             :                                 }
    2379       54646 :                                 if (cnr && nr && list_length(tbls[j]->pkey->k.columns) == nr) {
    2380         108 :                                         list *ngbe = new_exp_list(v->sql->sa);
    2381             : 
    2382         392 :                                         for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
    2383         284 :                                                 sql_exp *e = n->data;
    2384             : 
    2385             :                                                 /* keep the group by columns which form a primary key
    2386             :                                                  * of this table. And those unrelated to this table. */
    2387         284 :                                                 if (scores[l] != -1)
    2388         139 :                                                         append(ngbe, e);
    2389             :                                         }
    2390         108 :                                         rel->r = ngbe;
    2391             :                                         /* rewrite gbe and aggr, in the aggr list */
    2392         108 :                                         if (0)
    2393             :                                         for (m = rel->exps->h; m; m = m->next ){
    2394             :                                                 sql_exp *e = m->data;
    2395             :                                                 int fnd = 0;
    2396             : 
    2397             :                                                 for (l = 0, n = gbe->h; l < k && n && !fnd; l++, n = n->next) {
    2398             :                                                         sql_exp *gb = n->data;
    2399             : 
    2400             :                                                         if (scores[l] == -1 && exp_refers(gb, e)) {
    2401             :                                                                 /*
    2402             :                                                                 sql_exp *rs = exp_column(v->sql->sa, gb->l?gb->l:exp_relname(gb), gb->r?gb->r:exp_name(gb), exp_subtype(gb), rel->card, has_nil(gb), is_unique(gb), is_intern(gb));
    2403             :                                                                 exp_setalias(rs, e->alias.label, exp_find_rel_name(e), exp_name(e));
    2404             :                                                                 e = rs;
    2405             :                                                                 */
    2406             :                                                                 assert(e->alias.label == e->nid);
    2407             :                                                                 fnd = 1;
    2408             :                                                         }
    2409             :                                                 }
    2410             :                                                 m->data = e;
    2411             :                                         }
    2412             :                                         /* new reduced aggr expression list */
    2413         108 :                                         assert(list_length(rel->exps)>0);
    2414             :                                         /* only one reduction at a time */
    2415         108 :                                         list_hash_clear(rel->exps);
    2416         108 :                                         v->changes++;
    2417         108 :                                         return rel;
    2418             :                                 }
    2419       54538 :                                 gbe = rel->r;
    2420             :                         }
    2421             :                 }
    2422             :         }
    2423             :         /* remove constants from group by list */
    2424       97436 :         if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel)) {
    2425       48984 :                 int i;
    2426       48984 :                 node *n;
    2427             : 
    2428      127183 :                 for (i = 0, n = gbe->h; n; n = n->next) {
    2429       78200 :                         sql_exp *e = n->data;
    2430             : 
    2431       78200 :                         if (exp_is_atom(e))
    2432          26 :                                 i++;
    2433             :                 }
    2434       48983 :                 if (i) {
    2435          26 :                         list *ngbe = new_exp_list(v->sql->sa);
    2436          26 :                         list *dgbe = new_exp_list(v->sql->sa);
    2437             : 
    2438          53 :                         for (n = gbe->h; n; n = n->next) {
    2439          27 :                                 sql_exp *e = n->data;
    2440             : 
    2441          27 :                                 if (!exp_is_atom(e))
    2442           1 :                                         append(ngbe, e);
    2443             :                                 /* we need at least one gbe */
    2444          26 :                                 else if (!n->next && list_empty(ngbe))
    2445          25 :                                         append(ngbe, e);
    2446             :                                 else
    2447           1 :                                         append(dgbe, e);
    2448             :                         }
    2449          26 :                         rel->r = ngbe;
    2450          26 :                         if (!list_empty(dgbe)) {
    2451             :                                 /* use atom's directly in the aggr expr list */
    2452             : 
    2453           3 :                                 for (n = rel->exps->h; n; n = n->next) {
    2454           2 :                                         sql_exp *e = n->data, *ne = NULL;
    2455             : 
    2456           2 :                                         if (e->type == e_column) {
    2457           2 :                                                 if (e->nid)
    2458           2 :                                                         ne = exps_bind_nid(dgbe, e->nid);
    2459           2 :                                                 if (ne) {
    2460           1 :                                                         ne = exp_copy(v->sql, ne);
    2461           1 :                                                         exp_prop_alias(v->sql->sa, ne, e);
    2462           1 :                                                         e = ne;
    2463             :                                                 }
    2464             :                                         }
    2465           2 :                                         n->data = e;
    2466             :                                 }
    2467           1 :                                 list_hash_clear(rel->exps);
    2468           1 :                                 v->changes++;
    2469             :                         }
    2470             :                 }
    2471             :         }
    2472             :         return rel;
    2473             : }
    2474             : 
    2475             : /* if all arguments to a distinct aggregate are unique, remove 'distinct' property */
    2476             : static inline sql_rel *
    2477       97544 : rel_distinct_aggregate_on_unique_values(visitor *v, sql_rel *rel)
    2478             : {
    2479       97544 :         if (is_groupby(rel->op) && !list_empty(rel->exps)) {
    2480      303223 :                 for (node *n = rel->exps->h; n; n = n->next) {
    2481      205756 :                         sql_exp *exp = (sql_exp*) n->data;
    2482             : 
    2483      205756 :                         if (exp->type == e_aggr && need_distinct(exp)) {
    2484         689 :                                 bool all_unique = true;
    2485         689 :                                 list *l = exp->l;
    2486             : 
    2487        1378 :                                 for (node *m = l->h; m && all_unique; m = m->next) {
    2488         689 :                                         sql_exp *arg = (sql_exp*) m->data;
    2489             : 
    2490         823 :                                         all_unique &= arg->type == e_column && is_unique(arg) && (!is_semantics(exp) || !has_nil(arg));
    2491             :                                 }
    2492         689 :                                 if (!all_unique && exps_card(l) > CARD_ATOM)
    2493         394 :                                         all_unique = exps_unique(v->sql, rel, l) && (!is_semantics(exp) || !have_nil(l));
    2494             :                                 if (all_unique) {
    2495         134 :                                         set_nodistinct(exp);
    2496         134 :                                         v->changes++;
    2497             :                                 }
    2498             :                         }
    2499             :                 }
    2500             :         }
    2501       97544 :         return rel;
    2502             : }
    2503             : 
    2504             : static inline sql_rel *
    2505       97543 : rel_remove_const_aggr(visitor *v, sql_rel *rel)
    2506             : {
    2507       97543 :         if (!rel)
    2508             :                 return rel;
    2509       97543 :         if (rel && is_groupby(rel->op) && list_length(rel->exps) >= 1 && !rel_is_ref(rel)) {
    2510       74004 :                 int needed = 0;
    2511      229077 :                 for (node *n = rel->exps->h; n; n = n->next) {
    2512      155073 :                         sql_exp *exp = (sql_exp*) n->data;
    2513             : 
    2514      155073 :                         if (exp_is_atom(exp) && exp->type != e_aggr)
    2515          55 :                                 needed++;
    2516             :                 }
    2517       74004 :                 if (needed) {
    2518          53 :                         if (!list_empty(rel->r)) {
    2519           3 :                                 int atoms = 0;
    2520             :                                 /* corner case, all grouping columns are atoms */
    2521           6 :                                 for (node *n = ((list*)rel->r)->h; n; n = n->next) {
    2522           3 :                                         sql_exp *exp = (sql_exp*) n->data;
    2523             : 
    2524           3 :                                         if (exp_is_atom(exp))
    2525           0 :                                                 atoms++;
    2526             :                                 }
    2527           3 :                                 if (atoms == list_length(rel->r)) {
    2528           0 :                                         list *nexps = sa_list(v->sql->sa);
    2529           0 :                                         for (node *n = rel->exps->h; n; ) {
    2530           0 :                                                 node *next = n->next;
    2531           0 :                                                 sql_exp *e = (sql_exp*) n->data;
    2532             : 
    2533             :                                                 /* remove references to constant group by columns */
    2534           0 :                                                 if (e->type == e_column) {
    2535           0 :                                                         sql_exp *found = NULL;
    2536           0 :                                                         found = exps_bind_nid(rel->r, e->nid);
    2537           0 :                                                         if (found) {
    2538           0 :                                                                 list_append(nexps, found);
    2539           0 :                                                                 list_remove_node(rel->exps, NULL, n);
    2540             :                                                         }
    2541             :                                                 }
    2542             :                                                 n = next;
    2543             :                                         }
    2544           0 :                                         rel->r = NULL; /* transform it into a global aggregate */
    2545           0 :                                         rel->exps = list_merge(nexps, rel->exps, (fdup) NULL); /* add grouping columns back as projections */
    2546             :                                         /* global aggregates may return 1 row, so filter it based on the count */
    2547           0 :                                         sql_subfunc *cf = sql_bind_func(v->sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
    2548           0 :                                         sql_exp *count = exp_aggr(v->sql->sa, NULL, cf, 0, 1, CARD_ATOM, 0);
    2549           0 :                                         count = rel_groupby_add_aggr(v->sql, rel, count);
    2550           0 :                                         sql_exp *cp = exp_compare(v->sql->sa, exp_ref(v->sql, count), exp_atom(v->sql->sa, atom_int(v->sql->sa, exp_subtype(count), 0)), cmp_notequal);
    2551           0 :                                         rel = rel_select(v->sql->sa, rel, cp);
    2552           0 :                                         set_processed(rel);
    2553           0 :                                         return rel;
    2554             :                                 }
    2555          50 :                         } else if (list_length(rel->exps) == needed) { /* all are const */
    2556          48 :                                 sql_rel *ll = rel->l;
    2557          48 :                                 rel->op = op_project;
    2558             :                                 /* TODO check if l->l == const, else change that */
    2559          48 :                                 if (ll && ll->l) {
    2560          44 :                                         rel_destroy(ll);
    2561          44 :                                         rel->l = rel_project_exp(v->sql, exp_atom_bool(v->sql->sa, 1));
    2562             :                                 }
    2563          48 :                                 return rel;
    2564             :                         }
    2565           5 :                         sql_rel *nrel = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
    2566          17 :                         for (node *n = nrel->exps->h; n; n = n->next) {
    2567          12 :                                 sql_exp *exp = (sql_exp*) n->data;
    2568          12 :                                 if (exp->type == e_column) {
    2569          12 :                                         sql_exp *e = rel_find_exp(rel, exp);
    2570             : 
    2571          12 :                                         if (e && exp_is_atom(e) && e->type == e_atom) {
    2572           7 :                                                 sql_exp *ne = exp_copy(v->sql, e);
    2573           7 :                                                 assert(ne->alias.label);
    2574           7 :                                                 exp_setalias(ne, ne->alias.label, exp_find_rel_name(exp), exp_name(exp));
    2575           7 :                                                 n->data = ne;
    2576           7 :                                                 v->changes++;
    2577             :                                         }
    2578             :                                 }
    2579             :                         }
    2580           5 :                         list *nl = sa_list(v->sql->sa);
    2581          17 :                         for (node *n = rel->exps->h; n; n = n->next) {
    2582          12 :                                 sql_exp *exp = (sql_exp*) n->data;
    2583             : 
    2584          12 :                                 if (!exp_is_atom(exp) || exp->type != e_atom)
    2585           5 :                                         append(nl, exp);
    2586             :                         }
    2587           5 :                         rel->exps = nl;
    2588           5 :                         return nrel;
    2589             :                 }
    2590             :         }
    2591             :         return rel;
    2592             : }
    2593             : 
    2594             : #if 0
    2595             : static sql_rel *
    2596             : rel_groupby_distinct2(visitor *v, sql_rel *rel)
    2597             : {
    2598             :         list *ngbes = sa_list(v->sql->sa), *gbes, *naggrs = sa_list(v->sql->sa), *aggrs = sa_list(v->sql->sa);
    2599             :         sql_rel *l;
    2600             :         node *n;
    2601             : 
    2602             :         gbes = rel->r;
    2603             :         if (!gbes)
    2604             :                 return rel;
    2605             : 
    2606             :         /* check if each aggr is, rewritable (max,min,sum,count)
    2607             :          *                        and only has one argument */
    2608             :         for (n = rel->exps->h; n; n = n->next) {
    2609             :                 sql_exp *e = n->data;
    2610             :                 sql_subfunc *af = e->f;
    2611             : 
    2612             :                 if (e->type == e_aggr &&
    2613             :                    (strcmp(af->func->base.name, "sum") &&
    2614             :                      strcmp(af->func->base.name, "count") &&
    2615             :                      strcmp(af->func->base.name, "min") &&
    2616             :                      strcmp(af->func->base.name, "max")))
    2617             :                         return rel;
    2618             :         }
    2619             : 
    2620             :         for (n = gbes->h; n; n = n->next) {
    2621             :                 sql_exp *e = n->data;
    2622             : 
    2623             :                 e = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
    2624             :                 append(ngbes, e);
    2625             :         }
    2626             : 
    2627             :         /* 1 for each aggr(distinct v) add the attribute expression v to gbes and aggrs list
    2628             :          * 2 for each aggr(z) add aggr_phase2('z') to the naggrs list
    2629             :          * 3 for each group by col, add also to the naggrs list
    2630             :          * */
    2631             :         for (n = rel->exps->h; n; n = n->next) {
    2632             :                 sql_exp *e = n->data;
    2633             : 
    2634             :                 if (e->type == e_aggr && need_distinct(e)) { /* 1 */
    2635             :                         /* need column expression */
    2636             :                         list *args = e->l;
    2637             :                         sql_exp *v = args->h->data;
    2638             :                         append(gbes, v);
    2639             :                         if (!exp_name(v))
    2640             :                                 exp_label(v->sql->sa, v, ++v->sql->label);
    2641             :                         v = exp_column(v->sql->sa, exp_find_rel_name(v), exp_name(v), exp_subtype(v), v->card, has_nil(v), is_unique(v), is_intern(v));
    2642             :                         append(aggrs, v);
    2643             :                         v = exp_aggr1(v->sql->sa, v, e->f, need_distinct(e), 1, e->card, 1);
    2644             :                         exp_setalias(v, e->alias.label, exp_find_rel_name(e), exp_name(e));
    2645             :                         append(naggrs, v);
    2646             :                 } else if (e->type == e_aggr && !need_distinct(e)) {
    2647             :                         sql_exp *v;
    2648             :                         sql_subfunc *f = e->f;
    2649             :                         int cnt = exp_aggr_is_count(e);
    2650             :                         sql_subfunc *a = sql_bind_func(v->sql, "sys", (cnt)?"sum":f->func->base.name, exp_subtype(e), NULL, F_AGGR, true, true);
    2651             : 
    2652             :                         append(aggrs, e);
    2653             :                         if (!exp_name(e))
    2654             :                                 exp_label(v->sql->sa, e, ++v->sql->label);
    2655             :                         set_has_nil(e);
    2656             :                         v = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
    2657             :                         v = exp_aggr1(v->sql->sa, v, a, 0, 1, e->card, 1);
    2658             :                         if (cnt)
    2659             :                                 set_zero_if_empty(v);
    2660             :                         exp_setalias(v, e->alias.label, exp_find_rel_name(e), exp_name(e));
    2661             :                         append(naggrs, v);
    2662             :                 } else { /* group by col */
    2663             :                         if (list_find_exp(gbes, e) || !list_find_exp(naggrs, e)) {
    2664             :                                 append(aggrs, e);
    2665             : 
    2666             :                                 e = exp_column(v->sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
    2667             :                         }
    2668             :                         append(naggrs, e);
    2669             :                 }
    2670             :         }
    2671             : 
    2672             :         l = rel->l = rel_groupby(v->sql, rel->l, gbes);
    2673             :         l->exps = aggrs;
    2674             :         rel->r = ngbes;
    2675             :         rel->exps = naggrs;
    2676             :         v->changes++;
    2677             :         return rel;
    2678             : }
    2679             : #endif
    2680             : 
    2681             : /* Rewrite group by expressions with distinct
    2682             :  *
    2683             :  * ie select a, count(distinct b) from c where ... groupby a;
    2684             :  * No other aggregations should be present
    2685             :  *
    2686             :  * Rewrite the more general case, good for parallel execution
    2687             :  *
    2688             :  * groupby(R) [e,f] [ aggr1 a distinct, aggr2 b distinct, aggr3 c, aggr4 d]
    2689             :  *
    2690             :  * into
    2691             :  *
    2692             :  * groupby(
    2693             :  *      groupby(R) [e,f,a,b] [ a, b, aggr3 c, aggr4 d]
    2694             :  * ) [e,f]( aggr1 a distinct, aggr2 b distinct, aggr3_phase2 c, aggr4_phase2 d)
    2695             :  */
    2696             : static inline sql_rel *
    2697       97544 : rel_groupby_distinct(visitor *v, sql_rel *rel)
    2698             : {
    2699       97544 :         node *n;
    2700             : 
    2701       97544 :         if (is_groupby(rel->op)) {
    2702       97473 :                 sql_rel *l = rel->l;
    2703       97473 :                 if (!l || is_groupby(l->op))
    2704             :                         return rel;
    2705             :         }
    2706       97410 :         if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel)) {
    2707       48963 :                 int nr = 0, anr = 0;
    2708       48963 :                 list *gbe, *ngbe, *arg, *exps, *nexps;
    2709       48963 :                 sql_exp *distinct = NULL, *darg, *found;
    2710       48963 :                 sql_rel *l = NULL;
    2711             : 
    2712      177813 :                 for (n=rel->exps->h; n && nr <= 2; n = n->next) {
    2713      128850 :                         sql_exp *e = n->data;
    2714      128850 :                         if (need_distinct(e)) {
    2715         393 :                                 distinct = n->data;
    2716         393 :                                 nr++;
    2717             :                         }
    2718      128850 :                         anr += is_aggr(e->type);
    2719             :                 }
    2720       48963 :                 if (nr < 1 || distinct->type != e_aggr)
    2721             :                         return rel;
    2722         386 :                 if (nr > 1 || anr > nr)
    2723             :                         return rel;//rel_groupby_distinct2(v, rel);
    2724         117 :                 arg = distinct->l;
    2725         117 :                 if (list_length(arg) != 1 || list_length(rel->r) + nr != list_length(rel->exps))
    2726           2 :                         return rel;
    2727             : 
    2728         115 :                 gbe = rel->r;
    2729         115 :                 ngbe = sa_list(v->sql->sa);
    2730         115 :                 exps = sa_list(v->sql->sa);
    2731         115 :                 nexps = sa_list(v->sql->sa);
    2732         362 :                 for (n=rel->exps->h; n; n = n->next) {
    2733         247 :                         sql_exp *e = n->data;
    2734         247 :                         if (e != distinct) {
    2735         132 :                                 if (e->type == e_aggr) { /* copy the arguments to the aggregate */
    2736           0 :                                         list *args = e->l;
    2737           0 :                                         if (args) {
    2738           0 :                                                 for (node *n = args->h ; n ; n = n->next) {
    2739           0 :                                                         sql_exp *e = n->data;
    2740           0 :                                                         list_append(ngbe, exp_copy(v->sql, e));
    2741           0 :                                                         list_append(exps, exp_copy(v->sql, e));
    2742             :                                                 }
    2743             :                                         }
    2744             :                                 } else {
    2745         132 :                                         e = exp_ref(v->sql, e);
    2746         132 :                                         append(ngbe, e);
    2747         132 :                                         append(exps, e);
    2748             :                                 }
    2749         132 :                                 if (e->type == e_aggr) /* aggregates must be copied */
    2750           0 :                                         e = exp_copy(v->sql, e);
    2751             :                                 else
    2752         132 :                                         e = exp_ref(v->sql, e);
    2753         132 :                                 append(nexps, e);
    2754             :                         }
    2755             :                 }
    2756             : 
    2757         115 :                 darg = arg->h->data;
    2758         115 :                 if ((found = exps_find_exp(exps, darg)) == NULL) { /* not already in the groups projection list */
    2759         112 :                         if ((found = exps_find_exp(gbe, darg))) { /* first find if the aggregate argument already exists in the grouping list */
    2760           0 :                                 darg = exp_ref(v->sql, found);
    2761             :                         } else {
    2762         112 :                                 list_append(gbe, darg = exp_copy(v->sql, darg));
    2763         112 :                                 exp_label(v->sql->sa, darg, ++v->sql->label);
    2764         112 :                                 darg = exp_ref(v->sql, darg);
    2765             :                         }
    2766         112 :                         list_append(exps, darg);
    2767         112 :                         darg = exp_ref(v->sql, darg);
    2768             :                 } else {
    2769           3 :                         darg = exp_ref(v->sql, found);
    2770             :                 }
    2771         115 :                 arg->h->data = darg;
    2772         115 :                 l = rel->l = rel_groupby(v->sql, rel->l, gbe);
    2773         115 :                 l->exps = exps;
    2774         115 :                 set_processed(l);
    2775         115 :                 rel->r = ngbe;
    2776         115 :                 rel->exps = nexps;
    2777         115 :                 set_nodistinct(distinct);
    2778         115 :                 append(nexps, distinct);
    2779         115 :                 v->changes++;
    2780             :         }
    2781             :         return rel;
    2782             : }
    2783             : 
    2784             : /*
    2785             :  * Push Count inside crossjoin down, and multiply the results
    2786             :  *
    2787             :  *     project (                                project(
    2788             :  *          group by (                               crossproduct (
    2789             :  *              crossproduct(                             project (
    2790             :  *                   L,                  =>                    group by (
    2791             :  *                   R                                              L
    2792             :  *              ) [ ] [ count NOT NULL ]                       ) [ ] [ count NOT NULL ]
    2793             :  *          )                                             ),
    2794             :  *     ) [ NOT NULL ]                                     project (
    2795             :  *                                                              group by (
    2796             :  *                                                                  R
    2797             :  *                                                              ) [ ] [ count NOT NULL ]
    2798             :  *                                                        )
    2799             :  *                                                   ) [ sql_mul(.., .. NOT NULL) ]
    2800             :  *                                              )
    2801             :  */
    2802             : static inline sql_rel *
    2803       97543 : rel_push_count_down(visitor *v, sql_rel *rel)
    2804             : {
    2805       97543 :         sql_rel *r = rel->l;
    2806             : 
    2807       97543 :         if (is_groupby(rel->op) && !rel_is_ref(rel) && list_empty(rel->r) &&
    2808       25386 :                 r && !r->exps && r->op == op_join && !(rel_is_ref(r)) &&
    2809             :                 /* currently only single count aggregation is handled, no other projects or aggregation */
    2810          56 :                 list_length(rel->exps) == 1 && exp_aggr_is_count(rel->exps->h->data)) {
    2811          12 :                 sql_exp *nce, *oce, *cnt1 = NULL, *cnt2 = NULL;
    2812          12 :                 sql_rel *gbl = NULL, *gbr = NULL;       /* Group By */
    2813          12 :                 sql_rel *cp = NULL;                                     /* Cross Product */
    2814          12 :                 sql_rel *srel;
    2815             : 
    2816          12 :                 oce = rel->exps->h->data;
    2817          12 :                 if (oce->l) /* we only handle COUNT(*) */
    2818             :                         return rel;
    2819             : 
    2820          10 :                 srel = r->l;
    2821             :                 {
    2822          10 :                         sql_subfunc *cf = sql_bind_func(v->sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
    2823          10 :                         sql_exp *e = exp_aggr(v->sql->sa, NULL, cf, need_distinct(oce), need_no_nil(oce), oce->card, 0);
    2824             : 
    2825          10 :                         exp_label(v->sql->sa, e, ++v->sql->label);
    2826          10 :                         cnt1 = exp_ref(v->sql, e);
    2827          10 :                         gbl = rel_groupby(v->sql, rel_dup(srel), NULL);
    2828          10 :                         set_processed(gbl);
    2829          10 :                         rel_groupby_add_aggr(v->sql, gbl, e);
    2830             :                 }
    2831             : 
    2832          10 :                 srel = r->r;
    2833             :                 {
    2834          10 :                         sql_subfunc *cf = sql_bind_func(v->sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
    2835          10 :                         sql_exp *e = exp_aggr(v->sql->sa, NULL, cf, need_distinct(oce), need_no_nil(oce), oce->card, 0);
    2836             : 
    2837          10 :                         exp_label(v->sql->sa, e, ++v->sql->label);
    2838          10 :                         cnt2 = exp_ref(v->sql, e);
    2839          10 :                         gbr = rel_groupby(v->sql, rel_dup(srel), NULL);
    2840          10 :                         set_processed(gbr);
    2841          10 :                         rel_groupby_add_aggr(v->sql, gbr, e);
    2842             :                 }
    2843             : 
    2844          10 :                 cp = rel_crossproduct(v->sql->sa, gbl, gbr, op_join);
    2845          10 :                 set_processed(cp);
    2846             : 
    2847          10 :                 if (!(nce = rel_binop_(v->sql, NULL, cnt1, cnt2, "sys", "sql_mul", card_value, true))) {
    2848           0 :                         v->sql->session->status = 0;
    2849           0 :                         v->sql->errstr[0] = '\0';
    2850           0 :                         return rel; /* error, fallback to original expression */
    2851             :                 }
    2852             :                 /* because of remote plans, make sure "sql_mul" returns bigint. The cardinality is atomic, so no major performance penalty */
    2853          10 :                 if (subtype_cmp(exp_subtype(oce), exp_subtype(nce)) != 0)
    2854          10 :                         nce = exp_convert(v->sql, nce, exp_subtype(nce), exp_subtype(oce));
    2855          10 :                 if (exp_name(oce))
    2856          10 :                         exp_prop_alias(v->sql->sa, nce, oce);
    2857             : 
    2858          10 :                 rel_destroy(rel);
    2859          10 :                 rel = rel_project(v->sql->sa, cp, append(new_exp_list(v->sql->sa), nce));
    2860          10 :                 set_processed(rel);
    2861             : 
    2862          10 :                 v->changes++;
    2863             :         }
    2864             : 
    2865             :         return rel;
    2866             : }
    2867             : 
    2868             : static inline sql_rel *
    2869       42797 : rel_basecount(visitor *v, sql_rel *rel)
    2870             : {
    2871       53203 :         if (is_groupby(rel->op) && !rel_is_ref(rel) && rel->l && list_empty(rel->r) &&
    2872       19767 :                 list_length(rel->exps) == 1 && exp_aggr_is_count(rel->exps->h->data)) {
    2873        4489 :                 sql_rel *bt = rel->l;
    2874        4489 :                 sql_exp *e = rel->exps->h->data;
    2875        4489 :                 if (is_basetable(bt->op) && list_empty(e->l)) { /* count(*) */
    2876             :                         /* change into select cnt('schema','table') */
    2877         684 :                         sql_table *t = bt->l;
    2878             :                         /* I need to get the declared table's frame number to make this work correctly for those */
    2879         684 :                         if (!isTable(t) || isDeclaredTable(t))
    2880             :                                 return rel;
    2881         605 :                         sql_subfunc *cf = sql_bind_func(v->sql, "sys", "cnt", sql_bind_localtype("str"), sql_bind_localtype("str"), F_FUNC, true, true);
    2882         605 :                         list *exps = sa_list(v->sql->sa);
    2883         605 :                         append(exps, exp_atom_str(v->sql->sa, t->s->base.name, sql_bind_localtype("str")));
    2884         605 :                         append(exps, exp_atom_str(v->sql->sa, t->base.name, sql_bind_localtype("str")));
    2885         605 :                         sql_exp *ne = exp_op(v->sql->sa, exps, cf);
    2886             : 
    2887         605 :                         ne = exp_propagate(v->sql->sa, ne, e);
    2888         605 :                         exp_setalias(ne, e->alias.label, exp_find_rel_name(e), exp_name(e));
    2889         605 :                         rel_destroy(rel);
    2890         605 :                         rel = rel_project(v->sql->sa, NULL, append(sa_list(v->sql->sa), ne));
    2891         605 :                         v->changes++;
    2892             :                 }
    2893             :         }
    2894             :         return rel;
    2895             : }
    2896             : 
    2897             : static inline sql_rel *
    2898       42797 : rel_simplify_count(visitor *v, sql_rel *rel)
    2899             : {
    2900       42797 :         if (is_groupby(rel->op) && !list_empty(rel->exps)) {
    2901       42714 :                 mvc *sql = v->sql;
    2902       42714 :                 int ncountstar = 0;
    2903             : 
    2904             :                 /* Convert count(no null) into count(*) */
    2905      134511 :                 for (node *n = rel->exps->h; n ; n = n->next) {
    2906       91797 :                         sql_exp *e = n->data;
    2907             : 
    2908       91797 :                         if (exp_aggr_is_count(e) && !need_distinct(e)) {
    2909       15639 :                                 if (list_length(e->l) == 0) {
    2910       12879 :                                         ncountstar++;
    2911        2760 :                                 } else if (list_length(e->l) == 1 && !has_nil((sql_exp*)((list*)e->l)->h->data)) {
    2912        2600 :                                         sql_subfunc *cf = sql_bind_func(sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true, true);
    2913        2600 :                                         sql_exp *ne = exp_aggr(sql->sa, NULL, cf, 0, 0, e->card, 0);
    2914        2600 :                                         if (exp_name(e))
    2915        2600 :                                                 exp_prop_alias(sql->sa, ne, e);
    2916        2600 :                                         n->data = ne;
    2917        2600 :                                         ncountstar++;
    2918        2600 :                                         v->changes++;
    2919             :                                 }
    2920             :                         }
    2921             :                 }
    2922             :                 /* With multiple count(*), use exp_ref to reduce the number of calls to this aggregate */
    2923       42714 :                 if (ncountstar > 1) {
    2924          56 :                         sql_exp *count_star = NULL;
    2925          56 :                         sql_rel *nrel = rel_project(v->sql->sa, rel, NULL);
    2926          56 :                         list *aexps = sa_list(v->sql->sa), *nexps = sa_list(v->sql->sa);
    2927          56 :                         nrel->exps = nexps;
    2928         247 :                         for (node *n = rel->exps->h; n ; n = n->next) {
    2929         191 :                                 sql_exp *e = n->data;
    2930             : 
    2931         191 :                                 if (exp_aggr_is_count(e) && !need_distinct(e) && list_length(e->l) == 0) {
    2932         126 :                                         if (!count_star) {
    2933          56 :                                                 count_star = e;
    2934          56 :                                                 append(aexps, e);
    2935          56 :                                                 append(nexps, exp_ref(sql, e));
    2936             :                                         } else {
    2937          70 :                                                 sql_exp *ne = exp_ref(sql, count_star);
    2938             : 
    2939          70 :                                                 if (exp_name(e))
    2940          70 :                                                         exp_prop_alias(sql->sa, ne, e);
    2941          70 :                                                 v->changes++;
    2942          70 :                                                 append(nexps, ne);
    2943             :                                         }
    2944             :                                 } else {
    2945          65 :                                         append(aexps, e);
    2946          65 :                                         append(nexps, exp_ref(sql, e));
    2947             :                                 }
    2948             :                         }
    2949          56 :                         rel->exps = aexps;
    2950          56 :                         return nrel;
    2951             :                 }
    2952             :         }
    2953             :         return rel;
    2954             : }
    2955             : 
    2956             : static sql_rel *
    2957       42797 : rel_groupjoin(visitor *v, sql_rel *rel)
    2958             : {
    2959       42797 :         if (!rel || rel_is_ref(rel) || !is_groupby(rel->op) || list_empty(rel->r))
    2960       14632 :                 return rel;
    2961             : 
    2962       28165 :         sql_rel *j = rel->l;
    2963       28165 :         if (!j || rel_is_ref(j) /*|| !is_left(j->op)*/ || j->op != op_join || list_length(rel->exps) > 1 /* only join because left joins aren't optimized jet (TODO), only length 1 as implementation of groupjoins is missing */ || !list_empty(rel->attr))
    2964       27448 :                 return rel;
    2965             :         /* check group by exps == equi join exps */
    2966         717 :         list *gbes = rel->r;
    2967         717 :         if (list_length(gbes) != list_length(j->exps))
    2968             :                 return rel;
    2969         618 :         int nr = 0;
    2970        1233 :         for(node *n = gbes->h; n; n = n->next) {
    2971         618 :                 sql_exp *gbe = n->data;
    2972        1233 :                 for(node *m = j->exps->h; m; m = m->next) {
    2973         618 :                         sql_exp *je = m->data;
    2974         618 :                         if (je->type != e_cmp || je->flag != cmp_equal)
    2975             :                                 return rel;
    2976             :                         /* check if its a join exp (ie not a selection) */
    2977         619 :                         if (!( (!rel_has_exp(j->l, je->l, false) && !rel_has_exp(j->r, je->r, false)) ||
    2978           8 :                                    (!rel_has_exp(j->l, je->r, false) && !rel_has_exp(j->r, je->l, false))))
    2979           0 :                                 return rel;
    2980         615 :                         if (exp_match(je->l, gbe)) {
    2981          76 :                                 nr++;
    2982         539 :                         } else if (exp_match(je->r, gbe)) {
    2983          10 :                                 nr++;
    2984             :                         }
    2985             :                 }
    2986             :         }
    2987         615 :         if (nr == list_length(gbes)) {
    2988             :                 // printf("#group by converted\n");
    2989          86 :                 j = rel_dup(j);
    2990          86 :                 j->attr = rel->exps;
    2991          86 :                 v->changes++;
    2992          86 :                 rel_destroy(rel);
    2993          86 :                 return j;
    2994             :         }
    2995             :         return rel;
    2996             : }
    2997             : 
    2998             : /* select k1 from bla where k1 = const -> select const from bla where k1 = const */
    2999             : static sql_rel *
    3000     3876884 : rel_project_select_exp(visitor *v, sql_rel *rel)
    3001             : {
    3002     3876884 :         if (is_simple_project(rel->op) && rel->exps && rel->l) {
    3003     1267831 :                 sql_rel *l = rel->l;
    3004     1267831 :                 if (is_select(l->op) && l->exps) {
    3005     1294477 :                         for(node *n = rel->exps->h; n; n = n->next) {
    3006      974087 :                                 sql_exp *col = n->data;
    3007      974087 :                                 if (col->type == e_column) {
    3008     1752924 :                                         for(node *m = l->exps->h; m; m = m->next) {
    3009     1043945 :                                                 sql_exp *cmp = m->data;
    3010     1043945 :                                                 if (cmp->type == e_cmp && cmp->flag == cmp_equal && !is_anti(cmp) && !is_semantics(cmp) && exp_is_atom(cmp->r)) {
    3011      631558 :                                                         sql_exp *l = cmp->l;
    3012      631558 :                                                         if(l->type == e_column && col->nid == l->nid) {
    3013             :                                                                 /* replace column with the constant */
    3014       13612 :                                                                 sql_exp *e = n->data = exp_copy(v->sql, cmp->r);
    3015       13612 :                                                                 exp_setalias(e, col->alias.label, exp_relname(col), exp_name(col));
    3016       13612 :                                                                 exp_propagate(v->sql->sa, e, col);
    3017       13612 :                                                                 list_hash_clear(rel->exps);
    3018             :                                                         }
    3019             :                                                 }
    3020             :                                         }
    3021             :                                 }
    3022             :                         }
    3023             :                 }
    3024             :         }
    3025     3876884 :         return rel;
    3026             : }
    3027             : 
    3028             : static sql_rel *
    3029     3876876 : rel_optimize_projections_(visitor *v, sql_rel *rel)
    3030             : {
    3031     3876876 :         rel = rel_project_cse(v, rel);
    3032     3876883 :         rel = rel_project_select_exp(v, rel);
    3033             : 
    3034     3876886 :         if (!rel || !is_groupby(rel->op))
    3035             :                 return rel;
    3036             : 
    3037       97544 :         rel = rel_remove_const_aggr(v, rel);
    3038       97544 :         if (v->value_based_opt) {
    3039       42797 :                 rel = rel_simplify_sum(v, rel);
    3040       42797 :                 rel = rel_simplify_groupby_columns(v, rel);
    3041             :         }
    3042       97544 :         rel = rel_groupby_cse(v, rel);
    3043       97545 :         rel = rel_push_aggr_down(v, rel);
    3044       97543 :         rel = rel_push_groupby_down(v, rel);
    3045       97544 :         rel = rel_reduce_groupby_exps(v, rel);
    3046       97544 :         rel = rel_distinct_aggregate_on_unique_values(v, rel);
    3047       97544 :         rel = rel_groupby_distinct(v, rel);
    3048       97543 :         rel = rel_push_count_down(v, rel);
    3049             :         /* only when value_based_opt is on, ie not for dependency resolution */
    3050       97543 :         if (v->value_based_opt) {
    3051       42797 :                 rel = rel_simplify_count(v, rel);
    3052       42797 :                 rel = rel_basecount(v, rel);
    3053             : 
    3054       42797 :                 rel = rel_groupjoin(v, rel);
    3055             :         }
    3056             :         return rel;
    3057             : }
    3058             : 
    3059             : static sql_rel *
    3060      186407 : rel_optimize_projections(visitor *v, global_props *gp, sql_rel *rel)
    3061             : {
    3062      186407 :         (void) gp;
    3063      186407 :         return rel_visitor_topdown(v, rel, &rel_optimize_projections_);
    3064             : }
    3065             : 
    3066             : run_optimizer
    3067      631001 : bind_optimize_projections(visitor *v, global_props *gp)
    3068             : {
    3069      631001 :         int flag = v->sql->sql_optimizer;
    3070      630855 :         return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_project] || gp->cnt[op_union] || gp->cnt[op_munion]
    3071     1261856 :                    || gp->cnt[op_inter] || gp->cnt[op_except]) && (flag & optimize_projections) ? rel_optimize_projections : NULL;
    3072             : }
    3073             : 
    3074             : 
    3075             : static inline sql_rel *
    3076     2737873 : rel_push_project_down_union(visitor *v, sql_rel *rel)
    3077             : {
    3078             :         /* first remove distinct if already unique */
    3079     2737873 :         if (rel->op == op_project && need_distinct(rel) && rel->exps && exps_unique(v->sql, rel, rel->exps) && !have_nil(rel->exps)) {
    3080         109 :                 set_nodistinct(rel);
    3081         109 :                 if (exps_card(rel->exps) <= CARD_ATOM && rel->card > CARD_ATOM) { /* if the projection just contains constants, then no topN is needed */
    3082           2 :                         sql_rel *nl = rel->l = rel_topn(v->sql->sa, rel->l, append(sa_list(v->sql->sa), exp_atom_lng(v->sql->sa, 1)));
    3083           2 :                         set_processed(nl);
    3084             :                 }
    3085         109 :                 v->changes++;
    3086             :         }
    3087             : 
    3088     2737873 :         if (rel->op == op_project && rel->l && rel->exps && list_empty(rel->r)) {
    3089      927074 :                 int need_distinct = need_distinct(rel);
    3090      927074 :                 sql_rel *u = rel->l;
    3091      927074 :                 sql_rel *p = rel;
    3092             : 
    3093      927074 :                 if (!u || !(is_union(u->op) || is_munion(u->op)) || need_distinct(u) || is_recursive(u) || !u->exps || rel_is_ref(u) || project_unsafe(rel, false))
    3094      898010 :                         return rel;
    3095             : 
    3096       29064 :                 sql_rel *r;
    3097             : 
    3098       29064 :                 assert(!is_union(u->op));
    3099       29064 :                 if (is_recursive(u))
    3100             :                         return rel;
    3101             : 
    3102             :                 /* don't push project down union of single values */
    3103       87357 :                 for (node *n = ((list*)u->l)->h; n; n = n->next) {
    3104       58315 :                         r = n->data;
    3105             :                         // TODO: does this check make sense?
    3106       58315 :                         if (is_project(r->op) && !r->l)
    3107             :                                 return rel;
    3108             :                 }
    3109             : 
    3110       87335 :                 for (node *n = ((list*)u->l)->h; n; n = n->next) {
    3111       58293 :                         r = rel_dup(n->data);
    3112             : 
    3113             :                         /* introduce projection around each operand if needed */
    3114       58293 :                         if (!is_project(r->op))
    3115         270 :                                 r = rel_project(v->sql->sa, r,
    3116             :                                                 rel_projections(v->sql, r, NULL, 1, 1));
    3117             :                         /* check if we need distinct */
    3118      116586 :                         need_distinct &=
    3119       58293 :                                 (!exps_unique(v->sql, r, r->exps) || have_nil(r->exps));
    3120       58293 :                         rel_rename_exps(v->sql, u->exps, r->exps);
    3121             : 
    3122       58293 :                         rel_destroy(n->data);
    3123       58293 :                         n->data = r;
    3124             :                 }
    3125             : 
    3126             :                 /* once we have checked for need_distinct in every rel we can
    3127             :                  * introduce the projects under the munion which are gonna be
    3128             :                  * copies of the single project above munion */
    3129       87335 :                 for (node *n = ((list*)u->l)->h; n; n = n->next) {
    3130       58293 :                         r = rel_dup(n->data);
    3131             : 
    3132       58293 :                         r = rel_project(v->sql->sa, r, NULL);
    3133       58293 :                         if (need_distinct)
    3134           0 :                                 set_distinct(r);
    3135       58293 :                         r->exps = exps_copy(v->sql, p->exps);
    3136       58293 :                         set_processed(r);
    3137             : 
    3138       58293 :                         rel_destroy(n->data);
    3139       58293 :                         n->data = r;
    3140             :                 }
    3141             : 
    3142             :                 /* turn the project-munion on top into munion. incr operand
    3143             :                  * rels count to make sure that they are not deleted by the
    3144             :                  * subsequent rel_inplace_setop_n_ary */
    3145       87335 :                 for (node *n = ((list*)u->l)->h; n; n = n->next)
    3146       58293 :                         rel_dup(n->data);
    3147       29042 :                 rel = rel_inplace_setop_n_ary(v->sql, rel, u->l, op_munion,
    3148             :                                 rel_projections(v->sql, rel, NULL, 1, 1));
    3149       29042 :                 if (need_distinct)
    3150           0 :                         set_distinct(rel);
    3151       29042 :                 if (is_single(u))
    3152           2 :                         set_single(rel);
    3153             : 
    3154       29042 :                 v->changes++;
    3155             : 
    3156             :                 /* if any operand has two project above then squash them */
    3157       87335 :                 for (node *n = ((list*)u->l)->h; n; n = n->next) {
    3158       58293 :                         r = rel_dup(n->data);
    3159       58293 :                         r = rel_merge_projects_(v, r);
    3160       58293 :                         rel_destroy(n->data);
    3161       58293 :                         n->data = r;
    3162             :                 }
    3163             : 
    3164             :                 return rel;
    3165             :         }
    3166             :         return rel;
    3167             : }
    3168             : 
    3169             : static inline sql_rel *
    3170     2737873 : rel_merge_unions(visitor *v, sql_rel *rel)
    3171             : {
    3172             :         /* stacked munion flattening e.g.
    3173             :          * munion( munion(a, b, c), munion(d, e)) => munion(a,b,c,d,e)
    3174             :          */
    3175     2737873 :         if (rel && is_munion(rel->op) && !is_recursive(rel)) {
    3176       80481 :                 list *l = rel->l;
    3177      518432 :                 for(node *n = l->h; n; ) {
    3178      357470 :                         node *next = n->next;
    3179      357470 :                         sql_rel *oc = n->data;
    3180      357470 :                         sql_rel *c = oc;
    3181             : 
    3182             :                         /* account for any group-bys pushed down between stacked munions */
    3183      357470 :                         if (oc->op == op_groupby)
    3184        3285 :                                 c = oc->l;
    3185             : 
    3186      357470 :                         if (is_munion(c->op)) {
    3187       10600 :                                 c = rel_dup(c);
    3188       10600 :                                 list_remove_node(l, NULL, n);
    3189       10600 :                                 l = list_merge(l, c->l, (fdup)NULL);
    3190       10600 :                                 c->l = NULL;
    3191       10600 :                                 rel_destroy(oc);
    3192       10600 :                                 if (!next)
    3193           3 :                                         next = l->h;
    3194       10600 :                                 v->changes++;
    3195             :                         }
    3196             :                         n = next;
    3197             :                 }
    3198       80481 :                 rel->l = l;
    3199             :         }
    3200     2737873 :         return rel;
    3201             : }
    3202             : 
    3203             : 
    3204             : /*
    3205             :  * Push (semi)joins down unions, this is basically for merge tables, where
    3206             :  * we know that the fk-indices are split over two clustered merge tables.
    3207             :  */
    3208             : static inline sql_rel *
    3209     2737873 : rel_push_join_down_munion(visitor *v, sql_rel *rel)
    3210             : {
    3211     2737873 :         if ((is_join(rel->op) && !is_outerjoin(rel->op) && !is_single(rel)) || is_semi(rel->op)) {
    3212      337455 :                 sql_rel *l = rel->l, *r = rel->r, *ol = l, *or = r;
    3213      337455 :                 list *exps = rel->exps, *attr = rel->attr;
    3214      337455 :                 sql_exp *je = NULL;
    3215             : 
    3216      337455 :                 if (is_recursive(l) || is_recursive(r))
    3217             :                         return rel;
    3218             :                 /* we would like to optimize in place reference rels which point
    3219             :                  * to replica tables and let the replica optimizer handle those
    3220             :                  * later. otherwise we miss the push join down optimization due
    3221             :                  * to the rel_is_ref bailout
    3222             :                  */
    3223      337451 :                 if (rel_is_ref(l) && is_basetable(l->op) && l->l && isReplicaTable((sql_table*)l->l)) {
    3224           1 :                         rel->l = rel_copy(v->sql, l, true);
    3225           1 :                         rel_destroy(l);
    3226             :                 }
    3227      337451 :                 if (rel_is_ref(r) && is_basetable(r->op) && r->l && isReplicaTable((sql_table*)r->l)) {
    3228           4 :                         rel->r = rel_copy(v->sql, r, true);
    3229           4 :                         rel_destroy(r);
    3230             :                 }
    3231             : 
    3232             :                 // TODO: do we need to check if it's l/r are refs?
    3233      337451 :                 if (!l || !r || need_distinct(l) || need_distinct(r) || rel_is_ref(l) || rel_is_ref(r))
    3234             :                         return rel;
    3235      284939 :                 if (l->op == op_project)
    3236       23242 :                         l = l->l;
    3237      284939 :                 if (r->op == op_project)
    3238       26880 :                         r = r->l;
    3239             : 
    3240             :                 /* both sides only if we have a join index ASSUMING pkey-fkey are aligned */
    3241             :                 // TODO: we could also check if the join cols are (not) unique
    3242      284939 :                 bool aligned_pk_fk = true;
    3243      284939 :                 if (!l || !r || (is_munion(l->op) && is_munion(r->op) &&
    3244         173 :                         !(je = rel_is_join_on_pkey(rel, aligned_pk_fk))))
    3245         861 :                         return rel;
    3246             : 
    3247             :                 // TODO: why? bailout for union semijoin without pkey joins expressions
    3248      284078 :                 if (is_semi(rel->op) && is_munion(l->op) && !je)
    3249             :                         return rel;
    3250             : 
    3251             :                 /* if both sides are munions we assume that they will have the same number of children */
    3252      283212 :                 if (is_munion(l->op) && is_munion(r->op) && list_length(l->l) != list_length(r->l))
    3253             :                         return rel;
    3254             : 
    3255      283212 :                 if (is_munion(l->op) && !need_distinct(l) && !is_single(l) && !is_recursive(l) &&
    3256        4559 :                    !is_munion(r->op)){
    3257             :                         /* join(munion(a,b,c), d) -> munion(join(a,d), join(b,d), join(c,d)) */
    3258        4559 :                         list *js = sa_list(v->sql->sa);
    3259       13687 :                         for (node *n = ((list*)l->l)->h; n; n = n->next) {
    3260        9128 :                                 sql_rel *pc = rel_dup(n->data);
    3261        9128 :                                 if (!is_project(pc->op))
    3262          16 :                                         pc = rel_project(v->sql->sa, pc, rel_projections(v->sql, pc, NULL, 1, 1));
    3263        9128 :                                 rel_rename_exps(v->sql, l->exps, pc->exps);
    3264        9128 :                                 if (l != ol) {
    3265          12 :                                         pc = rel_project(v->sql->sa, pc, NULL);
    3266          12 :                                         pc->exps = exps_copy(v->sql, ol->exps);
    3267          12 :                                         set_processed(pc);
    3268             :                                 }
    3269        9128 :                                 pc = rel_crossproduct(v->sql->sa, pc, rel_dup(or), rel->op);
    3270        9128 :                                 pc->exps = exps_copy(v->sql, exps);
    3271        9128 :                                 pc->attr = exps_copy(v->sql, attr);
    3272        9128 :                                 set_processed(pc);
    3273        9128 :                                 pc = rel_project(v->sql->sa, pc, rel_projections(v->sql, pc, NULL, 1, 1));
    3274        9128 :                                 js = append(js, pc);
    3275             :                         }
    3276        4559 :                         v->changes++;
    3277        4559 :                         return rel_inplace_setop_n_ary(v->sql, rel, js, op_munion,
    3278             :                                                                rel_projections(v->sql, rel, NULL, 1, 1));
    3279      278653 :                 } else if (is_munion(l->op) && !need_distinct(l) && !is_single(l) && !is_recursive(l) &&
    3280           0 :                                is_munion(r->op) && !need_distinct(r) && !is_single(r) && !is_recursive(r) &&
    3281             :                                je) {
    3282             :                         /* join(munion(a,b,c), munion(d,e,f)) -> munion(join(a,d), join(b,e), join(c,f)) */
    3283           0 :                         list *cps = sa_list(v->sql->sa);
    3284             :                         /* create pairwise joins between left and right parts. assume eq num of parts (see earlier bailout) */
    3285           0 :                         for (node *n = ((list*)l->l)->h, *m=((list*)r->l)->h; n && m; n = n->next, m = m->next) {
    3286             :                                 /* left part */
    3287           0 :                                 sql_rel *lp = rel_dup(n->data);
    3288           0 :                                 if (!is_project(lp->op))
    3289           0 :                                         lp = rel_project(v->sql->sa, lp, rel_projections(v->sql, lp, NULL, 1, 1));
    3290           0 :                                 rel_rename_exps(v->sql, l->exps, lp->exps);
    3291           0 :                                 if (l != ol) {
    3292           0 :                                         lp = rel_project(v->sql->sa, lp, NULL);
    3293           0 :                                         lp->exps = exps_copy(v->sql, ol->exps);
    3294           0 :                                         set_processed(lp);
    3295             :                                 }
    3296             :                                 /* right part */
    3297           0 :                                 sql_rel *rp = rel_dup(m->data);
    3298           0 :                                 if (!is_project(rp->op))
    3299           0 :                                         rp = rel_project(v->sql->sa, rp, rel_projections(v->sql, rp, NULL, 1, 1));
    3300           0 :                                 rel_rename_exps(v->sql, r->exps, rp->exps);
    3301           0 :                                 if (r != or) {
    3302           0 :                                         rp = rel_project(v->sql->sa, rp, NULL);
    3303           0 :                                         rp->exps = exps_copy(v->sql, or->exps);
    3304           0 :                                         set_processed(rp);
    3305             :                                 }
    3306             :                                 /* combine them */
    3307           0 :                                 sql_rel *cp = rel_crossproduct(v->sql->sa, lp, rp, rel->op);
    3308           0 :                                 cp->exps = exps_copy(v->sql, exps);
    3309           0 :                                 cp->attr = exps_copy(v->sql, attr);
    3310           0 :                                 set_processed(cp);
    3311           0 :                                 cp = rel_project(v->sql->sa, cp, rel_projections(v->sql, cp, NULL, 1, 1));
    3312           0 :                                 cps = append(cps, cp);
    3313             :                         }
    3314           0 :                         v->changes++;
    3315           0 :                         return rel_inplace_setop_n_ary(v->sql, rel, cps, op_munion,
    3316             :                                                                                    rel_projections(v->sql, rel, NULL, 1, 1));
    3317      278653 :                 } else if (!is_munion(l->op) &&
    3318      278645 :                                 is_munion(r->op) && !need_distinct(r) && !is_single(r) && !is_recursive(r) &&
    3319        2945 :                                !is_semi(rel->op)) {
    3320             :                         /* join(a, munion(b,c,d)) -> munion(join(a,b), join(a,c), join(a,d)) */
    3321        1574 :                         list *js = sa_list(v->sql->sa);
    3322        4725 :                         for (node *n = ((list*)r->l)->h; n; n = n->next) {
    3323        3151 :                                 sql_rel *pc = rel_dup(n->data);
    3324        3151 :                                 if (!is_project(pc->op))
    3325          13 :                                         pc = rel_project(v->sql->sa, pc, rel_projections(v->sql, pc, NULL, 1, 1));
    3326        3151 :                                 rel_rename_exps(v->sql, r->exps, pc->exps);
    3327        3151 :                                 if (r != or) {
    3328         125 :                                         pc = rel_project(v->sql->sa, pc, NULL);
    3329         125 :                                         pc->exps = exps_copy(v->sql, or->exps);
    3330         125 :                                         set_processed(pc);
    3331             :                                 }
    3332        3151 :                                 pc = rel_crossproduct(v->sql->sa, rel_dup(ol), pc, rel->op);
    3333        3151 :                                 pc->exps = exps_copy(v->sql, exps);
    3334        3151 :                                 pc->attr = exps_copy(v->sql, attr);
    3335        3151 :                                 set_processed(pc);
    3336        3151 :                                 pc = rel_project(v->sql->sa, pc, rel_projections(v->sql, pc, NULL, 1, 1));
    3337        3151 :                                 js = append(js, pc);
    3338             :                         }
    3339        1574 :                         v->changes++;
    3340        1574 :                         return rel_inplace_setop_n_ary(v->sql, rel, js, op_munion,
    3341             :                                                                rel_projections(v->sql, rel, NULL, 1, 1));
    3342      277079 :                 } else if (!is_munion(l->op) &&
    3343      277071 :                                 is_munion(r->op) && !need_distinct(r) && !is_single(r) && !is_recursive(r) &&
    3344        1371 :                                 is_semi(rel->op) && je) {
    3345             :                         /* {semi}join ( A1, munion (B, A2a, C, A2b)) [A1.partkey = A2.partkey] ->
    3346             :                          * {semi}join ( A1, munion (A2a, A2b))
    3347             :                          * (ie some parts of an n-th munion operand)
    3348             :                          *
    3349             :                          * How to detect that a relation isn't matching?
    3350             :                          *              partitioning is currently done only on pkey/fkey's
    3351             :                          *              ie only matching per part if join is on pkey/fkey (parts)
    3352             :                          *              and part numbers should match.
    3353             :                          * */
    3354           0 :                         int lpnr = rel_part_nr(l, je);
    3355           0 :                         if (lpnr < 0)
    3356             :                                 return rel;
    3357             : 
    3358           0 :                         list *ups = sa_list(v->sql->sa);
    3359           0 :                         for (node *n = ((list*)r->l)->h; n; n = n->next) {
    3360           0 :                                 if (rel_uses_part_nr(n->data, je, lpnr)) {
    3361           0 :                                         sql_rel *pc = rel_dup(n->data);
    3362           0 :                                         ups = append(ups, pc);
    3363             :                                 }
    3364             :                         }
    3365           0 :                         v->changes++;
    3366           0 :                         return rel_inplace_setop_n_ary(v->sql, r, ups, op_munion,
    3367             :                                                                       rel_projections(v->sql, rel, NULL, 1, 1));
    3368             :                 }
    3369             :         }
    3370             :         return rel;
    3371             : }
    3372             : 
    3373             : static sql_rel *
    3374     2737873 : rel_optimize_unions_topdown_(visitor *v, sql_rel *rel)
    3375             : {
    3376     2737873 :         rel = rel_push_project_down_union(v, rel);
    3377     2737873 :         rel = rel_merge_unions(v, rel);
    3378     2737873 :         rel = rel_push_join_down_munion(v, rel);
    3379     2737873 :         return rel;
    3380             : }
    3381             : 
    3382             : static sql_rel *
    3383       28972 : rel_optimize_unions_topdown(visitor *v, global_props *gp, sql_rel *rel)
    3384             : {
    3385       28972 :         (void) gp;
    3386       28972 :         return rel_visitor_topdown(v, rel, &rel_optimize_unions_topdown_);
    3387             : }
    3388             : 
    3389             : run_optimizer
    3390      631265 : bind_optimize_unions_topdown(visitor *v, global_props *gp)
    3391             : {
    3392      631265 :         (void) v;
    3393      631265 :         return gp->opt_level == 1 && gp->cnt[op_munion] ? rel_optimize_unions_topdown : NULL;
    3394             : }
    3395             : 
    3396             : 
    3397             : static sql_column *
    3398           0 : is_fk_column_of_pk(mvc *sql, sql_rel *rel, sql_column *pkc, sql_exp *e) /* test if e is a foreign key column for the pk on pkc */
    3399             : {
    3400           0 :         sql_trans *tr = sql->session->tr;
    3401           0 :         sql_column *c = exp_find_column(rel, e, -2);
    3402             : 
    3403           0 :         if (c) {
    3404           0 :                 sql_table *t = c->t;
    3405             : 
    3406           0 :                 for (node *n = ol_first_node(t->idxs); n; n = n->next) {
    3407           0 :                         sql_idx *li = n->data;
    3408             : 
    3409           0 :                         if (li->type == join_idx) {
    3410           0 :                                 for (node *m = li->columns->h ; m ; m = m->next) {
    3411           0 :                                         sql_kc *fkc = m->data;
    3412             : 
    3413           0 :                                         if (strcmp(fkc->c->base.name, c->base.name) == 0) { /* same fkey column */
    3414           0 :                                                 sql_key *fkey = (sql_key*)os_find_id(tr->cat->objects, tr, ((sql_fkey*)li->key)->rkey);
    3415             : 
    3416           0 :                                                 if (strcmp(fkey->t->base.name, pkc->t->base.name) == 0) { /* to same pk table */
    3417           0 :                                                         for (node *o = fkey->columns->h ; o ; o = n->next) {
    3418           0 :                                                                 sql_kc *kc = m->data;
    3419             : 
    3420           0 :                                                                 if (strcmp(kc->c->base.name, pkc->base.name) == 0) /* to same pk table column */
    3421             :                                                                         return c;
    3422             :                                                         }
    3423             :                                                 }
    3424             :                                         }
    3425             :                                 }
    3426             :                         }
    3427             :                 }
    3428             :         }
    3429             :         return NULL;
    3430             : }
    3431             : 
    3432             : static bool
    3433           0 : has_no_selectivity(mvc *sql, sql_rel *rel)
    3434             : {
    3435           0 :         if (!rel)
    3436             :                 return true;
    3437             : 
    3438           0 :         switch(rel->op){
    3439             :         case op_basetable:
    3440             :         case op_truncate:
    3441             :         case op_table:
    3442             :                 return true;
    3443           0 :         case op_topn:
    3444             :         case op_sample:
    3445             :         case op_project:
    3446             :         case op_groupby:
    3447           0 :                 return has_no_selectivity(sql, rel->l);
    3448             :         case op_ddl:
    3449             :         case op_insert:
    3450             :         case op_update:
    3451             :         case op_delete:
    3452             :         case op_merge:
    3453             :         case op_join:
    3454             :         case op_left:
    3455             :         case op_right:
    3456             :         case op_full:
    3457             :         case op_semi:
    3458             :         case op_anti:
    3459             :         case op_union:
    3460             :         case op_inter:
    3461             :         case op_except:
    3462             :         case op_munion:
    3463             :         case op_select:
    3464             :                 return false;
    3465             :         }
    3466             :         return true;
    3467             : }
    3468             : 
    3469             : static sql_rel *
    3470      706357 : rel_distinct_project2groupby_(visitor *v, sql_rel *rel)
    3471             : {
    3472      706357 :         sql_rel *l = rel->l;
    3473             : 
    3474             :         /* rewrite distinct project (table) [ constant ] -> project [ constant ] */
    3475      714167 :         if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
    3476        7810 :             exps_card(rel->exps) <= CARD_ATOM) {
    3477         480 :                 set_nodistinct(rel);
    3478         480 :                 if (rel->card > CARD_ATOM) { /* if the projection just contains constants, then no topN is needed */
    3479         193 :                         sql_rel *nl = rel->l = rel_topn(v->sql->sa, rel->l, append(sa_list(v->sql->sa), exp_atom_lng(v->sql->sa, 1)));
    3480         193 :                         set_processed(nl);
    3481             :                 }
    3482         480 :                 v->changes++;
    3483             :         }
    3484             : 
    3485             :         /* rewrite distinct project [ pk ] ( select ( table ) [ e op val ])
    3486             :          * into project [ pk ] ( select/semijoin ( table )  */
    3487      706357 :         if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
    3488        7542 :             (l->op == op_select || l->op == op_semi) && exps_unique(v->sql, rel, rel->exps) &&
    3489         212 :                 (!have_semantics(l->exps) || !have_nil(rel->exps))) {
    3490         212 :                 set_nodistinct(rel);
    3491         212 :                 v->changes++;
    3492             :         }
    3493             : 
    3494             :         /* rewrite distinct project ( join(p,f) [ p.pk = f.fk ] ) [ p.pk ]
    3495             :          *      into project( (semi)join(p,f) [ p.pk = f.fk ] ) [ p.pk ] */
    3496      706357 :         if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
    3497        7118 :             l && (is_select(l->op) || l->op == op_join) && rel_is_join_on_pkey(l, true) /* [ pk == fk ] */) {
    3498           0 :                 sql_exp *found = NULL, *pk = NULL, *fk = NULL;
    3499           0 :                 bool all_exps_atoms = true;
    3500           0 :                 sql_column *pkc = NULL;
    3501             : 
    3502           0 :                 for (node *m = l->exps->h ; m ; m = m->next) { /* find a primary key join */
    3503           0 :                         sql_exp *je = (sql_exp *) m->data;
    3504           0 :                         sql_exp *le = je->l, *re = je->r;
    3505             : 
    3506           0 :                         if (!find_prop(je->p, PROP_JOINIDX)) /* must be a pk-fk join expression */
    3507           0 :                                 continue;
    3508             : 
    3509           0 :                         if ((pkc = exp_is_pkey(l, le))) { /* le is the primary key */
    3510           0 :                                 all_exps_atoms = true;
    3511             : 
    3512           0 :                                 for (node *n = rel->exps->h; n && all_exps_atoms; n = n->next) {
    3513           0 :                                         sql_exp *e = (sql_exp *) n->data;
    3514             : 
    3515           0 :                                         if (exp_match(e, le) || exp_refers(e, le))
    3516             :                                                 found = e;
    3517           0 :                                         else if (e->card > CARD_ATOM)
    3518           0 :                                                 all_exps_atoms = false;
    3519             :                                 }
    3520             :                                 pk = le;
    3521             :                                 fk = re;
    3522             :                         }
    3523           0 :                         if (!found && (pkc = exp_is_pkey(l, re))) { /* re is the primary key */
    3524           0 :                                 all_exps_atoms = true;
    3525             : 
    3526           0 :                                 for (node *n = rel->exps->h; n && all_exps_atoms; n = n->next) {
    3527           0 :                                         sql_exp *e = (sql_exp *) n->data;
    3528             : 
    3529           0 :                                         if (exp_match(e, re) || exp_refers(e, re))
    3530             :                                                 found = e;
    3531           0 :                                         else if (e->card > CARD_ATOM)
    3532           0 :                                                 all_exps_atoms = false;
    3533             :                                 }
    3534             :                                 pk = re;
    3535             :                                 fk = le;
    3536             :                         }
    3537             :                 }
    3538             : 
    3539           0 :                 if (all_exps_atoms && found) { /* rel must have the same primary key on the projection list */
    3540             :                         /* if the foreign key has no selectivity, the join can be removed */
    3541           0 :                         if (!(rel_is_ref(l)) && ((rel_find_exp(l->l, fk) && is_fk_column_of_pk(v->sql, l->l, pkc, fk) && has_no_selectivity(v->sql, l->l)) ||
    3542           0 :                                 (l->r && rel_find_exp(l->r, fk) && is_fk_column_of_pk(v->sql, l->r, pkc, fk) && has_no_selectivity(v->sql, l->r)))) {
    3543           0 :                                 sql_rel *side = (rel_find_exp(l->l, pk) != NULL)?l->l:l->r;
    3544             : 
    3545           0 :                                 rel->l = rel_dup(side);
    3546           0 :                                 rel_destroy(l);
    3547           0 :                                 v->changes++;
    3548           0 :                                 set_nodistinct(rel);
    3549           0 :                                 return rel;
    3550             :                         }
    3551             :                         /* if the join has no multiple references it can be re-written into a semijoin */
    3552           0 :                         if (l->op == op_join && !(rel_is_ref(l)) && list_length(rel->exps) == 1) { /* other expressions may come from the other side */
    3553           0 :                                 if (l->r && rel_find_exp(l->r, pk)) {
    3554           0 :                                         sql_rel *temp = l->l;
    3555           0 :                                         l->l = l->r;
    3556           0 :                                         l->r = temp;
    3557             : 
    3558           0 :                                         l->op = op_semi;
    3559           0 :                                 } else if (rel_find_exp(l->l, pk)) {
    3560           0 :                                         l->op = op_semi;
    3561             :                                 }
    3562             :                         }
    3563           0 :                         v->changes++;
    3564           0 :                         set_nodistinct(rel);
    3565           0 :                         return rel;
    3566             :                 }
    3567             :         }
    3568             :         /* rewrite distinct project [ gbe ] ( select ( groupby [ gbe ] [ gbe, e ] )[ e op val ])
    3569             :          * into project [ gbe ] ( select ( group etc ) */
    3570      706357 :         if (rel->op == op_project && rel->l && !rel->r /* no order by */ &&
    3571        7118 :             need_distinct(rel) && l->op == op_select){
    3572        2590 :                 sql_rel *g = l->l;
    3573        2590 :                 if (is_groupby(g->op)) {
    3574           4 :                         list *used = sa_list(v->sql->sa);
    3575           4 :                         list *gbe = g->r;
    3576           4 :                         node *n;
    3577           4 :                         int fnd = 1;
    3578             : 
    3579           8 :                         for (n = rel->exps->h; n && fnd; n = n->next) {
    3580           4 :                                 sql_exp *e = n->data;
    3581             : 
    3582           4 :                                 if (e->card > CARD_ATOM) {
    3583             :                                         /* find e in gbe */
    3584           4 :                                         sql_exp *ne = list_find_exp(g->exps, e);
    3585             : 
    3586           4 :                                         if (ne)
    3587           3 :                                                 ne = list_find_exp( gbe, ne);
    3588           3 :                                         if (ne && !list_find_exp(used, ne)) {
    3589           2 :                                                 fnd++;
    3590           2 :                                                 list_append(used, ne);
    3591             :                                         }
    3592           3 :                                         if (!ne)
    3593             :                                                 fnd = 0;
    3594             :                                 }
    3595             :                         }
    3596           4 :                         if (fnd == (list_length(gbe)+1)) {
    3597           0 :                                 v->changes++;
    3598           0 :                                 set_nodistinct(rel);
    3599             :                         }
    3600             :                 }
    3601             :         }
    3602      706357 :         if (rel->op == op_project && rel->l &&
    3603        7118 :             need_distinct(rel) && exps_card(rel->exps) > CARD_ATOM) {
    3604        7118 :                 node *n;
    3605        7118 :                 list *exps = new_exp_list(v->sql->sa), *gbe = new_exp_list(v->sql->sa);
    3606        7118 :                 list *obe = rel->r; /* we need to read the ordering later */
    3607             : 
    3608        7118 :                 if (obe) {
    3609           0 :                         int fnd = 0;
    3610             : 
    3611           0 :                         for(n = obe->h; n && !fnd; n = n->next) {
    3612           0 :                                 sql_exp *e = n->data;
    3613             : 
    3614           0 :                                 if (e->type != e_column)
    3615             :                                         fnd = 1;
    3616           0 :                                 else if (exps_bind_nid(rel->exps, e->nid) == NULL)
    3617           0 :                                         fnd = 1;
    3618             :                         }
    3619           0 :                         if (fnd)
    3620             :                                 return rel;
    3621             :                 }
    3622        7118 :                 rel->l = rel_project(v->sql->sa, rel->l, rel->exps);
    3623             : 
    3624       21750 :                 for (n = rel->exps->h; n; n = n->next) {
    3625       14632 :                         sql_exp *e = n->data, *ne;
    3626             : 
    3627       14632 :                         set_nodistinct(e);
    3628       14632 :                         ne = exp_ref(v->sql, e);
    3629       14632 :                         if (e->card > CARD_ATOM && !list_find_exp(gbe, ne)) { /* no need to group by on constants, or the same column multiple times */
    3630       14260 :                                 append(gbe, ne);
    3631       14260 :                                 ne = exp_ref(v->sql, ne);
    3632             :                         }
    3633       14632 :                         append(exps, ne);
    3634             :                 }
    3635        7118 :                 rel->op = op_groupby;
    3636        7118 :                 rel->exps = exps;
    3637        7118 :                 rel->r = gbe;
    3638        7118 :                 set_nodistinct(rel);
    3639        7118 :                 if (obe) {
    3640             :                         /* add order again */
    3641           0 :                         rel = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
    3642           0 :                         rel->r = obe;
    3643             :                 }
    3644        7118 :                 v->changes++;
    3645             :         }
    3646             :         return rel;
    3647             : }
    3648             : 
    3649             : static sql_rel *
    3650        5669 : rel_distinct_project2groupby(visitor *v, global_props *gp, sql_rel *rel)
    3651             : {
    3652        5669 :         (void) gp;
    3653        5669 :         return rel_visitor_bottomup(v, rel, &rel_distinct_project2groupby_);
    3654             : }
    3655             : 
    3656             : run_optimizer
    3657      631282 : bind_distinct_project2groupby(visitor *v, global_props *gp)
    3658             : {
    3659      631282 :         int flag = v->sql->sql_optimizer;
    3660      554271 :         return gp->opt_cycle == 0 && gp->opt_level == 1 && gp->needs_distinct && gp->cnt[op_project] &&
    3661      636951 :                    (flag & distinct_project2groupby)? rel_distinct_project2groupby : NULL;
    3662             : }

Generated by: LCOV version 1.14