LCOV - code coverage report
Current view: top level - sql/server - rel_statistics.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 748 778 96.1 %
Date: 2024-04-25 20:03:45 Functions: 24 24 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include "rel_optimizer_private.h"
      15             : #include "rel_statistics.h"
      16             : #include "rel_rewriter.h"
      17             : 
      18             : static sql_exp *
      19     3883423 : comparison_find_column(sql_exp *input, sql_exp *e)
      20             : {
      21     3883423 :         switch (input->type) {
      22      154123 :         case e_convert: {
      23      154123 :                 list *types = (list *)input->r;
      24      154123 :                 sql_class from = ((sql_subtype*)types->h->data)->type->eclass, to = ((sql_subtype*)types->h->next->data)->type->eclass;
      25      154123 :                 if (from == to)
      26      183595 :                         return comparison_find_column(input->l, e) ? input : NULL;
      27             :                 return NULL;
      28             :         }
      29     3175472 :         case e_column:
      30     3175472 :                 return exp_match(e, input) ? input : NULL;
      31             :         default:
      32             :                 return NULL;
      33             :         }
      34             : }
      35             : 
      36             : /* multi lo <= col <= hi, maybe still be false even if lo or hi are NULL, possibly similar for filter on multiple
      37             :  * columns */
      38             : #define comp_single_column(c) (!c->f)
      39             : 
      40             : static sql_exp *
      41     8627628 : rel_propagate_column_ref_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
      42             : {
      43     8711345 :         assert(e->type == e_column);
      44     8711345 :         if (rel) {
      45     8711303 :                 switch(rel->op) {
      46     4036292 :                 case op_left:
      47             :                 case op_right:
      48             :                 case op_full:
      49             :                 case op_join:
      50             :                 case op_select:
      51             :                 case op_anti:
      52             :                 case op_semi: {
      53     4036292 :                         bool found_without_semantics = false, found_left = false, found_right = false, still_unique = false;
      54             : 
      55     4036292 :                         if ((is_innerjoin(rel->op) || is_select(rel->op)) && list_length(rel->exps) == 1 && exp_is_false(rel->exps->h->data))
      56             :                                 return NULL; /* nothing will pass, skip */
      57             : 
      58             :                         /* propagate from the bottom first */
      59     4028852 :                         if (rel_propagate_column_ref_statistics(sql, rel->l, e))
      60             :                                 found_left = true;
      61     2578825 :                         if (!found_left && is_join(rel->op) && rel_propagate_column_ref_statistics(sql, rel->r, e))
      62     4028852 :                                 found_right = true;
      63             : 
      64     4028852 :                         if (!found_left && !found_right)
      65             :                                 return NULL;
      66     1723140 :                         if (!list_empty(rel->exps) && rel->op != op_anti) { /* if there's an or, the MIN and MAX get difficult to propagate */
      67     3658945 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
      68     2020790 :                                         sql_exp *comp = n->data, *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL, *fe = comp->f, *fne = NULL;
      69             : 
      70     2020790 :                                         if (comp->type == e_cmp) {
      71     2020044 :                                                 if (is_theta_exp(comp->flag) && ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) || (fe && (fne = comparison_find_column(fe, e))))) {
      72      135782 :                                                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
      73      135782 :                                                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
      74             : 
      75             :                                                         /* not semantics found or if explicitly filtering not null values from the column */
      76      135782 :                                                         found_without_semantics |= (!is_semantics(comp) && comp_single_column(comp)) || (comp->flag == cmp_equal && lne && is_anti(comp) && exp_is_null(re));
      77      135782 :                                                         still_unique |= comp->flag == cmp_equal && is_unique(le) && is_unique(re); /* unique if only equi-joins on unique columns are there */
      78      135782 :                                                         if (is_full(rel->op) || (is_left(rel->op) && found_left) || (is_right(rel->op) && found_right)) /* on outer joins, min and max cannot be propagated on some cases */
      79       21084 :                                                                 continue;
      80             :                                                         /* if (end2 >= start1 && start2 <= end1) then the 2 intervals are intersected */
      81      114698 :                                                         if (fe && lval_min && lval_max) { /* range case, the middle expression must intersect the other two */
      82        4467 :                                                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
      83        4467 :                                                                 int int1 = rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0,
      84        4467 :                                                                         int2 = fval_min && fval_max && atom_cmp(fval_max, lval_min) >= 0 && atom_cmp(fval_min, lval_max) <= 0,
      85        4467 :                                                                         symmetric = is_symmetric(comp);
      86             : 
      87        4467 :                                                                 if (is_anti(comp) || (!symmetric && fval_min && rval_max && atom_cmp(fval_min, rval_max) < 0)) /* for asymmetric case the re range must be after the fe range */
      88        1700 :                                                                         continue;
      89        2767 :                                                                 if (lne && int1 && int2) {
      90         104 :                                                                         if (symmetric) {
      91           0 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
      92           0 :                                                                                 atom *nmin = statistics_atom_min(sql, rval_min, fval_min), *nmax = statistics_atom_max(sql, rval_max, fval_max);
      93             :                                                                                 /* max is min from le and (max from re and fe max) */
      94           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, nmax, p2->value.pval) : nmax);
      95             :                                                                                 /* min is max from le and (min from re and fe min) */
      96           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, nmin, p1->value.pval) : nmin);
      97             :                                                                         } else {
      98         104 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
      99             :                                                                                 /* max is min from le and fe max */
     100         104 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, fval_max, p2->value.pval) : fval_max);
     101             :                                                                                 /* min is max from le and re min */
     102         104 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, rval_min, p1->value.pval) : rval_min);
     103             :                                                                         }
     104        2663 :                                                                 } else if (rne) {
     105         591 :                                                                         if (symmetric && int1 && int2) { /* min is max from le and (min from re and fe min) */
     106           0 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     107           0 :                                                                                 atom *nmin = p ? statistics_atom_min(sql, p->value.pval, fval_min) : fval_min;
     108           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, nmin, lval_min));
     109         591 :                                                                         } else if (int1) { /* min is max from le and re min */
     110          98 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     111          98 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     112             :                                                                         }
     113        2072 :                                                                 } else if (fne) {
     114         550 :                                                                         if (symmetric && int1 && int2) { /* max is min from le and (max from re and fe max) */
     115           0 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     116           0 :                                                                                 atom *nmax = p ? statistics_atom_max(sql, p->value.pval, rval_max) : rval_max;
     117           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, nmax, lval_max) : nmax);
     118         550 :                                                                         } else if (int2) { /* max is min from le and fe max */
     119         268 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     120         268 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     121             :                                                                         }
     122             :                                                                 }
     123      110231 :                                                         } else if (lval_min && lval_max && rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0) {
     124             :                                                                 /* both min and max must be set and the intervals must overlap */
     125       42391 :                                                                 switch (comp->flag) {
     126       30231 :                                                                 case cmp_equal: /* for equality reduce */
     127       30231 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_max(sql, lval_max, rval_max) : statistics_atom_min(sql, lval_max, rval_max));
     128       30231 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_min(sql, lval_min, rval_min) : statistics_atom_max(sql, lval_min, rval_min));
     129       30231 :                                                                         break;
     130        2695 :                                                                 case cmp_notequal: /* for inequality expand */
     131        2695 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_min(sql, lval_max, rval_max) : statistics_atom_max(sql, lval_max, rval_max));
     132        2695 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_max(sql, lval_min, rval_min) : statistics_atom_min(sql, lval_min, rval_min));
     133        2695 :                                                                         break;
     134        5501 :                                                                 case cmp_gt:
     135             :                                                                 case cmp_gte:
     136       10129 :                                                                         if (!is_anti(comp) && lne) { /* min is max from both min */
     137        4628 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     138        4628 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min, p->value.pval) : rval_min);
     139         873 :                                                                         } else if (!is_anti(comp)) { /* max is min from both max */
     140         873 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     141         873 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     142             :                                                                         }
     143             :                                                                         break;
     144        3964 :                                                                 case cmp_lt:
     145             :                                                                 case cmp_lte:
     146        7163 :                                                                         if (!is_anti(comp) && lne) { /* max is min from both max */
     147        3199 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     148        3199 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max, p->value.pval) : rval_max);
     149         765 :                                                                         } else if (!is_anti(comp)) { /* min is max from both min */
     150         765 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     151         765 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     152             :                                                                         }
     153             :                                                                         break;
     154             :                                                                 default: /* Maybe later I can do cmp_in and cmp_notin */
     155             :                                                                         break;
     156             :                                                                 }
     157             :                                                         }
     158             :                                                 }
     159             :                                         }
     160             :                                 }
     161             :                         }
     162     1723140 :                         if (is_full(rel->op) || (is_left(rel->op) && found_right) || (is_right(rel->op) && found_left))
     163       31653 :                                 set_has_nil(e);
     164     1723140 :                         if (!is_outerjoin(rel->op) && found_without_semantics) /* at an outer join, null values pass */
     165      103699 :                                 set_has_no_nil(e);
     166     1723140 :                         if (is_join(rel->op) && is_unique(e) && !still_unique)
     167      110709 :                                 set_not_unique(e);
     168     1723140 :                         return e;
     169             :                 }
     170     4572014 :                 case op_table:
     171             :                 case op_basetable:
     172             :                 case op_union:
     173             :                 case op_except:
     174             :                 case op_inter:
     175             :                 case op_project:
     176             :                 case op_groupby: {
     177     4572014 :                         sql_exp *found;
     178     4572014 :                         atom *fval;
     179     4572014 :                         prop *est;
     180     4572014 :                         if ((found = rel_find_exp(rel, e))) {
     181     2091598 :                                 if (rel->op != op_table) { /* At the moment don't propagate statistics for table relations */
     182     2053114 :                                         if ((fval = find_prop_and_get(found->p, PROP_MAX)))
     183     1082920 :                                                 set_minmax_property(sql, e, PROP_MAX, fval);
     184     2053112 :                                         if ((fval = find_prop_and_get(found->p, PROP_MIN)))
     185     1089572 :                                                 set_minmax_property(sql, e, PROP_MIN, fval);
     186     2053110 :                                         if (!has_nil(found))
     187      538218 :                                                 set_has_no_nil(e);
     188     2053110 :                                         if (is_unique(found) || (need_distinct(rel) && list_length(rel->exps) == 1) ||
     189     1675772 :                                                 (is_groupby(rel->op) && (list_empty(rel->r) || (list_length(rel->r) == 1 && exps_find_exp(rel->r, e)))))
     190      388504 :                                                 set_unique(e);
     191             :                                         /* propagate unique estimation for known cases */
     192     2053110 :                                         if (is_groupby(rel->op) && list_empty(rel->r) && !find_prop(e->p, PROP_NUNIQUES)) { /* global aggregate case */
     193        7027 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     194        7027 :                                                 p->value.dval = 1;
     195     2046083 :                                         } else if (((is_basetable(rel->op) || is_except(rel->op) || is_inter(rel->op) || is_simple_project(rel->op) ||
     196       66218 :                                                                  (is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
     197     1990482 :                                                                 (est = find_prop(found->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES))) {
     198     1222575 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     199     1222576 :                                                 p->value.dval = est->value.dval;
     200             :                                         }
     201             :                                 }
     202     2091593 :                                 return e;
     203             :                         }
     204             :                         return NULL;
     205             :                 }
     206       83717 :                 case op_topn:
     207             :                 case op_sample:
     208       83717 :                         return rel_propagate_column_ref_statistics(sql, rel->l, e);
     209             :                 default:
     210             :                         break;
     211             :                 }
     212             :         }
     213             :         return NULL;
     214             : }
     215             : 
     216             : static atom *
     217     1024552 : atom_from_valptr( sql_allocator *sa, sql_subtype *tpe, ValRecord *v)
     218             : {
     219     1024552 :         atom *a = SA_NEW(sa, atom);
     220             : 
     221     1024547 :         assert(!VALisnil(v));
     222     1024541 :         *a = (atom) {.tpe = *tpe,};
     223     1024541 :         SA_VALcopy(sa, &a->data, v);
     224     1024540 :         return a;
     225             : }
     226             : 
     227             : static inline void
     228      883591 : rel_basetable_column_get_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
     229             : {
     230      883591 :         sql_column *c = NULL;
     231             : 
     232      883591 :         if ((c = name_find_column(rel, exp_relname(e), exp_name(e), -2, NULL))) {
     233      776819 :                 bool nonil = false, unique = false;
     234      776819 :                 double unique_est = 0.0;
     235      776819 :                 ValRecord min, max;
     236      776819 :                 int ok = mvc_col_stats(sql, c, &nonil, &unique, &unique_est, &min, &max);
     237             : 
     238      776821 :                 if (has_nil(e) && nonil)
     239       92477 :                         set_has_no_nil(e);
     240      776821 :                 if (!is_unique(e) && unique)
     241      159833 :                         set_unique(e);
     242      776821 :                 if (unique_est != 0.0) {
     243      597766 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     244      597768 :                         p->value.dval = unique_est;
     245             :                 }
     246      776823 :                 if ((ok & 2) == 2) {
     247      511447 :                         if (!VALisnil(&max)) {
     248      511443 :                                 prop *p = e->p = prop_create(sql->sa, PROP_MAX, e->p);
     249      511441 :                                 p->value.pval = atom_from_valptr(sql->sa, &c->type, &max);
     250             :                         }
     251      511436 :                         VALclear(&max);
     252             :                 }
     253      776814 :                 if ((ok & 1) == 1) {
     254      513139 :                         if (!VALisnil(&min)) {
     255      513138 :                                 prop *p = e->p = prop_create(sql->sa, PROP_MIN, e->p);
     256      513138 :                                 p->value.pval = atom_from_valptr(sql->sa, &c->type, &min);
     257             :                         }
     258      513140 :                         VALclear(&min);
     259             :                 }
     260             :         }
     261      883593 : }
     262             : 
     263             : static bool
     264      112552 : rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, sql_exp *e, int i)
     265             : {
     266      112552 :         sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
     267      112552 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     268      112552 :                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     269      112552 :         prop *est;
     270             : 
     271             :         /* for the intersection, if both expresssions don't overlap, it can be pruned */
     272      112552 :         if (is_inter(rel->op) && !has_nil(le) && !has_nil(re) &&
     273         185 :                 ((rval_max && lval_min && atom_cmp(rval_max, lval_min) < 0) || (rval_min && lval_max && atom_cmp(rval_min, lval_max) > 0)))
     274          23 :                 return true;
     275             : 
     276      112529 :         if (lval_max && rval_max) {
     277       30605 :                 if (is_union(rel->op))
     278       28071 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     279        2534 :                 else if (is_inter(rel->op))
     280         396 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval_max, rval_max)); /* for intersect the new max will be the min of the two */
     281             :                 else /* except */
     282        2138 :                         set_minmax_property(sql, e, PROP_MAX, lval_max);
     283             :         }
     284      112529 :         if (lval_min && rval_min) {
     285       30133 :                 if (is_union(rel->op))
     286       27599 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     287        2534 :                 else if (is_inter(rel->op))
     288         396 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval_min, rval_min)); /* for intersect the new min will be the max of the two */
     289             :                 else /* except */
     290        2138 :                         set_minmax_property(sql, e, PROP_MIN, lval_min);
     291             :         }
     292             : 
     293      112529 :         if (is_union(rel->op)) {
     294      109687 :                 if (!has_nil(le) && !has_nil(re))
     295       23746 :                         set_has_no_nil(e);
     296      109687 :                 if (need_distinct(rel) && list_length(rel->exps) == 1)
     297        1506 :                         set_unique(e);
     298        2842 :         } else if (is_inter(rel->op)) {
     299         562 :                 if (!has_nil(le) || !has_nil(re))
     300         275 :                         set_has_no_nil(e);
     301         562 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     302         342 :                         set_unique(e);
     303             :         } else {
     304        2280 :                 assert(is_except(rel->op));
     305        2280 :                 if (!has_nil(le))
     306         315 :                         set_has_no_nil(e);
     307        2280 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     308        2001 :                         set_unique(e);
     309             :         }
     310             :         /* propagate unique estimation for known cases */
     311      112529 :         if (!is_union(rel->op) && (est = find_prop(le->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES)) {
     312        1207 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     313        1207 :                 p->value.dval = est->value.dval;
     314             :         }
     315             :         return false;
     316             : }
     317             : 
     318             : static sql_exp *
     319     3442611 : rel_propagate_statistics(visitor *v, sql_rel *rel, sql_exp *e, int depth)
     320             : {
     321     3442611 :         mvc *sql = v->sql;
     322     3442611 :         atom *lval;
     323             : 
     324     3442611 :         (void) depth;
     325     3442611 :         switch(e->type) {
     326     2096387 :         case e_column:
     327     2096387 :                 switch (rel->op) { /* set relations don't call rel_propagate_statistics */
     328      260625 :                 case op_join:
     329             :                 case op_left:
     330             :                 case op_right:
     331             :                 case op_full:
     332             :                 case op_semi:
     333             :                 case op_anti: {
     334      260625 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e);
     335      260625 :                         if (!found)
     336      130666 :                                 (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     337             :                         break;
     338             :                 }
     339     1835762 :                 case op_select:
     340             :                 case op_project:
     341             :                 case op_groupby: {
     342     1835762 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e); /* labels may be found on the same projection, ugh */
     343     1835762 :                         if (!found && is_simple_project(rel->op))
     344      151612 :                                 (void) rel_propagate_column_ref_statistics(sql, rel, e);
     345             :                         break;
     346             :                 }
     347           0 :                 case op_insert:
     348             :                 case op_update:
     349             :                 case op_delete:
     350           0 :                         (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     351           0 :                         break;
     352             :                 default:
     353             :                         break;
     354             :                 }
     355             :                 break;
     356      199124 :         case e_convert: {
     357      199124 :                 sql_subtype *to = exp_totype(e), *from = exp_fromtype(e);
     358      199124 :                 sql_exp *l = e->l;
     359      199124 :                 sql_class fr = from->type->eclass, too = to->type->eclass;
     360      199124 :                 prop *est;
     361             : 
     362      199124 :                 if (fr == too) {
     363      154021 :                         if ((lval = find_prop_and_get(l->p, PROP_MAX))) {
     364       97517 :                                 atom *res = atom_copy(sql->sa, lval);
     365       97517 :                                 if ((res = atom_cast(sql->sa, res, to)))
     366       96805 :                                         set_minmax_property(sql, e, PROP_MAX, res);
     367             :                         }
     368      154021 :                         if ((lval = find_prop_and_get(l->p, PROP_MIN))) {
     369       99142 :                                 atom *res = atom_copy(sql->sa, lval);
     370       99142 :                                 if ((res = atom_cast(sql->sa, res, to)))
     371       98430 :                                         set_minmax_property(sql, e, PROP_MIN, res);
     372             :                         }
     373             :                 }
     374             :                 /* propagating nuniques through conversions is tricky. I am adding just the general cases */
     375      199124 :                 if ((est = find_prop(l->p, PROP_NUNIQUES)) &&
     376      134135 :                         (((fr == EC_TIME || fr == EC_TIME_TZ || fr == EC_DATE) && (too == EC_TIMESTAMP || too == EC_TIMESTAMP_TZ)) ||
     377      134110 :                          ((fr == EC_NUM || fr == EC_BIT) && too == EC_NUM) || (fr == EC_DEC && too == EC_DEC) || (fr != EC_EXTERNAL && too == EC_STRING))) {
     378       89631 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     379       89631 :                         p->value.dval = est->value.dval;
     380             :                 }
     381      199124 :                 if (!has_nil(l))
     382       67474 :                         set_has_no_nil(e);
     383             :                 break;
     384             :         }
     385      338845 :         case e_aggr:
     386             :         case e_func: {
     387      338845 :                 BUN lv;
     388      338845 :                 sql_subfunc *f = e->f;
     389             : 
     390      338845 :                 if (!f->func->s) {
     391      312538 :                         int key = hash_key(f->func->base.name); /* Using hash lookup */
     392      312538 :                         sql_hash_e *he = sql_functions_lookup->buckets[key&(sql_functions_lookup->size-1)];
     393      312538 :                         lookup_function look = NULL;
     394             : 
     395      680550 :                         for (; he && !look; he = he->chain) {
     396      368012 :                                 struct function_properties* fp = (struct function_properties*) he->value;
     397             : 
     398      368012 :                                 if (!strcmp(f->func->base.name, fp->name))
     399      105530 :                                         look = fp->func;
     400             :                         }
     401      312538 :                         if (look)
     402      105530 :                                 look(sql, e);
     403             :                 }
     404             :                 /* for global aggregates with no semantics, if the left relation has values, then the output won't be NULL */
     405      338845 :                 if (!is_semantics(e) && e->l && !have_nil(e->l) &&
     406       41367 :                         (e->type != e_aggr || (is_groupby(rel->op) && list_length(rel->r)) || ((lv = get_rel_count(rel->l)) != BUN_NONE && lv > 0)))
     407       41303 :                         set_has_no_nil(e);
     408             :                 /* set properties for global aggregates */
     409      338845 :                 if (e->type == e_aggr && is_groupby(rel->op) && list_empty(rel->r)) {
     410        7401 :                         if (!find_prop(e->p, PROP_NUNIQUES)) {
     411        7401 :                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     412        7401 :                                 p->value.dval = 1;
     413             :                         }
     414        7401 :                         set_unique(e);
     415             :                 }
     416             :                 break;
     417             :         }
     418      535416 :         case e_atom:
     419      535416 :                 if (e->l) {
     420      521489 :                         atom *a = (atom*) e->l;
     421      521489 :                         if (!a->isnull) {
     422      464549 :                                 set_minmax_property(sql, e, PROP_MAX, a);
     423      464549 :                                 set_minmax_property(sql, e, PROP_MIN, a);
     424             :                         }
     425      521489 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     426      521489 :                         p->value.dval = 1;
     427       13927 :                 } else if (e->f) {
     428        2669 :                         list *vals = (list *) e->f;
     429        2669 :                         sql_exp *first = vals->h ? vals->h->data : NULL;
     430        2669 :                         atom *max = NULL, *min = NULL; /* all child values must have a valid min/max */
     431        2669 :                         int has_nil = 0;
     432             : 
     433        2669 :                         if (first) {
     434        2669 :                                 max = ((lval = find_prop_and_get(first->p, PROP_MAX))) ? lval : NULL;
     435        2669 :                                 min = ((lval = find_prop_and_get(first->p, PROP_MIN))) ? lval : NULL;
     436        2669 :                                 has_nil |= has_nil(first);
     437             :                         }
     438             : 
     439       12262 :                         for (node *n = vals->h ? vals->h->next : NULL ; n ; n = n->next) {
     440        6924 :                                 sql_exp *ee = n->data;
     441             : 
     442        6924 :                                 if (min && max) {
     443        6474 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MAX))) {
     444        6424 :                                                 max = atom_cmp(lval, max) > 0 ? lval : max;
     445             :                                         } else {
     446             :                                                 max = NULL;
     447             :                                         }
     448        6474 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MIN))) {
     449        6424 :                                                 min = atom_cmp(min, lval) > 0 ? lval : min;
     450             :                                         } else {
     451             :                                                 min = NULL;
     452             :                                         }
     453             :                                 }
     454        6924 :                                 has_nil |= has_nil(ee);
     455             :                         }
     456             : 
     457        2669 :                         if (!has_nil)
     458        2305 :                                 set_has_no_nil(e);
     459        2669 :                         if (min && max) {
     460        2282 :                                 set_minmax_property(sql, e, PROP_MAX, max);
     461        2282 :                                 set_minmax_property(sql, e, PROP_MIN, min);
     462             :                         }
     463        2669 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     464        2669 :                         p->value.dval = (dbl) list_length(vals);
     465             :                 } else {
     466       11258 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     467       11258 :                         p->value.dval = 1;
     468             :                 }
     469             :                 break;
     470      272839 :         case e_cmp:
     471             :                 /* TODO? propagating min/max/unique of booleans is not very worth it */
     472      272839 :                 if (e->flag == cmp_or || e->flag == cmp_filter) {
     473       17661 :                         if (!have_nil(e->l) && !have_nil(e->r))
     474        2995 :                                 set_has_no_nil(e);
     475      255178 :                 } else if (e->flag == cmp_in || e->flag == cmp_notin) {
     476       19429 :                         sql_exp *le = e->l;
     477       19429 :                         if (!has_nil(le) && !have_nil(e->r))
     478         836 :                                 set_has_no_nil(e);
     479             :                 } else {
     480      235749 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     481      235749 :                         if (!has_nil(le) && !has_nil(re) && (!e->f || !has_nil(fe)))
     482       24703 :                                 set_has_no_nil(e);
     483             :                 }
     484             :                 break;
     485             :         case e_psm:
     486             :                 break;
     487             :         }
     488             : 
     489             : #ifndef NDEBUG
     490             :         {
     491             :                 /* min and max cannot be NULL and min must be <= than max, if it happens the inner relation must be empty! */
     492     3442611 :                 atom *min = find_prop_and_get(e->p, PROP_MIN), *max = find_prop_and_get(e->p, PROP_MAX);
     493             : 
     494     3442609 :                 (void) min;
     495     3442609 :                 (void) max;
     496     3442609 :                 assert(!min || !min->isnull);
     497     3442609 :                 assert(!max || !max->isnull);
     498     3442609 :                 assert(!min || !max || (min && max)/* atom_cmp(min, max) <= 0*/);
     499             :         }
     500             : #endif
     501     3442609 :         return e;
     502             : }
     503             : 
     504             : static list * /* Remove predicates always false from min/max values */
     505      131364 : rel_prune_predicates(visitor *v, sql_rel *rel)
     506             : {
     507      131364 :         if (rel->l) {
     508      131364 :                 sql_rel *l = rel->l;
     509      131364 :                 if (is_single(l))
     510        3142 :                         return rel->exps;
     511             :         }
     512      128222 :         if (!list_empty(rel->attr))
     513        2927 :                 return rel->exps;
     514      267320 :         for (node *n = rel->exps->h ; n ; n = n->next) {
     515      142025 :                 sql_exp *e = n->data;
     516             : 
     517      142025 :                 if (e->type == e_cmp && is_theta_exp(e->flag)) {
     518      117873 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     519      117873 :                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     520      117873 :                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     521      117873 :                         bool always_false = false, always_true = false;
     522             : 
     523      117873 :                         if (fe && !is_symmetric(e)) {
     524        3018 :                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
     525        3018 :                                 comp_type lower = range2lcompare(e->flag), higher = range2rcompare(e->flag);
     526        3584 :                                 int not_int1 = rval_min && lval_max && /* the middle and left intervals don't overlap */
     527        1591 :                                         (!is_anti(e) && (lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0)),
     528        3997 :                                         not_int2 = lval_min && fval_max && /* the middle and right intervals don't overlap */
     529        2416 :                                         (!is_anti(e) && (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0)),
     530        3018 :                                         not_int3 = rval_min && fval_max && /* the left interval is after the right one */
     531        1254 :                                         (!is_anti(e) && (atom_cmp(rval_min, fval_max) > 0));
     532             : 
     533        3018 :                                 always_false |= not_int1 || not_int2 || not_int3;
     534             :                                 /* for anti the middle must be before the left or after the right or the right after the left, for the other the middle must be always between the left and right intervals */
     535        3528 :                                 always_true |= !has_nil(le) && !has_nil(re) && !has_nil(fe) &&
     536        3372 :                                         lval_min && lval_max && rval_min && rval_max && fval_min && fval_max &&
     537          75 :                                         (is_anti(e) ? ((lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) || (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
     538          61 :                                          ((lower == cmp_gte ? atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0)));
     539             :                         } else if (!fe) {
     540      114837 :                                 if (!is_semantics(e)) /* trival not null cmp null case */
     541      215368 :                                         always_false |= !is_anti(e) && ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)));
     542      114837 :                                 switch (e->flag) {
     543      100065 :                                 case cmp_equal:
     544      100065 :                                         if (lval_min && lval_max && rval_min && rval_max && (!is_semantics(e) || !has_nil(le) || !has_nil(re)))
     545      102802 :                                                 always_false |= (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     546      100065 :                                         if (is_semantics(e)) { /* prune *= NULL cases */
     547        7130 :                                                 always_false |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     548       14260 :                                                 always_true |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     549             :                                         }
     550             :                                         break;
     551        6997 :                                 case cmp_notequal:
     552        6997 :                                         if (lval_min && lval_max && rval_min && rval_max)
     553       11050 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     554        6997 :                                         if (is_semantics(e)) {
     555          23 :                                                 always_true |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     556          46 :                                                 always_false |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     557             :                                         }
     558             :                                         break;
     559        5158 :                                 case cmp_gt:
     560        5158 :                                         if (lval_max && rval_min)
     561        1909 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     562        5158 :                                         if (lval_min && rval_max)
     563        3818 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     564             :                                         break;
     565         111 :                                 case cmp_gte:
     566         111 :                                         if (lval_max && rval_min)
     567          49 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     568         111 :                                         if (lval_min && rval_max)
     569          98 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     570             :                                         break;
     571        2423 :                                 case cmp_lt:
     572        2423 :                                         if (lval_min && rval_max)
     573        1372 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     574        2423 :                                         if (lval_max && rval_min)
     575        2792 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     576             :                                         break;
     577          83 :                                 case cmp_lte:
     578          83 :                                         if (lval_min && rval_max)
     579          11 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     580          83 :                                         if (lval_max && rval_min)
     581          22 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     582             :                                         break;
     583             :                                 default: /* Maybe later I can do cmp_in and cmp_notin but don't forget to remove is_theta_exp check up there */
     584             :                                         break;
     585             :                                 }
     586             :                         }
     587      117873 :                         assert(!always_false || !always_true);
     588      117873 :                         if (always_false || always_true) {
     589        1864 :                                 sql_exp *ne = exp_atom_bool(v->sql->sa, always_true /* if always_true set then true else false */);
     590        1864 :                                 if (exp_name(e))
     591           0 :                                         exp_prop_alias(v->sql->sa, ne, e);
     592        1864 :                                 n->data = ne;
     593        1864 :                                 v->changes++;
     594             :                         }
     595             :                 }
     596             :         }
     597      125295 :         return rel->exps;
     598             : }
     599             : 
     600             : static sql_rel *
     601        4409 : set_setop_side(visitor *v, sql_rel *rel, sql_rel *side)
     602             : {
     603        4409 :         if (side == rel->r)
     604          84 :                 rel->r = NULL;
     605             :         else
     606        4325 :                 rel->l = NULL;
     607        4409 :         if (!is_simple_project(side->op) || rel_is_ref(side) || !list_empty(side->r)) {
     608         658 :                 side = rel_project(v->sql->sa, side, rel_projections(v->sql, side, NULL, 0, 1));
     609         658 :                 set_count_prop(v->sql->sa, side, get_rel_count(side->l));
     610         658 :                 side->exps = exps_exp_visitor_bottomup(v, side, side->exps, 0, &rel_propagate_statistics, false);
     611             :         }
     612       24492 :         for (node *n = rel->exps->h, *m = side->exps->h ; n && m ; n = n->next, m = m->next) {
     613       20083 :                 sql_exp *e1 = n->data, *e2 = m->data;
     614       20083 :                 exp_prop_alias(v->sql->sa, e2, e1);
     615             :         }
     616        4409 :         list_hash_clear(side->exps);
     617             : 
     618        4409 :         if (need_distinct(rel))
     619           4 :                 set_distinct(side);
     620        4409 :         rel_destroy(rel);
     621        4409 :         return side;
     622             : }
     623             : 
     624             : static BUN
     625       13244 : trivial_project_exp_card(sql_exp *e)
     626             : {
     627       13261 :         if (e->type == e_convert)
     628          17 :                 return trivial_project_exp_card(e->l);
     629       13244 :         return e->type == e_atom && e->f ? (BUN) list_length(e->f) : 1;
     630             : }
     631             : 
     632             : static BUN
     633       23907 : rel_calc_nuniques(mvc *sql, sql_rel *l, list *exps)
     634             : {
     635       23907 :         BUN lv = get_rel_count(l);
     636             : 
     637       23907 :         if (lv == 0)
     638             :                 return 0;
     639       21321 :         if (!list_empty(exps)) {
     640       21321 :                 BUN nuniques = 0;
     641             :                 /* compute the highest number of unique values */
     642       50430 :                 for (node *n = exps->h ; n && nuniques != BUN_NONE ; n = n->next) {
     643       29109 :                         sql_exp *e = n->data;
     644       29109 :                         sql_rel *bt = NULL;
     645       29109 :                         prop *p = NULL;
     646       29109 :                         BUN euniques = BUN_NONE;
     647       29109 :                         atom *min, *max, *sub = NULL;
     648       29109 :                         sql_subtype *tp = exp_subtype(e);
     649       29109 :                         sql_class ec = tp ? tp->type->eclass : EC_STRING; /* if 'e' has no type (eg parameter), use a non-number type to fail condition */
     650             : 
     651       29109 :                         if ((p = find_prop(e->p, PROP_NUNIQUES))) {
     652       20261 :                                 euniques = (BUN) p->value.dval;
     653        8848 :                         } else if (e->type == e_column && rel_find_exp_and_corresponding_rel(l, e, false, &bt, NULL) && bt && (p = find_prop(bt->p, PROP_COUNT))) {
     654        5040 :                                 euniques = (BUN) p->value.lval;
     655             :                         }
     656             :                         /* use min to max range to compute number of possible values in the domain for number types */
     657       29109 :                         if ((EC_TEMP(ec)||ec==EC_NUM||ec==EC_MONTH||ec==EC_POS) &&
     658       22418 :                                 (min = find_prop_and_get(e->p, PROP_MIN)) && (max = find_prop_and_get(e->p, PROP_MAX))) {
     659             :                                 /* the range includes min and max, so the atom_inc call is needed */
     660             :                                 /* if 'euniques' has number of distinct values, compute min between both */
     661       17912 :                                 if ((sub = atom_sub(sql->sa, max, min)) && (sub = atom_inc(sql->sa, sub)) && (sub = atom_cast(sql->sa, sub, sql_bind_localtype("oid"))))
     662           0 :                                         euniques = MIN(euniques, (BUN) sub->data.val.oval);
     663             :                         }
     664       29109 :                         if (euniques != BUN_NONE)
     665       25301 :                                 nuniques = MAX(nuniques, euniques); /* the highest cardinality sets the estimation */
     666             :                         else
     667             :                                 nuniques = BUN_NONE;
     668             :                 }
     669       21321 :                 if (nuniques != BUN_NONE)
     670             :                         return nuniques;
     671             :         }
     672             :         return lv;
     673             : }
     674             : 
     675             : static sql_rel *
     676     1907646 : rel_get_statistics_(visitor *v, sql_rel *rel)
     677             : {
     678             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
     679     1907646 :         uint8_t has_special_modify = *(uint8_t*) v->data;
     680     1907646 :         bool can_be_pruned = !has_special_modify && v->storage_based_opt;
     681             : 
     682             :         /* Don't look at the same relation twice */
     683     1907646 :         if (are_statistics_gathered(rel->used))
     684             :                 return rel;
     685     1288280 :         rel->used |= statistics_gathered;
     686             : 
     687     1288280 :         switch (rel->op) {
     688      301221 :         case op_basetable: {
     689      301221 :                 sql_table *t = (sql_table *) rel->l;
     690      301221 :                 sqlstore *store = v->sql->session->tr->store;
     691             : 
     692      301221 :                 if (!list_empty(rel->exps)) {
     693     1184764 :                         for (node *n = rel->exps->h ; n ; n = n->next)
     694      883590 :                                 rel_basetable_column_get_statistics(v->sql, rel, n->data);
     695             :                 }
     696             :                 /* Set table row count. TODO? look for remote tables. Don't look at storage for declared tables, because it won't be cleaned */
     697      301223 :                 if (isTable(t) && t->s && !isDeclaredTable(t)) /* count active rows only */
     698      251039 :                         set_count_prop(v->sql->sa, rel, (BUN)store->storage_api.count_col(v->sql->session->tr, ol_first_node(t->columns)->data, 10));
     699             :                 break;
     700             :         }
     701       24084 :         case op_union:
     702             :         case op_inter:
     703             :         case op_except: {
     704       24084 :                 bool empty_cross = false;
     705       24084 :                 int i = 0;
     706       24084 :                 sql_rel *l = rel->l, *pl = rel->l, *r = rel->r, *pr = rel->r;
     707             : 
     708       24157 :                 while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     709          73 :                         pl = pl->l;
     710       24154 :                 while (is_sample(pr->op) || is_topn(pr->op))
     711          70 :                         pr = pr->l;
     712             :                 /* if it's not a projection, then project and propagate statistics */
     713       24084 :                 if (!is_project(pl->op) && !is_base(pl->op)) {
     714           2 :                         pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     715           2 :                         set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     716           2 :                         pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     717             :                 }
     718       24084 :                 if (!is_project(pr->op) && !is_base(pr->op)) {
     719          46 :                         pr = rel_project(v->sql->sa, pr, rel_projections(v->sql, pr, NULL, 0, 1));
     720          46 :                         set_count_prop(v->sql->sa, pr, get_rel_count(pr->l));
     721          46 :                         pr->exps = exps_exp_visitor_bottomup(v, pr, pr->exps, 0, &rel_propagate_statistics, false);
     722             :                 }
     723             : 
     724      136636 :                 for (node *n = rel->exps->h ; n ; n = n->next) {
     725      112552 :                         empty_cross |= rel_setop_get_statistics(v->sql, rel, pl->exps, pr->exps, n->data, i);
     726      112552 :                         i++;
     727             :                 }
     728             : 
     729             :                 /* propagate row count */
     730       24084 :                 if (is_union(rel->op)) {
     731       21582 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     732       21582 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     733             : 
     734       21582 :                         if (lv == 0 && rv == 0) { /* both sides empty */
     735        1560 :                                 if (can_be_pruned)
     736             :                                         empty_cross = true;
     737             :                                 else
     738         482 :                                         set_count_prop(v->sql->sa, rel, 0);
     739       20022 :                         } else if (can_be_pruned && lv == 0 && !rel_is_ref(rel)) { /* left side empty */
     740          84 :                                 rel = set_setop_side(v, rel, r);
     741          84 :                                 empty_cross = false; /* don't rewrite again */
     742       19938 :                         } else if (can_be_pruned && rv == 0 && !rel_is_ref(rel)) { /* right side empty */
     743        4312 :                                 rel = set_setop_side(v, rel, l);
     744        4312 :                                 empty_cross = false; /* don't rewrite again */
     745       15626 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
     746       10620 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
     747             :                         }
     748        2502 :                 } else if (is_inter(rel->op) || is_except(rel->op)) {
     749        2502 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     750        2502 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     751             : 
     752        2502 :                         if (lv == 0) { /* left side empty */
     753          56 :                                 if (can_be_pruned)
     754             :                                         empty_cross = true;
     755             :                                 else
     756           6 :                                         set_count_prop(v->sql->sa, rel, 0);
     757        2446 :                         } else if (rv == 0) { /* right side empty */
     758          16 :                                 if (is_inter(rel->op)) {
     759           3 :                                         if (can_be_pruned)
     760             :                                                 empty_cross = true;
     761             :                                         else
     762           0 :                                                 set_count_prop(v->sql->sa, rel, 0);
     763          13 :                                 } else if (can_be_pruned && !rel_is_ref(rel)) {
     764          13 :                                         rel = set_setop_side(v, rel, l);
     765          13 :                                         empty_cross = false; /* don't rewrite again */
     766             :                                 } else {
     767           0 :                                         set_count_prop(v->sql->sa, rel, lv);
     768             :                                 }
     769             :                         } else {
     770        2430 :                                 set_count_prop(v->sql->sa, rel, lv);
     771             :                         }
     772             :                 }
     773       24084 :                 if (can_be_pruned && empty_cross) {
     774        1148 :                         rel_destroy(rel->l);
     775        1148 :                         rel->l = NULL;
     776        1148 :                         rel_destroy(rel->r);
     777        1148 :                         rel->r = NULL;
     778        3977 :                         for (node *n = rel->exps->h ; n ; n = n->next) {
     779        2829 :                                 sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL));
     780        2829 :                                 exp_prop_alias(v->sql->sa, a, e);
     781        2829 :                                 n->data = a;
     782             :                         }
     783        1148 :                         list_hash_clear(rel->exps);
     784        1148 :                         sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     785        1148 :                         set_count_prop(v->sql->sa, l, 1);
     786        1148 :                         l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     787        1148 :                         set_count_prop(v->sql->sa, l, 0);
     788        1148 :                         rel->op = op_project;
     789        1148 :                         rel->l = l;
     790        1148 :                         rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     791        1148 :                         set_count_prop(v->sql->sa, rel, 0);
     792        1148 :                         set_nodistinct(rel); /* set relations may have distinct flag set */
     793        1148 :                         v->changes++;
     794             :                 }
     795             :                 break;
     796             :         }
     797      499457 :         case op_join:
     798             :         case op_left:
     799             :         case op_right:
     800             :         case op_full:
     801             :         case op_semi:
     802             :         case op_anti:
     803             :         case op_select:
     804             :         case op_project:
     805             :         case op_groupby: {
     806      499457 :                 if (is_groupby(rel->op) && !list_empty(rel->r))
     807       15519 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     808      499457 :                 rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, &rel_propagate_statistics, false);
     809      499458 :                 if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r))
     810       38010 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     811             :                 /* The following optimizations can only be applied after propagating the statistics to rel->exps */
     812      499458 :                 if (can_be_pruned && (is_join(rel->op) || is_select(rel->op)) && !list_empty(rel->exps)) {
     813      131364 :                         int changes = v->changes;
     814      131364 :                         rel->exps = rel_prune_predicates(v, rel);
     815      131364 :                         if (v->changes > changes)
     816        1817 :                                 rel = rewrite_simplify(v, 0, v->value_based_opt, rel);
     817             :                 }
     818             : 
     819             :                 /* propagate row count */
     820      499458 :                 sql_rel *l = rel->l, *r = rel->r;
     821      499458 :                 switch (rel->op) {
     822      128253 :                 case op_join:
     823             :                 case op_left:
     824             :                 case op_right:
     825             :                 case op_full: {
     826      128253 :                         BUN lv = get_rel_count(l), rv = get_rel_count(r), uniques_estimate = BUN_MAX, join_idx_estimate = BUN_MAX;
     827             : 
     828      128253 :                         if (!list_empty(rel->exps) && !is_single(rel)) {
     829      234525 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     830      119878 :                                         sql_exp *e = n->data, *el = e->l, *er = e->r;
     831             : 
     832      119878 :                                         if (find_prop(e->p, PROP_JOINIDX)) {
     833         656 :                                                 join_idx_estimate = lv>rv?lv:rv;
     834      119222 :                                         } else if (e->type == e_cmp && e->flag == cmp_equal) {
     835             :                                                 /* if one of the sides is unique, the cardinality will be that exact number, but look for nulls */
     836      115366 :                                                 if (!is_semantics(e) || !has_nil(el) || !has_nil(er)) {
     837      114840 :                                                         BUN lu = 0, ru = 0;
     838      114840 :                                                         prop *p = NULL;
     839      114840 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES)))
     840       84112 :                                                                 lu = (BUN) p->value.dval;
     841      114840 :                                                         if ((p = find_prop(er->p, PROP_NUNIQUES)))
     842       98678 :                                                                 ru = (BUN) p->value.dval;
     843      114840 :                                                         if (is_unique(el) || lu > lv) {
     844       66585 :                                                                 BUN ncount = (is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv;
     845       66585 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
     846       48255 :                                                         } else if (is_unique(er) || ru > rv) {
     847       29054 :                                                                 BUN ncount = (is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv;
     848       29054 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
     849             :                                                         }
     850             :                                                 }
     851             :                                         }
     852             :                                 }
     853             :                         }
     854      128253 :                         if (is_single(rel)) {
     855        4469 :                                 set_count_prop(v->sql->sa, rel, lv);
     856      123784 :                         } else if (join_idx_estimate != BUN_MAX) {
     857         655 :                                 set_count_prop(v->sql->sa, rel, join_idx_estimate);
     858      123129 :                         } else if (uniques_estimate != BUN_MAX) {
     859       91735 :                                 set_count_prop(v->sql->sa, rel, uniques_estimate);
     860       31394 :                         } else if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
     861             :                                 /* corner cases for outer joins */
     862          94 :                                 if (is_left(rel->op)) {
     863          81 :                                         set_count_prop(v->sql->sa, rel, lv);
     864          13 :                                 } else if (is_right(rel->op)) {
     865           4 :                                         set_count_prop(v->sql->sa, rel, rv);
     866           9 :                                 } else if (is_full(rel->op) && lv != BUN_NONE && rv != BUN_NONE) {
     867           9 :                                         set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
     868           0 :                                 } else if (lv != BUN_NONE && rv != BUN_NONE) {
     869           0 :                                         set_count_prop(v->sql->sa, rel, 0);
     870             :                                 }
     871       31300 :                         } else if (lv == 0) {
     872        1170 :                                 set_count_prop(v->sql->sa, rel, (is_right(rel->op) || is_full(rel->op)) ? rv : 0);
     873       30701 :                         } else if (rv == 0) {
     874        1466 :                                 set_count_prop(v->sql->sa, rel, (is_left(rel->op) || is_full(rel->op)) ? lv : 0);
     875       29796 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
     876       19729 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX / lv)) ? BUN_MAX : (lv * rv)); /* overflow check */
     877             :                         }
     878             :                         break;
     879             :                 }
     880        2722 :                 case op_anti:
     881        2722 :                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
     882        2722 :                         break;
     883       99414 :                 case op_semi:
     884             :                 case op_select:
     885             :                         /* TODO calculate cardinalities using selectivities */
     886       99414 :                         if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
     887        1700 :                                 set_count_prop(v->sql->sa, rel, 0);
     888             :                         } else {
     889      192189 :                                 if (!list_empty(rel->exps) && !is_single(rel)) {
     890       94475 :                                         BUN cnt = get_rel_count(l), u = 1;
     891      171996 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
     892      107460 :                                                 sql_exp *e = n->data, *el = e->l, *er = e->r;
     893             : 
     894             :                                                 /* simple expressions first */
     895      107460 :                                                 if (e->type == e_cmp && e->flag == cmp_equal && exp_is_atom(er)) {
     896             :                                                         /* use selectivity */
     897       54324 :                                                         prop *p;
     898       54324 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES))) {
     899       29939 :                                                                 u = (BUN) p->value.dval;
     900       29939 :                                                                 break;
     901             :                                                         }
     902             :                                                 }
     903             :                                         }
     904             :                                         /* u is an *estimate*, so don't set count_prop to 0 unless cnt is 0 */
     905       94475 :                                         set_count_prop(v->sql->sa, rel, cnt == 0 ? 0 : u == 0 || u > cnt ? 1 : cnt/u);
     906             :                                 } else {
     907        3239 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
     908             :                                 }
     909             :                         }
     910             :                         break;
     911      246255 :                 case op_project:
     912      246255 :                         if (l) {
     913      236932 :                                 if (need_distinct(rel)) {
     914           0 :                                         set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->exps));
     915             :                                 } else {
     916      236932 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
     917             :                                 }
     918             :                         } else {
     919        9323 :                                 BUN card = 1;
     920             : 
     921        9323 :                                 if (!list_empty(rel->exps)) {
     922       21355 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
     923       12032 :                                                 sql_exp *e = n->data;
     924       12032 :                                                 card = MAX(card, trivial_project_exp_card(e));
     925             :                                         }
     926             :                                 }
     927        9323 :                                 set_count_prop(v->sql->sa, rel, card);
     928             :                         }
     929             :                         break;
     930       22307 :                 case op_groupby:
     931       22307 :                         if (list_empty(rel->r)) {
     932        6788 :                                 set_count_prop(v->sql->sa, rel, 1);
     933             :                         } else {
     934       15519 :                                 set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->r));
     935             :                         }
     936             :                         break;
     937             :                 default:
     938             :                         break;
     939             :                 }
     940             :                 break;
     941             :         }
     942       16939 :         case op_topn: {
     943       16939 :                 BUN lv = get_rel_count(rel->l);
     944             : 
     945       16939 :                 if (lv != BUN_NONE) {
     946       16930 :                         sql_exp *le = rel->exps->h->data, *oe = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
     947          96 :                         if (oe && oe->l && !exp_is_null(oe)) { /* no parameters */
     948          96 :                                 BUN offset = (BUN) ((atom*)oe->l)->data.val.lval;
     949          96 :                                 lv = offset >= lv ? 0 : lv - offset;
     950             :                         }
     951       16930 :                         if (le->l && !exp_is_null(le)) {
     952       16897 :                                 BUN limit = (BUN) ((atom*)le->l)->data.val.lval;
     953       16897 :                                 lv = MIN(lv, limit);
     954             :                         }
     955       16930 :                         set_count_prop(v->sql->sa, rel, lv);
     956             :                 }
     957             :                 break;
     958             :         }
     959          22 :         case op_sample: {
     960          22 :                 BUN lv = get_rel_count(rel->l);
     961             : 
     962          22 :                 if (lv != BUN_NONE) {
     963           4 :                         sql_exp *se = rel->exps->h->data;
     964           4 :                         sql_subtype *tp = exp_subtype(se);
     965             : 
     966           4 :                         if (se->l && tp->type->eclass == EC_NUM) { /* sample is a number of rows */
     967           4 :                                 BUN sample = (BUN) ((atom*)se->l)->data.val.lval;
     968           4 :                                 lv = MIN(lv, sample);
     969           0 :                         } else if (se->l) { /* sample is a percentage of rows */
     970           0 :                                 dbl percent = ((atom*)se->l)->data.val.dval;
     971           0 :                                 assert(tp->type->eclass == EC_FLT);
     972           0 :                                 lv = (BUN) ceil((dbl)lv * percent);
     973             :                         }
     974           4 :                         set_count_prop(v->sql->sa, rel, lv);
     975             :                 }
     976             :                 break;
     977             :         }
     978        5533 :         case op_table: {
     979        5533 :                 sql_exp *op = rel->r;
     980        5533 :                 if (rel->flag != TRIGGER_WRAPPER && op) {
     981        5229 :                         sql_subfunc *f = op->f;
     982        5229 :                         if (f->func->lang == FUNC_LANG_SQL) {
     983         216 :                                 set_count_prop(v->sql->sa, rel, 1000 /* just some fallback value */);
     984        5013 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "storage") == 0) {
     985         483 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of catalog */);
     986        4530 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "db_users") == 0) {
     987           0 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of users */);
     988        4530 :                         } else if (f->func->lang == FUNC_LANG_MAL && strncmp(f->func->base.name, "querylog", 8) == 0) {
     989        1017 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of querylog */);
     990        3513 :                         } else if (f->func->lang == FUNC_LANG_MAL &&
     991        3410 :                                            (strcmp(f->func->base.name, "queue") == 0 ||
     992        3152 :                                                 strcmp(f->func->base.name, "optimizers") == 0 ||
     993        2874 :                                                 strcmp(f->func->base.name, "env") == 0 ||
     994        2616 :                                                 strcmp(f->func->base.name, "keywords") == 0 ||
     995        2616 :                                                 strcmp(f->func->base.name, "statistics") == 0 ||
     996        2007 :                                                 strcmp(f->func->base.name, "rejects") == 0 ||
     997        1761 :                                                 strcmp(f->func->base.name, "schemastorage") == 0 ||
     998        1761 :                                                 strncmp(f->func->base.name, "storage", 7) == 0 ||
     999        1761 :                                                 strcmp(f->func->base.name, "sessions") == 0) ) {
    1000        1898 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of queue */);
    1001             :                         }
    1002             :                         /* else {
    1003             :                                 printf("%%func needs stats : %s\n", f->func->base.name);
    1004             :                         } */
    1005             :                 }
    1006             :                 break;
    1007             :         }
    1008             :         /*These relations are less important for now
    1009             :           TODO later we can tune it */
    1010             :         case op_insert:
    1011             :         case op_update:
    1012             :         case op_delete:
    1013             :         case op_truncate:
    1014             :         case op_ddl:
    1015             :         default:
    1016             :                 break;
    1017             :         }
    1018             : 
    1019             :         return rel;
    1020             : }
    1021             : 
    1022             : static sql_rel *
    1023      522758 : rel_get_statistics(visitor *v, global_props *gp, sql_rel *rel)
    1024             : {
    1025             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
    1026      522758 :         uint8_t has_special_modify = (uint8_t) gp->has_special_modify;
    1027      522758 :         v->data = &has_special_modify;
    1028      522758 :         rel = rel_visitor_bottomup(v, rel, &rel_get_statistics_);
    1029      522761 :         v->data = gp;
    1030      522761 :         return rel;
    1031             : }
    1032             : 
    1033             : run_optimizer
    1034      686774 : bind_get_statistics(visitor *v, global_props *gp)
    1035             : {
    1036      686774 :         (void) v;
    1037      686774 :         return (gp->opt_level == 1 && !gp->cnt[op_insert]) ? rel_get_statistics : NULL;
    1038             : }
    1039             : 
    1040             : 
    1041             : static bool
    1042       97657 : point_select_on_unique_column(sql_rel *rel)
    1043             : {
    1044       97657 :         if (is_select(rel->op) && !list_empty(rel->exps)) {
    1045      133094 :                 for (node *n = rel->exps->h; n ; n = n->next) {
    1046       76390 :                         sql_exp *e = n->data, *el = e->l, *er = e->r, *found = NULL;
    1047             : 
    1048       76390 :                         if (is_compare(e->type) && e->flag == cmp_equal) {
    1049       33947 :                                 if (is_numeric_upcast(el))
    1050           0 :                                         el = el->l;
    1051       33947 :                                 if (is_numeric_upcast(er))
    1052           0 :                                         er = er->l;
    1053       33947 :                                 if (is_alias(el->type) && exp_is_atom(er) && (found = rel_find_exp(rel->l, el)) &&
    1054          27 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(found) || !has_nil(er)))
    1055             :                                         return true;
    1056       33921 :                                 if (is_alias(er->type) && exp_is_atom(el) && (found = rel_find_exp(rel->l, er)) &&
    1057           0 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(el) || !has_nil(found)))
    1058             :                                         return true;
    1059             :                         }
    1060             :                 }
    1061             :         }
    1062             :         return false;
    1063             : }
    1064             : 
    1065             : /*
    1066             :  * A point select on an unique column reduces the number of rows to 1. If the same select is under a
    1067             :  * join, the opposite side's select can be pushed above the join.
    1068             :  */
    1069             : static inline sql_rel *
    1070     1326921 : rel_push_select_up(visitor *v, sql_rel *rel)
    1071             : {
    1072     1326921 :         if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) || is_semi(rel->op)) && !is_single(rel)) {
    1073      261250 :                 sql_rel *l = rel->l, *r = rel->r;
    1074      261250 :                 bool can_pushup_left = is_select(l->op) && !rel_is_ref(l) && !is_single(l) && (is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op)),
    1075      261250 :                         can_pushup_right = is_select(r->op) && !rel_is_ref(r) && !is_single(r) && (is_innerjoin(rel->op) || is_right(rel->op));
    1076             : 
    1077      261250 :                 if (can_pushup_left || can_pushup_right) {
    1078       69410 :                         if (can_pushup_left)
    1079       48429 :                                 can_pushup_left = point_select_on_unique_column(r);
    1080       69410 :                         if (can_pushup_right)
    1081       49228 :                                 can_pushup_right = point_select_on_unique_column(l);
    1082             : 
    1083             :                         /* if both selects retrieve one row each, it's not worth it to push both up */
    1084       69410 :                         if (can_pushup_left && !can_pushup_right) {
    1085           8 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1086           8 :                                 nrel->l = l->l;
    1087           8 :                                 rel = rel_inplace_select(rel, nrel, l->exps);
    1088           8 :                                 assert(is_select(rel->op));
    1089           8 :                                 v->changes++;
    1090       69402 :                         } else if (!can_pushup_left && can_pushup_right) {
    1091          10 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1092          10 :                                 nrel->r = r->l;
    1093          10 :                                 rel = rel_inplace_select(rel, nrel, r->exps);
    1094          10 :                                 assert(is_select(rel->op));
    1095          10 :                                 v->changes++;
    1096             :                         }
    1097             :                 }
    1098             :         }
    1099     1326921 :         return rel;
    1100             : }
    1101             : 
    1102             : static int
    1103      104019 : sql_class_base_score(visitor *v, sql_column *c, sql_subtype *t, bool equality_based)
    1104             : {
    1105      104019 :         int de;
    1106             : 
    1107      104019 :         if (!t)
    1108             :                 return 0;
    1109      104019 :         switch (ATOMstorage(t->type->localtype)) {
    1110             :         case TYPE_bte:
    1111             :                 return 150 - 8;
    1112        3846 :         case TYPE_sht:
    1113        3846 :                 return 150 - 16;
    1114       39314 :         case TYPE_int:
    1115       39314 :                 return 150 - 32;
    1116         947 :         case TYPE_void:
    1117             :         case TYPE_lng:
    1118         947 :                 return 150 - 64;
    1119             :         case TYPE_uuid:
    1120             : #ifdef HAVE_HGE
    1121             :         case TYPE_hge:
    1122             : #endif
    1123             :                 return 150 - 128;
    1124           5 :         case TYPE_flt:
    1125           5 :                 return 75 - 24;
    1126             :         case TYPE_dbl:
    1127             :                 return 75 - 53;
    1128       33047 :         default:
    1129       33047 :                 if (equality_based && c && v->storage_based_opt && (de = mvc_is_duplicate_eliminated(v->sql, c)))
    1130        1397 :                         return 150 - de * 8;
    1131             :                 /* strings and blobs not duplicate eliminated don't get any points here */
    1132             :                 return 0;
    1133             :         }
    1134             : }
    1135             : 
    1136             : static int
    1137       41904 : score_se_base(visitor *v, sql_rel *rel, sql_exp *e)
    1138             : {
    1139       41904 :         int res = 0;
    1140       41904 :         sql_subtype *t = exp_subtype(e);
    1141       41904 :         sql_column *c = NULL;
    1142             : 
    1143             :         /* can we find out if the underlying table is sorted */
    1144       41904 :         if ((c = exp_find_column(rel, e, -2)) && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1145       41904 :                 res += 600;
    1146             : 
    1147             :         /* prefer the shorter var types over the longer ones */
    1148       41904 :         res += sql_class_base_score(v, c, t, is_equality_or_inequality_exp(e->flag)); /* smaller the type, better */
    1149       41904 :         return res;
    1150             : }
    1151             : 
    1152             : static int
    1153       66329 : score_se(visitor *v, sql_rel *rel, sql_exp *e)
    1154             : {
    1155       66329 :         int score = 0;
    1156       66329 :         if (e->type == e_cmp && !is_complex_exp(e->flag)) {
    1157       41904 :                 sql_exp *l = e->l;
    1158             : 
    1159       41904 :                 while (l->type == e_cmp) { /* go through nested comparisons */
    1160           2 :                         sql_exp *ll;
    1161             : 
    1162           2 :                         if (l->flag == cmp_filter || l->flag == cmp_or)
    1163           0 :                                 ll = ((list*)l->l)->h->data;
    1164             :                         else
    1165           2 :                                 ll = l->l;
    1166           2 :                         if (ll->type != e_cmp)
    1167             :                                 break;
    1168             :                         l = ll;
    1169             :                 }
    1170       41904 :                 score += score_se_base(v, rel, l);
    1171             :         }
    1172       66329 :         score += exp_keyvalue(e);
    1173       66329 :         return score;
    1174             : }
    1175             : 
    1176             : static inline sql_rel *
    1177     1326921 : rel_select_order(visitor *v, sql_rel *rel)
    1178             : {
    1179     1326921 :         int *scores = NULL;
    1180     1326921 :         sql_exp **exps = NULL;
    1181             : 
    1182     1326921 :         if (is_select(rel->op) && list_length(rel->exps) > 1) {
    1183       31071 :                 node *n;
    1184       31071 :                 int i, nexps = list_length(rel->exps);
    1185       31071 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, nexps);
    1186       31071 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, nexps);
    1187             : 
    1188       97400 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next) {
    1189       66378 :                         exps[i] = n->data;
    1190       66378 :                         if (find_prop(exps[i]->p, PROP_HASHCOL))
    1191             :                                 return rel;
    1192       66329 :                         scores[i] = score_se(v, rel, n->data);
    1193             :                 }
    1194       31022 :                 GDKqsort(scores, exps, NULL, nexps, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1195             : 
    1196       97351 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next)
    1197       66329 :                         n->data = exps[i];
    1198             :         }
    1199             : 
    1200             :         return rel;
    1201             : }
    1202             : 
    1203             : /* Compute the efficiency of using this expression early in a group by list */
    1204             : static int
    1205       62115 : score_gbe(visitor *v, sql_rel *rel, sql_exp *e)
    1206             : {
    1207       62115 :         int res = 0;
    1208       62115 :         sql_subtype *t = exp_subtype(e);
    1209       62115 :         sql_column *c = exp_find_column(rel, e, -2);
    1210             : 
    1211       62115 :         if (e->card == CARD_ATOM) /* constants are trivial to group */
    1212          67 :                 res += 1000;
    1213             :         /* can we find out if the underlying table is sorted */
    1214       62115 :         if (is_unique(e) || find_prop(e->p, PROP_HASHCOL) || (c && v->storage_based_opt && mvc_is_unique(v->sql, c))) /* distinct columns */
    1215        3920 :                 res += 700;
    1216       41519 :         if (c && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1217        4000 :                 res += 500;
    1218       62115 :         if (find_prop(e->p, PROP_HASHIDX)) /* has hash index */
    1219           0 :                 res += 200;
    1220             : 
    1221             :         /* prefer the shorter var types over the longer ones */
    1222       62115 :         res += sql_class_base_score(v, c, t, true); /* smaller the type, better */
    1223       62115 :         return res;
    1224             : }
    1225             : 
    1226             : /* reorder group by expressions */
    1227             : static inline sql_rel *
    1228     1326922 : rel_groupby_order(visitor *v, sql_rel *rel)
    1229             : {
    1230     1326922 :         int *scores = NULL;
    1231     1326922 :         sql_exp **exps = NULL;
    1232             : 
    1233     1326922 :         if (is_groupby(rel->op) && list_length(rel->r) > 1) {
    1234       26233 :                 node *n;
    1235       26233 :                 list *gbe = rel->r;
    1236       26233 :                 int i, ngbe = list_length(gbe);
    1237       26233 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, ngbe);
    1238       26233 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, ngbe);
    1239             : 
    1240             :                 /* first sorting step, give priority for integers and sorted columns */
    1241       88348 :                 for (i = 0, n = gbe->h; n; i++, n = n->next) {
    1242       62115 :                         exps[i] = n->data;
    1243       62115 :                         scores[i] = score_gbe(v, rel, exps[i]);
    1244             :                 }
    1245       26233 :                 GDKqsort(scores, exps, NULL, ngbe, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1246             : 
    1247             :                 /* second sorting step, give priority to strings with lower number of digits */
    1248       51847 :                 for (i = ngbe - 1; i && !scores[i]; i--); /* find expressions with no score from the first round */
    1249       26233 :                 if (scores[i])
    1250       25249 :                         i++;
    1251       26233 :                 if (ngbe - i > 1) {
    1252        8599 :                         for (int j = i; j < ngbe; j++) {
    1253        6595 :                                 sql_subtype *t = exp_subtype(exps[j]);
    1254        6595 :                                 scores[j] = t ? t->digits : 0;
    1255             :                         }
    1256             :                         /* the less number of digits the better, order ascending */
    1257        2004 :                         GDKqsort(scores + i, exps + i, NULL, ngbe - i, sizeof(int), sizeof(void *), TYPE_int, false, true);
    1258             :                 }
    1259             : 
    1260       88348 :                 for (i = 0, n = gbe->h; n; i++, n = n->next)
    1261       62115 :                         n->data = exps[i];
    1262             :         }
    1263             : 
    1264     1326922 :         return rel;
    1265             : }
    1266             : 
    1267             : /* This optimization loop contains optimizations that can potentially use statistics */
    1268             : static sql_rel *
    1269     1326921 : rel_final_optimization_loop_(visitor *v, sql_rel *rel)
    1270             : {
    1271             :         /* Run rel_push_select_up only once at the end to avoid an infinite optimization loop */
    1272     1326921 :         rel = rel_push_select_up(v, rel);
    1273     1326921 :         rel = rel_select_order(v, rel);
    1274             : 
    1275             :         /* TODO? Maybe later add rel_simplify_count, rel_join2semijoin, rel_simplify_fk_joins,
    1276             :            rel_distinct_project2groupby, rel_simplify_predicates, rel_simplify_math,
    1277             :            rel_distinct_aggregate_on_unique_values */
    1278             : 
    1279     1326922 :         rel = rel_groupby_order(v, rel);
    1280     1326922 :         return rel;
    1281             : }
    1282             : 
    1283             : static sql_rel *
    1284       55364 : rel_final_optimization_loop(visitor *v, global_props *gp, sql_rel *rel)
    1285             : {
    1286       55364 :         (void) gp;
    1287       55364 :         return rel_visitor_bottomup(v, rel, &rel_final_optimization_loop_);
    1288             : }
    1289             : 
    1290             : run_optimizer
    1291      686778 : bind_final_optimization_loop(visitor *v, global_props *gp)
    1292             : {
    1293      686778 :         int flag = v->sql->sql_optimizer;
    1294             :         /* At the moment, this optimizer has dependency on 3 flags */
    1295      530588 :         return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_select]) &&
    1296      742144 :                 (flag & push_select_up) && (flag & optimize_select_and_joins_topdown) && (flag & optimize_projections) ? rel_final_optimization_loop : NULL;
    1297             : }

Generated by: LCOV version 1.14