LCOV - code coverage report
Current view: top level - sql/server - rel_statistics.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 831 883 94.1 %
Date: 2024-10-07 21:21:43 Functions: 26 26 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include "rel_optimizer_private.h"
      15             : #include "rel_statistics.h"
      16             : #include "rel_basetable.h"
      17             : #include "rel_rewriter.h"
      18             : 
      19             : static sql_exp *
      20     3453542 : comparison_find_column(sql_exp *input, sql_exp *e)
      21             : {
      22     3453542 :         switch (input->type) {
      23       97926 :         case e_convert: {
      24       97926 :                 list *types = (list *)input->r;
      25       97926 :                 sql_class from = ((sql_subtype*)types->h->data)->type->eclass, to = ((sql_subtype*)types->h->next->data)->type->eclass;
      26       97926 :                 if (from == to)
      27      188870 :                         return comparison_find_column(input->l, e) ? input : NULL;
      28             :                 return NULL;
      29             :         }
      30     3043519 :         case e_column:
      31     3043519 :                 return exp_match(e, input) ? input : NULL;
      32             :         default:
      33             :                 return NULL;
      34             :         }
      35             : }
      36             : 
      37             : /* multi lo <= col <= hi, maybe still be false even if lo or hi are NULL, possibly similar for filter on multiple
      38             :  * columns */
      39             : #define comp_single_column(c) (!c->f)
      40             : 
      41             : static sql_exp *
      42     8705869 : rel_propagate_column_ref_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
      43             : {
      44     8788747 :         assert(e->type == e_column);
      45     8788747 :         if (rel) {
      46     8788693 :                 switch(rel->op) {
      47     4072480 :                 case op_left:
      48             :                 case op_right:
      49             :                 case op_full:
      50             :                 case op_join:
      51             :                 case op_select:
      52             :                 case op_anti:
      53             :                 case op_semi: {
      54     4072480 :                         bool found_without_semantics = false, found_left = false, found_right = false, still_unique = false;
      55             : 
      56     4072480 :                         if ((is_innerjoin(rel->op) || is_select(rel->op)) && list_length(rel->exps) == 1 && exp_is_false(rel->exps->h->data))
      57             :                                 return NULL; /* nothing will pass, skip */
      58             : 
      59             :                         /* propagate from the bottom first */
      60     4058404 :                         if (rel_propagate_column_ref_statistics(sql, rel->l, e))
      61             :                                 found_left = true;
      62     2581504 :                         if (!found_left && is_join(rel->op) && rel_propagate_column_ref_statistics(sql, rel->r, e))
      63     4058404 :                                 found_right = true;
      64             : 
      65     4058404 :                         if (!found_left && !found_right)
      66             :                                 return NULL;
      67     1769764 :                         if (!list_empty(rel->exps) && rel->op != op_anti) { /* if there's an or, the MIN and MAX get difficult to propagate */
      68     3484820 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
      69     1804381 :                                         sql_exp *comp = n->data, *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL, *fe = comp->f, *fne = NULL;
      70             : 
      71     1804381 :                                         if (comp->type == e_cmp) {
      72     1803379 :                                                 if (is_theta_exp(comp->flag) && ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) || (fe && (fne = comparison_find_column(fe, e))))) {
      73      124672 :                                                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
      74      124672 :                                                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
      75             : 
      76             :                                                         /* not semantics found or if explicitly filtering not null values from the column */
      77      124672 :                                                         found_without_semantics |= (!is_semantics(comp) && comp_single_column(comp)) || (comp->flag == cmp_equal && lne && is_anti(comp) && exp_is_null(re));
      78      124672 :                                                         still_unique |= comp->flag == cmp_equal && is_unique(le) && is_unique(re); /* unique if only equi-joins on unique columns are there */
      79      124672 :                                                         if (is_full(rel->op) || (is_left(rel->op) && found_left) || (is_right(rel->op) && found_right)) /* on outer joins, min and max cannot be propagated on some cases */
      80       19339 :                                                                 continue;
      81             :                                                         /* if (end2 >= start1 && start2 <= end1) then the 2 intervals are intersected */
      82      105333 :                                                         if (fe && lval_min && lval_max) { /* range case, the middle expression must intersect the other two */
      83        4622 :                                                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
      84        4622 :                                                                 int int1 = rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0,
      85        4622 :                                                                         int2 = fval_min && fval_max && atom_cmp(fval_max, lval_min) >= 0 && atom_cmp(fval_min, lval_max) <= 0,
      86        4622 :                                                                         symmetric = is_symmetric(comp);
      87             : 
      88        4622 :                                                                 if (is_anti(comp) || (!symmetric && fval_min && rval_max && atom_cmp(fval_min, rval_max) < 0)) /* for asymmetric case the re range must be after the fe range */
      89        1817 :                                                                         continue;
      90        2805 :                                                                 if (lne && int1 && int2) {
      91         104 :                                                                         if (symmetric) {
      92           0 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
      93           0 :                                                                                 atom *nmin = statistics_atom_min(sql, rval_min, fval_min), *nmax = statistics_atom_max(sql, rval_max, fval_max);
      94             :                                                                                 /* max is min from le and (max from re and fe max) */
      95           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, nmax, p2->value.pval) : nmax);
      96             :                                                                                 /* min is max from le and (min from re and fe min) */
      97           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, nmin, p1->value.pval) : nmin);
      98             :                                                                         } else {
      99         104 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
     100             :                                                                                 /* max is min from le and fe max */
     101         104 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, fval_max, p2->value.pval) : fval_max);
     102             :                                                                                 /* min is max from le and re min */
     103         104 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, rval_min, p1->value.pval) : rval_min);
     104             :                                                                         }
     105        2701 :                                                                 } else if (rne) {
     106         590 :                                                                         if (symmetric && int1 && int2) { /* min is max from le and (min from re and fe min) */
     107           0 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     108           0 :                                                                                 atom *nmin = p ? statistics_atom_min(sql, p->value.pval, fval_min) : fval_min;
     109           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, nmin, lval_min));
     110         590 :                                                                         } else if (int1) { /* min is max from le and re min */
     111         100 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     112         100 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     113             :                                                                         }
     114        2111 :                                                                 } else if (fne) {
     115         549 :                                                                         if (symmetric && int1 && int2) { /* max is min from le and (max from re and fe max) */
     116           0 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     117           0 :                                                                                 atom *nmax = p ? statistics_atom_max(sql, p->value.pval, rval_max) : rval_max;
     118           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, nmax, lval_max) : nmax);
     119         549 :                                                                         } else if (int2) { /* max is min from le and fe max */
     120         266 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     121         266 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     122             :                                                                         }
     123             :                                                                 }
     124      100711 :                                                         } else if (lval_min && lval_max && rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0) {
     125             :                                                                 /* both min and max must be set and the intervals must overlap */
     126       42731 :                                                                 switch (comp->flag) {
     127       28522 :                                                                 case cmp_equal: /* for equality reduce */
     128       28522 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_max(sql, lval_max, rval_max) : statistics_atom_min(sql, lval_max, rval_max));
     129       28522 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_min(sql, lval_min, rval_min) : statistics_atom_max(sql, lval_min, rval_min));
     130       28522 :                                                                         break;
     131        4611 :                                                                 case cmp_notequal: /* for inequality expand */
     132        4611 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_min(sql, lval_max, rval_max) : statistics_atom_max(sql, lval_max, rval_max));
     133        4611 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_max(sql, lval_min, rval_min) : statistics_atom_min(sql, lval_min, rval_min));
     134        4611 :                                                                         break;
     135        5595 :                                                                 case cmp_gt:
     136             :                                                                 case cmp_gte:
     137       10294 :                                                                         if (!is_anti(comp) && lne) { /* min is max from both min */
     138        4699 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     139        4699 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min, p->value.pval) : rval_min);
     140         896 :                                                                         } else if (!is_anti(comp)) { /* max is min from both max */
     141         896 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     142         896 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     143             :                                                                         }
     144             :                                                                         break;
     145        4003 :                                                                 case cmp_lt:
     146             :                                                                 case cmp_lte:
     147        7200 :                                                                         if (!is_anti(comp) && lne) { /* max is min from both max */
     148        3197 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     149        3197 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max, p->value.pval) : rval_max);
     150         806 :                                                                         } else if (!is_anti(comp)) { /* min is max from both min */
     151         806 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     152         806 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     153             :                                                                         }
     154             :                                                                         break;
     155             :                                                                 default: /* Maybe later I can do cmp_in and cmp_notin */
     156             :                                                                         break;
     157             :                                                                 }
     158             :                                                         }
     159             :                                                 }
     160             :                                         }
     161             :                                 }
     162             :                         }
     163     1769764 :                         if (is_full(rel->op) || (is_left(rel->op) && found_right) || (is_right(rel->op) && found_left))
     164       37254 :                                 set_has_nil(e);
     165     1769764 :                         if (!is_outerjoin(rel->op) && found_without_semantics) /* at an outer join, null values pass */
     166       94005 :                                 set_has_no_nil(e);
     167     1769764 :                         if (is_join(rel->op) && is_unique(e) && !still_unique)
     168      117241 :                                 set_not_unique(e);
     169     1769764 :                         return e;
     170             :                 }
     171     4613974 :                 case op_table:
     172             :                 case op_basetable:
     173             :                 case op_union:
     174             :                 case op_except:
     175             :                 case op_inter:
     176             :                 case op_munion:
     177             :                 case op_project:
     178             :                 case op_groupby: {
     179     4613974 :                         sql_exp *found;
     180     4613974 :                         atom *fval;
     181     4613974 :                         prop *est;
     182     4613974 :                         if ((found = rel_find_exp(rel, e))) {
     183     2160349 :                                 if (rel->op != op_table) { /* At the moment don't propagate statistics for table relations */
     184     2116083 :                                         if ((fval = find_prop_and_get(found->p, PROP_MAX)))
     185     1121739 :                                                 set_minmax_property(sql, e, PROP_MAX, fval);
     186     2116082 :                                         if ((fval = find_prop_and_get(found->p, PROP_MIN)))
     187     1128780 :                                                 set_minmax_property(sql, e, PROP_MIN, fval);
     188     2116080 :                                         if (!has_nil(found))
     189     1344630 :                                                 set_has_no_nil(e);
     190     2116080 :                                         if (is_unique(found) || (need_distinct(rel) && list_length(rel->exps) == 1) ||
     191     1710576 :                                                 (is_groupby(rel->op) && (list_empty(rel->r) || (list_length(rel->r) == 1 && exps_find_exp(rel->r, e)))))
     192      416219 :                                                 set_unique(e);
     193             :                                         /* propagate unique estimation for known cases */
     194     2116080 :                                         if (is_groupby(rel->op) && list_empty(rel->r) && !find_prop(e->p, PROP_NUNIQUES)) { /* global aggregate case */
     195        7460 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     196        7460 :                                                 p->value.dval = 1;
     197     2108621 :                                         } else if (((is_basetable(rel->op) || is_except(rel->op) || is_inter(rel->op) || is_simple_project(rel->op) ||
     198       65209 :                                                                  (is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
     199     2051429 :                                                                 (est = find_prop(found->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES))) {
     200      187324 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     201      187323 :                                                 p->value.dval = est->value.dval;
     202             :                                         }
     203             :                                 }
     204     2160344 :                                 return e;
     205             :                         }
     206             :                         return NULL;
     207             :                 }
     208       82878 :                 case op_topn:
     209             :                 case op_sample:
     210       82878 :                         return rel_propagate_column_ref_statistics(sql, rel->l, e);
     211             :                 default:
     212             :                         break;
     213             :                 }
     214             :         }
     215             :         return NULL;
     216             : }
     217             : 
     218             : static atom *
     219     4504191 : atom_from_valptr( allocator *sa, sql_subtype *tpe, ValRecord *v)
     220             : {
     221     4504191 :         atom *a = SA_NEW(sa, atom);
     222             : 
     223     4504169 :         assert(!VALisnil(v));
     224     4504219 :         *a = (atom) {.tpe = *tpe,};
     225     4504219 :         SA_VALcopy(sa, &a->data, v);
     226     4504318 :         return a;
     227             : }
     228             : 
     229             : void
     230     4137734 : sql_column_get_statistics(mvc *sql, sql_column *c, sql_exp *e)
     231             : {
     232     4137734 :         bool nonil = false, unique = false;
     233     4137734 :         double unique_est = 0.0;
     234     4137734 :         ValRecord min, max;
     235     4137734 :         int ok = mvc_col_stats(sql, c, &nonil, &unique, &unique_est, &min, &max);
     236             : 
     237     4138993 :         if (has_nil(e) && nonil)
     238     2599467 :                 set_has_no_nil(e);
     239     4138993 :         if (!is_unique(e) && unique)
     240     1093527 :                 set_unique(e);
     241     4138993 :         if (unique_est != 0.0) {
     242     2906603 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     243     2906447 :                 p->value.dval = unique_est;
     244             :         }
     245     4138837 :         unsigned int digits = 0;
     246     4138837 :         sql_subtype *et = exp_subtype(e);
     247     4138776 :         if (et->type->eclass == EC_DEC || et->type->eclass == EC_NUM)
     248     2687376 :                 digits = et->digits;
     249     4138776 :         if ((ok & 2) == 2) {
     250     2249449 :                 if (!VALisnil(&max)) {
     251     2249410 :                         prop *p = e->p = prop_create(sql->sa, PROP_MAX, e->p);
     252     2249366 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &max);
     253     2249329 :                         if (digits) {
     254     1673659 :                                 unsigned int nd = atom_digits(p->value.pval);
     255     1673571 :                                 if (nd < digits)
     256             :                                         digits = nd;
     257     1673571 :                                 if (!digits)
     258             :                                         digits = 1;
     259             :                         }
     260             :                 }
     261     2249199 :                 VALclear(&max);
     262             :         }
     263     4138511 :         if ((ok & 1) == 1) {
     264     2255234 :                 if (!VALisnil(&min)) {
     265     2255238 :                         prop *p = e->p = prop_create(sql->sa, PROP_MIN, e->p);
     266     2255284 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &min);
     267     2255426 :                         if (digits) {
     268     1680822 :                                 unsigned int nd = atom_digits(p->value.pval);
     269     1680829 :                                 if (nd > digits) /* possibly set to low by max value */
     270             :                                         digits = nd;
     271             :                                 if (!digits)
     272             :                                         digits = 1;
     273             :                         }
     274             :                 }
     275     2255419 :                 VALclear(&min);
     276             :         }
     277     4138753 :         if (digits)
     278     2687323 :                 et->digits = digits;
     279     4138753 :         if (et->type->eclass == EC_DEC && et->digits <= et->scale)
     280         216 :                 et->digits = et->scale + 1;
     281     4138753 : }
     282             : 
     283             : static void
     284      931615 : rel_basetable_column_get_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
     285             : {
     286      931615 :         if (e->p)
     287             :                 return;
     288      301132 :         sql_column *c = NULL;
     289             : 
     290      301132 :         if ((c = rel_base_find_column(rel, e->nid))) {
     291      203712 :                 sql_column_get_statistics(sql, c, e);
     292             :         }
     293             : }
     294             : 
     295             : static bool
     296        8834 : rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, sql_exp *e, int i)
     297             : {
     298        8834 :         sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
     299        8834 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     300        8834 :                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     301        8834 :         prop *est;
     302             : 
     303             :         /* for the intersection, if both expressions don't overlap, it can be pruned */
     304        8834 :         if (is_inter(rel->op) && !has_nil(le) && !has_nil(re) &&
     305         504 :                 ((rval_max && lval_min && atom_cmp(rval_max, lval_min) < 0) || (rval_min && lval_max && atom_cmp(rval_min, lval_max) > 0)))
     306          26 :                 return true;
     307             : 
     308        8808 :         if (lval_max && rval_max) {
     309        2523 :                 if (is_union(rel->op))
     310           3 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     311        2520 :                 else if (is_inter(rel->op))
     312         391 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval_max, rval_max)); /* for intersect the new max will be the min of the two */
     313             :                 else /* except */
     314        2129 :                         set_minmax_property(sql, e, PROP_MAX, lval_max);
     315             :         }
     316        8808 :         if (lval_min && rval_min) {
     317        2523 :                 if (is_union(rel->op))
     318           3 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     319        2520 :                 else if (is_inter(rel->op))
     320         391 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval_min, rval_min)); /* for intersect the new min will be the max of the two */
     321             :                 else /* except */
     322        2129 :                         set_minmax_property(sql, e, PROP_MIN, lval_min);
     323             :         }
     324             : 
     325        8808 :         if (is_union(rel->op) || is_munion(rel->op)) {
     326        5986 :                 if (!has_nil(le) && !has_nil(re))
     327         879 :                         set_has_no_nil(e);
     328        5986 :                 if (need_distinct(rel) && list_length(rel->exps) == 1)
     329           0 :                         set_unique(e);
     330        2822 :         } else if (is_inter(rel->op)) {
     331         555 :                 if (!has_nil(le) || !has_nil(re))
     332         501 :                         set_has_no_nil(e);
     333         555 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     334         342 :                         set_unique(e);
     335             :         } else {
     336        2267 :                 assert(is_except(rel->op));
     337        2267 :                 if (!has_nil(le))
     338        2208 :                         set_has_no_nil(e);
     339        2267 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     340        2006 :                         set_unique(e);
     341             :         }
     342             :         /* propagate unique estimation for known cases */
     343        8808 :         if (!is_union(rel->op) && (est = find_prop(le->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES)) {
     344         205 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     345         205 :                 p->value.dval = est->value.dval;
     346             :         }
     347             :         return false;
     348             : }
     349             : 
     350             : 
     351             : static void
     352       60732 : rel_munion_get_statistics(mvc *sql, sql_rel *rel, list *rels, sql_exp *e, int i)
     353             : {
     354       60732 :         assert(is_munion(rel->op));
     355             : 
     356       60732 :         sql_rel *l = rels->h->data;
     357       60732 :         sql_exp *le = list_fetch(l->exps, i);
     358       60732 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX);
     359       60732 :         bool has_nonil = !has_nil(le);
     360             : 
     361      175853 :         for(node *n = rels->h->next; n; n = n->next) {
     362      115121 :                 sql_rel *r = n->data;
     363      115121 :                 sql_exp *re = list_fetch(r->exps, i);
     364      115121 :                 atom *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     365             : 
     366      115121 :                 if (lval_max && rval_max) {
     367       42431 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     368       42431 :                         lval_max = find_prop_and_get(e->p, PROP_MAX);
     369             :                 }
     370      115121 :                 if (lval_min && rval_min) {
     371       41906 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     372       41906 :                         lval_min = find_prop_and_get(e->p, PROP_MIN);
     373             :                 }
     374      115121 :                 has_nonil &= !has_nil(re);
     375             : 
     376             :         }
     377             : 
     378       60732 :         if (has_nonil)
     379       40886 :                 set_has_no_nil(e);
     380             : 
     381       60732 :         if (need_distinct(rel) && list_length(rel->exps) == 1)
     382        1593 :                 set_unique(e);
     383       60732 : }
     384             : 
     385             : 
     386             : static sql_exp *
     387     3455045 : rel_propagate_statistics(visitor *v, sql_rel *rel, sql_exp *e, int depth)
     388             : {
     389     3455045 :         mvc *sql = v->sql;
     390     3455045 :         atom *lval;
     391             : 
     392     3455045 :         (void) depth;
     393     3455045 :         switch(e->type) {
     394     2165905 :         case e_column:
     395     2165905 :                 switch (rel->op) { /* set relations don't call rel_propagate_statistics */
     396      276265 :                 case op_join:
     397             :                 case op_left:
     398             :                 case op_right:
     399             :                 case op_full:
     400             :                 case op_semi:
     401             :                 case op_anti: {
     402      276265 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e);
     403      276265 :                         if (!found)
     404      138575 :                                 (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     405             :                         break;
     406             :                 }
     407     1889640 :                 case op_select:
     408             :                 case op_project:
     409             :                 case op_groupby: {
     410     1889640 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e); /* labels may be found on the same projection, ugh */
     411     1889637 :                         if (!found && is_simple_project(rel->op))
     412      123259 :                                 (void) rel_propagate_column_ref_statistics(sql, rel, e);
     413             :                         break;
     414             :                 }
     415           0 :                 case op_insert:
     416             :                 case op_update:
     417             :                 case op_delete:
     418           0 :                         (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     419           0 :                         break;
     420             :                 default:
     421             :                         break;
     422             :                 }
     423             :                 break;
     424       97869 :         case e_convert: {
     425       97869 :                 sql_subtype *to = exp_totype(e), *from = exp_fromtype(e);
     426       97869 :                 sql_exp *l = e->l;
     427       97869 :                 sql_class fr = from->type->eclass, too = to->type->eclass;
     428       97869 :                 prop *est;
     429             : 
     430       97869 :                 if (fr == too) {
     431       88918 :                         if ((lval = find_prop_and_get(l->p, PROP_MAX))) {
     432       58483 :                                 atom *res = atom_copy(sql->sa, lval);
     433       58483 :                                 if ((res = atom_cast(sql->sa, res, to)))
     434       58460 :                                         set_minmax_property(sql, e, PROP_MAX, res);
     435             :                         }
     436       88918 :                         if ((lval = find_prop_and_get(l->p, PROP_MIN))) {
     437       59098 :                                 atom *res = atom_copy(sql->sa, lval);
     438       59098 :                                 if ((res = atom_cast(sql->sa, res, to)))
     439       59075 :                                         set_minmax_property(sql, e, PROP_MIN, res);
     440             :                         }
     441             :                 }
     442             :                 /* propagating nuniques through conversions is tricky. I am adding just the general cases */
     443       97870 :                 if ((est = find_prop(l->p, PROP_NUNIQUES)) &&
     444       60683 :                         (((fr == EC_TIME || fr == EC_TIME_TZ || fr == EC_DATE) && (too == EC_TIMESTAMP || too == EC_TIMESTAMP_TZ)) ||
     445       60658 :                          ((fr == EC_NUM || fr == EC_BIT) && too == EC_NUM) || (fr == EC_DEC && too == EC_DEC) || (fr != EC_EXTERNAL && too == EC_STRING))) {
     446       57285 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     447       57285 :                         p->value.dval = est->value.dval;
     448             :                 }
     449       97869 :                 if (!has_nil(l))
     450       55025 :                         set_has_no_nil(e);
     451             :                 break;
     452             :         }
     453      339577 :         case e_aggr:
     454             :         case e_func: {
     455      339577 :                 BUN lv;
     456      339577 :                 sql_subfunc *f = e->f;
     457             : 
     458      339577 :                 if (!f->func->s) {
     459      313243 :                         int key = hash_key(f->func->base.name); /* Using hash lookup */
     460      313243 :                         sql_hash_e *he = sql_functions_lookup->buckets[key&(sql_functions_lookup->size-1)];
     461      313243 :                         lookup_function look = NULL;
     462             : 
     463      684478 :                         for (; he && !look; he = he->chain) {
     464      371235 :                                 struct function_properties* fp = (struct function_properties*) he->value;
     465             : 
     466      371235 :                                 if (!strcmp(f->func->base.name, fp->name))
     467      107207 :                                         look = fp->func;
     468             :                         }
     469      313243 :                         if (look)
     470      107208 :                                 look(sql, e);
     471             :                 }
     472             :                 /* for global aggregates with no semantics, if the left relation has values, then the output won't be NULL */
     473      339579 :                 if (!is_semantics(e) && e->l && !have_nil(e->l) &&
     474       88211 :                         (e->type != e_aggr || (is_groupby(rel->op) && list_length(rel->r)) || ((lv = get_rel_count(rel->l)) != BUN_NONE && lv > 0)))
     475       87885 :                         set_has_no_nil(e);
     476             :                 /* set properties for global aggregates */
     477      339579 :                 if (e->type == e_aggr && is_groupby(rel->op) && list_empty(rel->r)) {
     478        7828 :                         if (!find_prop(e->p, PROP_NUNIQUES)) {
     479        7828 :                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     480        7828 :                                 p->value.dval = 1;
     481             :                         }
     482        7828 :                         set_unique(e);
     483             :                 }
     484             :                 break;
     485             :         }
     486      568330 :         case e_atom:
     487      568330 :                 if (e->l) {
     488      550751 :                         atom *a = (atom*) e->l;
     489      550751 :                         if (!a->isnull) {
     490      489138 :                                 set_minmax_property(sql, e, PROP_MAX, a);
     491      489137 :                                 set_minmax_property(sql, e, PROP_MIN, a);
     492             :                         }
     493      550750 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     494      550750 :                         p->value.dval = 1;
     495       17579 :                 } else if (e->f) {
     496        4117 :                         list *vals = (list *) e->f;
     497        4117 :                         sql_exp *first = vals->h ? vals->h->data : NULL;
     498        4117 :                         atom *max = NULL, *min = NULL; /* all child values must have a valid min/max */
     499        4117 :                         int has_nil = 0;
     500             : 
     501        4117 :                         if (first) {
     502        4117 :                                 max = ((lval = find_prop_and_get(first->p, PROP_MAX))) ? lval : NULL;
     503        4117 :                                 min = ((lval = find_prop_and_get(first->p, PROP_MIN))) ? lval : NULL;
     504        4117 :                                 has_nil |= has_nil(first);
     505             :                         }
     506             : 
     507       17730 :                         for (node *n = vals->h ? vals->h->next : NULL ; n ; n = n->next) {
     508        9496 :                                 sql_exp *ee = n->data;
     509             : 
     510        9496 :                                 if (min && max) {
     511        9005 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MAX))) {
     512        8959 :                                                 max = atom_cmp(lval, max) > 0 ? lval : max;
     513             :                                         } else {
     514             :                                                 max = NULL;
     515             :                                         }
     516        9005 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MIN))) {
     517        8959 :                                                 min = atom_cmp(min, lval) > 0 ? lval : min;
     518             :                                         } else {
     519             :                                                 min = NULL;
     520             :                                         }
     521             :                                 }
     522        9496 :                                 has_nil |= has_nil(ee);
     523             :                         }
     524             : 
     525        4117 :                         if (!has_nil)
     526        3750 :                                 set_has_no_nil(e);
     527        4117 :                         if (min && max) {
     528        3708 :                                 set_minmax_property(sql, e, PROP_MAX, max);
     529        3708 :                                 set_minmax_property(sql, e, PROP_MIN, min);
     530             :                         }
     531        4117 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     532        4117 :                         p->value.dval = (dbl) list_length(vals);
     533             :                 } else {
     534       13462 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     535       13462 :                         p->value.dval = 1;
     536             :                 }
     537             :                 break;
     538      283364 :         case e_cmp:
     539             :                 /* TODO? propagating min/max/unique of booleans is not very worth it */
     540      283364 :                 if (e->flag == cmp_or || e->flag == cmp_filter) {
     541       17362 :                         if (!have_nil(e->l) && !have_nil(e->r))
     542       12920 :                                 set_has_no_nil(e);
     543      266002 :                 } else if (e->flag == cmp_in || e->flag == cmp_notin) {
     544       21693 :                         sql_exp *le = e->l;
     545       21693 :                         if (!has_nil(le) && !have_nil(e->r))
     546       18491 :                                 set_has_no_nil(e);
     547             :                 } else {
     548      244309 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     549      244309 :                         if (!has_nil(le) && !has_nil(re) && (!e->f || !has_nil(fe)))
     550      169713 :                                 set_has_no_nil(e);
     551             :                 }
     552             :                 break;
     553             :         case e_psm:
     554             :                 break;
     555             :         }
     556             : 
     557             : #ifndef NDEBUG
     558             :         {
     559             :                 /* min and max cannot be NULL and min must be <= than max, if it happens the inner relation must be empty! */
     560     3455043 :                 atom *min = find_prop_and_get(e->p, PROP_MIN), *max = find_prop_and_get(e->p, PROP_MAX);
     561             : 
     562     3455035 :                 (void) min;
     563     3455035 :                 (void) max;
     564     3455035 :                 assert(!min || !min->isnull);
     565     3455035 :                 assert(!max || !max->isnull);
     566     3455035 :                 assert(!min || !max || (min && max)/* atom_cmp(min, max) <= 0*/);
     567             :         }
     568             : #endif
     569     3455035 :         return e;
     570             : }
     571             : 
     572             : static list * /* Remove predicates always false from min/max values */
     573      137650 : rel_prune_predicates(visitor *v, sql_rel *rel)
     574             : {
     575      137650 :         if (rel->l) {
     576      137650 :                 sql_rel *l = rel->l;
     577      137650 :                 if (is_single(l))
     578        3259 :                         return rel->exps;
     579             :         }
     580      134391 :         if (!list_empty(rel->attr))
     581        2868 :                 return rel->exps;
     582      279067 :         for (node *n = rel->exps->h ; n ; n = n->next) {
     583      147544 :                 sql_exp *e = n->data;
     584             : 
     585      147544 :                 if (e->type == e_cmp && is_theta_exp(e->flag)) {
     586      121455 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     587      121455 :                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     588      121455 :                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     589      121455 :                         bool always_false = false, always_true = false;
     590             : 
     591      121455 :                         if (fe && !is_symmetric(e)) {
     592        3058 :                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
     593        3058 :                                 comp_type lower = range2lcompare(e->flag), higher = range2rcompare(e->flag);
     594        3660 :                                 int not_int1 = rval_min && lval_max && /* the middle and left intervals don't overlap */
     595        1663 :                                         (!is_anti(e) && (lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0)),
     596        4072 :                                         not_int2 = lval_min && fval_max && /* the middle and right intervals don't overlap */
     597        2485 :                                         (!is_anti(e) && (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0)),
     598        3058 :                                         not_int3 = rval_min && fval_max && /* the left interval is after the right one */
     599        1287 :                                         (!is_anti(e) && (atom_cmp(rval_min, fval_max) > 0));
     600             : 
     601        3058 :                                 always_false |= not_int1 || not_int2 || not_int3;
     602             :                                 /* for anti the middle must be before the left or after the right or the right after the left, for the other the middle must be always between the left and right intervals */
     603        4113 :                                 always_true |= !has_nil(le) && !has_nil(re) && !has_nil(fe) &&
     604        3957 :                                         lval_min && lval_max && rval_min && rval_max && fval_min && fval_max &&
     605         575 :                                         (is_anti(e) ? ((lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) || (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
     606         488 :                                          ((lower == cmp_gte ? atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0)));
     607             :                         } else if (!fe) {
     608      118379 :                                 if (!is_semantics(e)) /* trivial not null cmp null case */
     609      225476 :                                         always_false |= !is_anti(e) && ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)));
     610      118379 :                                 switch (e->flag) {
     611      102930 :                                 case cmp_equal:
     612      102930 :                                         if (lval_min && lval_max && rval_min && rval_max && (!is_semantics(e) || !has_nil(le) || !has_nil(re)))
     613      131024 :                                                 always_false |= (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     614      102930 :                                         if (is_semantics(e)) { /* prune *= NULL cases */
     615        5612 :                                                 always_false |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     616       11224 :                                                 always_true |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     617             :                                         }
     618             :                                         break;
     619        7210 :                                 case cmp_notequal:
     620        7210 :                                         if (lval_min && lval_max && rval_min && rval_max)
     621       11318 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     622        7210 :                                         if (is_semantics(e)) {
     623          29 :                                                 always_true |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     624          58 :                                                 always_false |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     625             :                                         }
     626             :                                         break;
     627        5598 :                                 case cmp_gt:
     628        5598 :                                         if (lval_max && rval_min)
     629        1916 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     630        5598 :                                         if (lval_min && rval_max)
     631        3832 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     632             :                                         break;
     633         119 :                                 case cmp_gte:
     634         119 :                                         if (lval_max && rval_min)
     635          49 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     636         119 :                                         if (lval_min && rval_max)
     637          98 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     638             :                                         break;
     639        2440 :                                 case cmp_lt:
     640        2440 :                                         if (lval_min && rval_max)
     641        1382 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     642        2440 :                                         if (lval_max && rval_min)
     643        2812 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     644             :                                         break;
     645          82 :                                 case cmp_lte:
     646          82 :                                         if (lval_min && rval_max)
     647          10 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     648          82 :                                         if (lval_max && rval_min)
     649          20 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     650             :                                         break;
     651             :                                 default: /* Maybe later I can do cmp_in and cmp_notin but don't forget to remove is_theta_exp check up there */
     652             :                                         break;
     653             :                                 }
     654             :                         }
     655      121455 :                         assert(!always_false || !always_true);
     656      121455 :                         if (always_false || always_true) {
     657        3534 :                                 sql_exp *ne = exp_atom_bool(v->sql->sa, always_true /* if always_true set then true else false */);
     658        3534 :                                 if (exp_name(e))
     659           0 :                                         exp_prop_alias(v->sql->sa, ne, e);
     660        3534 :                                 n->data = ne;
     661        3534 :                                 v->changes++;
     662             :                         }
     663             :                 }
     664             :         }
     665      131523 :         return rel->exps;
     666             : }
     667             : 
     668             : static sql_rel *
     669          14 : set_setop_side(visitor *v, sql_rel *rel, sql_rel *side)
     670             : {
     671          14 :         if (side == rel->r)
     672           0 :                 rel->r = NULL;
     673             :         else
     674          14 :                 rel->l = NULL;
     675          14 :         if (!is_simple_project(side->op) || rel_is_ref(side) || !list_empty(side->r)) {
     676           0 :                 side = rel_project(v->sql->sa, side, rel_projections(v->sql, side, NULL, 0, 1));
     677           0 :                 set_count_prop(v->sql->sa, side, get_rel_count(side->l));
     678           0 :                 side->exps = exps_exp_visitor_bottomup(v, side, side->exps, 0, &rel_propagate_statistics, false);
     679             :         }
     680          35 :         for (node *n = rel->exps->h, *m = side->exps->h ; n && m ; n = n->next, m = m->next) {
     681          21 :                 sql_exp *e1 = n->data, *e2 = m->data;
     682          21 :                 exp_prop_alias(v->sql->sa, e2, e1);
     683             :         }
     684          14 :         list_hash_clear(side->exps);
     685             : 
     686          14 :         if (need_distinct(rel))
     687           0 :                 set_distinct(side);
     688          14 :         side->p = prop_copy(v->sql->sa, rel->p);
     689          14 :         rel_destroy(rel);
     690          14 :         return side;
     691             : }
     692             : 
     693             : static BUN
     694       20042 : trivial_project_exp_card(sql_exp *e)
     695             : {
     696       20343 :         if (e->type == e_convert)
     697         301 :                 return trivial_project_exp_card(e->l);
     698       20042 :         return e->type == e_atom && e->f ? (BUN) list_length(e->f) : 1;
     699             : }
     700             : 
     701             : static BUN
     702       23800 : rel_calc_nuniques(mvc *sql, sql_rel *l, list *exps)
     703             : {
     704       23800 :         BUN lv = get_rel_count(l);
     705             : 
     706       23800 :         if (lv == 0)
     707             :                 return 0;
     708       21099 :         if (!list_empty(exps)) {
     709       21099 :                 BUN nuniques = 0;
     710             :                 /* compute the highest number of unique values */
     711       50072 :                 for (node *n = exps->h ; n && nuniques != BUN_NONE ; n = n->next) {
     712       28972 :                         sql_exp *e = n->data;
     713       28972 :                         sql_rel *bt = NULL;
     714       28972 :                         prop *p = NULL;
     715       28972 :                         BUN euniques = BUN_NONE;
     716       28972 :                         atom *min, *max, *sub = NULL;
     717       28972 :                         sql_subtype *tp = exp_subtype(e);
     718       28972 :                         sql_class ec = tp ? tp->type->eclass : EC_STRING; /* if 'e' has no type (eg parameter), use a non-number type to fail condition */
     719             : 
     720       28972 :                         if ((p = find_prop(e->p, PROP_NUNIQUES))) {
     721       20449 :                                 euniques = (BUN) p->value.dval;
     722        8523 :                         } else if (e->type == e_column && e->nid && rel_find_exp_and_corresponding_rel(l, e, false, &bt, NULL) && bt && (p = find_prop(bt->p, PROP_COUNT))) {
     723        6497 :                                 euniques = (BUN) p->value.lval;
     724             :                         }
     725             :                         /* use min to max range to compute number of possible values in the domain for number types */
     726       28972 :                         if ((EC_TEMP(ec)||ec==EC_NUM||ec==EC_MONTH||ec==EC_POS) &&
     727       21773 :                                 (min = find_prop_and_get(e->p, PROP_MIN)) && (max = find_prop_and_get(e->p, PROP_MAX))) {
     728             :                                 /* the range includes min and max, so the atom_inc call is needed */
     729             :                                 /* if 'euniques' has number of distinct values, compute min between both */
     730       16976 :                                 if ((sub = atom_sub(sql->sa, max, min)) && (sub = atom_inc(sql->sa, sub)) && (sub = atom_cast(sql->sa, sub, sql_bind_localtype("oid"))))
     731           0 :                                         euniques = MIN(euniques, (BUN) sub->data.val.oval);
     732             :                         }
     733       28973 :                         if (euniques != BUN_NONE)
     734       26947 :                                 nuniques = MAX(nuniques, euniques); /* the highest cardinality sets the estimation */
     735             :                         else
     736             :                                 nuniques = BUN_NONE;
     737             :                 }
     738       21100 :                 if (nuniques != BUN_NONE)
     739             :                         return nuniques;
     740             :         }
     741             :         return lv;
     742             : }
     743             : 
     744             : static sql_rel *
     745     2061074 : rel_get_statistics_(visitor *v, sql_rel *rel)
     746             : {
     747             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
     748     2061074 :         uint8_t has_special_modify = *(uint8_t*) v->data;
     749     2061074 :         bool can_be_pruned = !has_special_modify && v->storage_based_opt;
     750             : 
     751             :         /* Don't look at the same relation twice */
     752     2061074 :         if (are_statistics_gathered(rel->used))
     753             :                 return rel;
     754     1322991 :         rel->used |= statistics_gathered;
     755             : 
     756     1322991 :         switch (rel->op) {
     757      310036 :         case op_basetable: {
     758      310036 :                 sql_table *t = (sql_table *) rel->l;
     759      310036 :                 sqlstore *store = v->sql->session->tr->store;
     760             : 
     761      310036 :                 if (!list_empty(rel->exps)) {
     762     1241800 :                         for (node *n = rel->exps->h ; n ; n = n->next)
     763      931585 :                                 rel_basetable_column_get_statistics(v->sql, rel, n->data);
     764             :                 }
     765             :                 /* Set table row count. TODO? look for remote tables. Don't look at storage for declared tables, because it won't be cleaned */
     766      310225 :                 if (isTable(t) && t->s && !isDeclaredTable(t)) /* count active rows only */
     767      257022 :                         set_count_prop(v->sql->sa, rel, (BUN)store->storage_api.count_col(v->sql->session->tr, ol_first_node(t->columns)->data, 10));
     768             :                 break;
     769             :         }
     770        2779 :         case op_union:
     771             :         case op_inter:
     772             :         case op_except: {
     773        2779 :                 bool empty_cross = false;
     774        2779 :                 int i = 0;
     775        2779 :                 sql_rel *l = rel->l, *pl = rel->l, *r = rel->r, *pr = rel->r;
     776             : 
     777        2779 :                 while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     778           0 :                         pl = pl->l;
     779        2779 :                 while (is_sample(pr->op) || is_topn(pr->op))
     780           0 :                         pr = pr->l;
     781             :                 /* if it's not a projection, then project and propagate statistics */
     782        2779 :                 if (!is_project(pl->op) && !is_base(pl->op)) {
     783           0 :                         pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     784           0 :                         set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     785           0 :                         pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     786             :                 }
     787        2779 :                 if (!is_project(pr->op) && !is_base(pr->op)) {
     788           0 :                         pr = rel_project(v->sql->sa, pr, rel_projections(v->sql, pr, NULL, 0, 1));
     789           0 :                         set_count_prop(v->sql->sa, pr, get_rel_count(pr->l));
     790           0 :                         pr->exps = exps_exp_visitor_bottomup(v, pr, pr->exps, 0, &rel_propagate_statistics, false);
     791             :                 }
     792             : 
     793       11613 :                 for (node *n = rel->exps->h ; n ; n = n->next) {
     794        8834 :                         empty_cross |= rel_setop_get_statistics(v->sql, rel, pl->exps, pr->exps, n->data, i);
     795        8834 :                         i++;
     796             :                 }
     797             : 
     798             :                 /* propagate row count */
     799        2779 :                 if (is_union(rel->op)) {
     800         277 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     801         277 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     802             : 
     803         277 :                         if (lv == 0 && rv == 0) { /* both sides empty */
     804           2 :                                 if (can_be_pruned)
     805             :                                         empty_cross = true;
     806             :                                 else
     807           2 :                                         set_count_prop(v->sql->sa, rel, 0);
     808         275 :                         } else if (can_be_pruned && lv == 0 && !rel_is_ref(rel)) { /* left side empty */
     809           0 :                                 rel = set_setop_side(v, rel, r);
     810           0 :                                 empty_cross = false; /* don't rewrite again */
     811         275 :                         } else if (can_be_pruned && rv == 0 && !rel_is_ref(rel)) { /* right side empty */
     812           0 :                                 rel = set_setop_side(v, rel, l);
     813           0 :                                 empty_cross = false; /* don't rewrite again */
     814         275 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
     815           7 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
     816             :                         }
     817        2502 :                 } else if (is_inter(rel->op) || is_except(rel->op)) {
     818        2502 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     819        2502 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     820             : 
     821        2502 :                         if (lv == 0) { /* left side empty */
     822          62 :                                 if (can_be_pruned)
     823             :                                         empty_cross = true;
     824             :                                 else
     825           5 :                                         set_count_prop(v->sql->sa, rel, 0);
     826        2440 :                         } else if (rv == 0) { /* right side empty */
     827          17 :                                 if (is_inter(rel->op)) {
     828           3 :                                         if (can_be_pruned)
     829             :                                                 empty_cross = true;
     830             :                                         else
     831           0 :                                                 set_count_prop(v->sql->sa, rel, 0);
     832          14 :                                 } else if (can_be_pruned && !rel_is_ref(rel)) {
     833          14 :                                         rel = set_setop_side(v, rel, l);
     834          14 :                                         empty_cross = false; /* don't rewrite again */
     835             :                                 } else {
     836           0 :                                         set_count_prop(v->sql->sa, rel, lv);
     837             :                                 }
     838             :                         } else {
     839        2423 :                                 set_count_prop(v->sql->sa, rel, lv);
     840             :                         }
     841             :                 }
     842        2779 :                 if (can_be_pruned && empty_cross) {
     843          80 :                         rel_destroy(rel->l);
     844          80 :                         rel->l = NULL;
     845          80 :                         rel_destroy(rel->r);
     846          80 :                         rel->r = NULL;
     847         244 :                         for (node *n = rel->exps->h ; n ; n = n->next) {
     848         164 :                                 sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     849         164 :                                 exp_prop_alias(v->sql->sa, a, e);
     850         164 :                                 n->data = a;
     851             :                         }
     852          80 :                         list_hash_clear(rel->exps);
     853          80 :                         sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     854          80 :                         set_count_prop(v->sql->sa, l, 1);
     855          80 :                         l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     856          80 :                         set_count_prop(v->sql->sa, l, 0);
     857          80 :                         rel->op = op_project;
     858          80 :                         rel->l = l;
     859          80 :                         rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     860          80 :                         set_count_prop(v->sql->sa, rel, 0);
     861          80 :                         set_nodistinct(rel); /* set relations may have distinct flag set */
     862          80 :                         v->changes++;
     863             :                 }
     864             :                 break;
     865             :         }
     866       12838 :         case op_munion: {
     867       12838 :                 list *l = rel->l, *nrels = sa_list(v->sql->sa);
     868       12838 :                 BUN cnt = 0;
     869       12838 :                 bool needs_pruning = false;
     870             : 
     871       49097 :                 for (node *n = l->h; n; n = n->next) {
     872       36259 :                         sql_rel *r = n->data, *pl = r;
     873             : 
     874       36259 :                         while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     875           0 :                                         pl = pl->l;
     876             :                         /* if it's not a projection, then project and propagate statistics */
     877       36259 :                         if (!is_project(pl->op) && !is_base(pl->op)) {
     878           0 :                                 pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     879           0 :                                 set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     880           0 :                                 pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     881             :                         }
     882       36259 :                         nrels = append(nrels, pl);
     883             :                         /* we need new munion statistics */
     884             :                         /* propagate row count */
     885       36259 :                         BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     886             :                         /* if PROP_COUNT does not exist we assume at least a row (see get_rel_count def) */
     887       36259 :                         if (rv == BUN_NONE) {
     888        1152 :                                 cnt++;
     889        1152 :                                 continue;
     890             :                         }
     891       35107 :                         if (!rv && can_be_pruned)
     892        6625 :                                 needs_pruning = true;
     893             :                         /* overflow check */
     894       35107 :                         if (rv > (BUN_MAX - cnt))
     895       36259 :                                 rv = BUN_MAX;
     896             :                         else
     897       35107 :                                 cnt += rv;
     898             :                 }
     899       12838 :                 int i = 0;
     900       73570 :                 for (node *n = rel->exps->h ; n ; n = n->next, i++)
     901       60732 :                         rel_munion_get_statistics(v->sql, rel, nrels, n->data, i);
     902             : 
     903       12838 :                 if (needs_pruning && !rel_is_ref(rel)) {
     904        4464 :                         v->changes++;
     905        4464 :                         list *nl = sa_list(l->sa);
     906             : 
     907       16464 :                         for (node *n = nrels->h; n; n = n->next) {
     908       12000 :                                 sql_rel *r = n->data;
     909       12000 :                                 BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     910             : 
     911       12000 :                                 if (!rv) { /* keep last for now */
     912        6155 :                                         rel_destroy(r);
     913        6155 :                                         continue;
     914             :                                 }
     915        5845 :                                 nl = append(nl, r);
     916             :                         }
     917        4464 :                         rel->l = nl;
     918        4464 :                         if (list_length(nl) == 1) {
     919        4139 :                                 sql_rel *l = rel->l = nl->h->data; /* ugh */
     920        4139 :                                 rel->r = NULL;
     921        4139 :                                 rel->op = op_project;
     922             : 
     923       20405 :                                 for (node *n = rel->exps->h, *m = l->exps->h ; n && m ; n = n->next, m = m->next) {
     924       16266 :                                         sql_exp *pe = n->data, *ie = m->data;
     925       16266 :                                         sql_exp *ne = exp_ref(v->sql, ie);
     926       16266 :                                         exp_prop_alias(v->sql->sa, ne, pe);
     927       16266 :                                         n->data = ne;
     928             :                                 }
     929        4139 :                                 list_hash_clear(rel->exps);
     930         325 :                         } else if (list_empty(nl)) {
     931             :                                 /* empty select (project [ nils ] ) */
     932         454 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     933         354 :                                         sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     934         354 :                                         exp_prop_alias(v->sql->sa, a, e);
     935         354 :                                         n->data = a;
     936             :                                 }
     937         100 :                                 list_hash_clear(rel->exps);
     938         100 :                                 sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     939         100 :                                 set_count_prop(v->sql->sa, l, 1);
     940         100 :                                 l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     941         100 :                                 set_count_prop(v->sql->sa, l, 0);
     942         100 :                                 rel->op = op_project;
     943         100 :                                 rel->r = NULL;
     944         100 :                                 rel->l = l;
     945         100 :                                 rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     946         100 :                                 set_count_prop(v->sql->sa, rel, 0);
     947         100 :                                 set_nodistinct(rel); /* set relations may have distinct flag set */
     948             :                         }
     949             :                 } else {
     950        8374 :                         set_count_prop(v->sql->sa, rel, cnt);
     951             :                 }
     952             :                 break;
     953             :         }
     954      526077 :         case op_join:
     955             :         case op_left:
     956             :         case op_right:
     957             :         case op_full:
     958             :         case op_semi:
     959             :         case op_anti:
     960             :         case op_select:
     961             :         case op_project:
     962             :         case op_groupby: {
     963      526077 :                 if (is_groupby(rel->op) && !list_empty(rel->r))
     964       15116 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     965      526076 :                 rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, &rel_propagate_statistics, false);
     966      526067 :                 if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r))
     967       38422 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     968             :                 /* The following optimizations can only be applied after propagating the statistics to rel->exps */
     969      526068 :                 if (can_be_pruned && (is_join(rel->op) || is_select(rel->op)) && !list_empty(rel->exps)) {
     970      137650 :                         int changes = v->changes;
     971      137650 :                         rel->exps = rel_prune_predicates(v, rel);
     972      137650 :                         if (v->changes > changes)
     973        3501 :                                 rel = rewrite_simplify(v, 0, v->value_based_opt, rel);
     974             :                 }
     975             : 
     976             :                 /* propagate row count */
     977      526068 :                 sql_rel *l = rel->l, *r = rel->r;
     978      526068 :                 switch (rel->op) {
     979      132804 :                 case op_join:
     980             :                 case op_left:
     981             :                 case op_right:
     982             :                 case op_full: {
     983      132804 :                         BUN lv = get_rel_count(l), rv = get_rel_count(r), uniques_estimate = BUN_MAX, join_idx_estimate = BUN_MAX;
     984             : 
     985      132804 :                         if (!list_empty(rel->exps) && !is_single(rel)) {
     986      243710 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     987      124374 :                                         sql_exp *e = n->data, *el = e->l, *er = e->r;
     988             : 
     989      124374 :                                         if (find_prop(e->p, PROP_JOINIDX)) {
     990         667 :                                                 join_idx_estimate = lv>rv?lv:rv;
     991      123707 :                                         } else if (e->type == e_cmp && e->flag == cmp_equal) {
     992             :                                                 /* if one of the sides is unique, the cardinality will be that exact number, but look for nulls */
     993      119886 :                                                 if (!is_semantics(e) || !has_nil(el) || !has_nil(er)) {
     994      119557 :                                                         BUN lu = 0, ru = 0;
     995      119557 :                                                         prop *p = NULL;
     996      119557 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES)))
     997       88577 :                                                                 lu = (BUN) p->value.dval;
     998      119557 :                                                         if ((p = find_prop(er->p, PROP_NUNIQUES)))
     999      102982 :                                                                 ru = (BUN) p->value.dval;
    1000      119557 :                                                         if (is_unique(el) || lu > lv) {
    1001       72871 :                                                                 BUN ncount = (is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv;
    1002       72871 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1003       46686 :                                                         } else if (is_unique(er) || ru > rv) {
    1004       29181 :                                                                 BUN ncount = (is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv;
    1005       29181 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1006             :                                                         }
    1007             :                                                 }
    1008             :                                         }
    1009             :                                 }
    1010             :                         }
    1011      132804 :                         if (is_single(rel)) {
    1012        4510 :                                 set_count_prop(v->sql->sa, rel, lv);
    1013      128294 :                         } else if (join_idx_estimate != BUN_MAX) {
    1014         665 :                                 set_count_prop(v->sql->sa, rel, join_idx_estimate);
    1015      127629 :                         } else if (uniques_estimate != BUN_MAX) {
    1016       95691 :                                 set_count_prop(v->sql->sa, rel, uniques_estimate);
    1017       31938 :                         } else if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1018             :                                 /* corner cases for outer joins */
    1019         124 :                                 if (is_left(rel->op)) {
    1020         112 :                                         set_count_prop(v->sql->sa, rel, lv);
    1021          12 :                                 } else if (is_right(rel->op)) {
    1022           3 :                                         set_count_prop(v->sql->sa, rel, rv);
    1023           9 :                                 } else if (is_full(rel->op) && lv != BUN_NONE && rv != BUN_NONE) {
    1024           9 :                                         set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
    1025           0 :                                 } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1026           0 :                                         set_count_prop(v->sql->sa, rel, 0);
    1027             :                                 }
    1028       31814 :                         } else if (lv == 0) {
    1029        1164 :                                 set_count_prop(v->sql->sa, rel, (is_right(rel->op) || is_full(rel->op)) ? rv : 0);
    1030       31218 :                         } else if (rv == 0) {
    1031        1556 :                                 set_count_prop(v->sql->sa, rel, (is_left(rel->op) || is_full(rel->op)) ? lv : 0);
    1032       30156 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1033       17758 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX / lv)) ? BUN_MAX : (lv * rv)); /* overflow check */
    1034             :                         }
    1035             :                         break;
    1036             :                 }
    1037        2618 :                 case op_anti:
    1038        2618 :                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1039        2618 :                         break;
    1040      107905 :                 case op_semi:
    1041             :                 case op_select:
    1042             :                         /* TODO calculate cardinalities using selectivities */
    1043      107905 :                         if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1044        2560 :                                 set_count_prop(v->sql->sa, rel, 0);
    1045             :                         } else {
    1046      207466 :                                 if (!list_empty(rel->exps) && !is_single(rel)) {
    1047      102121 :                                         BUN cnt = get_rel_count(l), u = 1;
    1048      170557 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1049      111959 :                                                 sql_exp *e = n->data, *el = e->l, *er = e->r;
    1050             : 
    1051             :                                                 /* simple expressions first */
    1052      111959 :                                                 if (e->type == e_cmp && e->flag == cmp_equal && exp_is_atom(er)) {
    1053             :                                                         /* use selectivity */
    1054       56649 :                                                         prop *p;
    1055       56649 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES))) {
    1056       43523 :                                                                 u = (BUN) p->value.dval;
    1057       43523 :                                                                 break;
    1058             :                                                         }
    1059             :                                                 }
    1060             :                                         }
    1061             :                                         /* u is an *estimate*, so don't set count_prop to 0 unless cnt is 0 */
    1062      102121 :                                         set_count_prop(v->sql->sa, rel, cnt == 0 ? 0 : u == 0 || u > cnt ? 1 : cnt/u);
    1063             :                                 } else {
    1064        3224 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1065             :                                 }
    1066             :                         }
    1067             :                         break;
    1068      259922 :                 case op_project:
    1069      259922 :                         if (l) {
    1070      250284 :                                 if (need_distinct(rel)) {
    1071           0 :                                         set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->exps));
    1072             :                                 } else {
    1073      250284 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1074             :                                 }
    1075             :                         } else {
    1076        9638 :                                 BUN card = 1;
    1077             : 
    1078        9638 :                                 if (!list_empty(rel->exps)) {
    1079       28379 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1080       18741 :                                                 sql_exp *e = n->data;
    1081       18741 :                                                 card = MAX(card, trivial_project_exp_card(e));
    1082             :                                         }
    1083             :                                 }
    1084        9638 :                                 set_count_prop(v->sql->sa, rel, card);
    1085             :                         }
    1086             :                         break;
    1087       22454 :                 case op_groupby:
    1088       22454 :                         if (list_empty(rel->r)) {
    1089        7338 :                                 set_count_prop(v->sql->sa, rel, 1);
    1090             :                         } else {
    1091       15116 :                                 set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->r));
    1092             :                         }
    1093             :                         break;
    1094             :                 default:
    1095             :                         break;
    1096             :                 }
    1097             :                 break;
    1098             :         }
    1099       16744 :         case op_topn: {
    1100       16744 :                 BUN lv = get_rel_count(rel->l);
    1101             : 
    1102       16744 :                 if (lv != BUN_NONE) {
    1103       16727 :                         sql_exp *le = rel->exps->h->data, *oe = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
    1104          95 :                         if (oe && oe->l && exp_is_not_null(oe)) { /* no parameters */
    1105          95 :                                 BUN offset = (BUN) ((atom*)oe->l)->data.val.lval;
    1106          95 :                                 lv = offset >= lv ? 0 : lv - offset;
    1107             :                         }
    1108       16727 :                         if (le->l && exp_is_not_null(le)) {
    1109       16689 :                                 BUN limit = (BUN) ((atom*)le->l)->data.val.lval;
    1110       16689 :                                 lv = MIN(lv, limit);
    1111             :                         }
    1112       16727 :                         set_count_prop(v->sql->sa, rel, lv);
    1113             :                 }
    1114             :                 break;
    1115             :         }
    1116          21 :         case op_sample: {
    1117          21 :                 BUN lv = get_rel_count(rel->l);
    1118             : 
    1119          21 :                 if (lv != BUN_NONE) {
    1120           3 :                         sql_exp *se = rel->exps->h->data;
    1121           3 :                         sql_subtype *tp = exp_subtype(se);
    1122             : 
    1123           3 :                         if (se->l && tp->type->eclass == EC_NUM) { /* sample is a number of rows */
    1124           3 :                                 BUN sample = (BUN) ((atom*)se->l)->data.val.lval;
    1125           3 :                                 lv = MIN(lv, sample);
    1126           0 :                         } else if (se->l) { /* sample is a percentage of rows */
    1127           0 :                                 dbl percent = ((atom*)se->l)->data.val.dval;
    1128           0 :                                 assert(tp->type->eclass == EC_FLT);
    1129           0 :                                 lv = (BUN) ceil((dbl)lv * percent);
    1130             :                         }
    1131           3 :                         set_count_prop(v->sql->sa, rel, lv);
    1132             :                 }
    1133             :                 break;
    1134             :         }
    1135        6050 :         case op_table: {
    1136        6050 :                 sql_exp *op = rel->r;
    1137        6050 :                 if (rel->flag != TRIGGER_WRAPPER && op) {
    1138        5738 :                         sql_subfunc *f = op->f;
    1139        5738 :                         if (f->func->lang == FUNC_LANG_SQL) {
    1140          78 :                                 set_count_prop(v->sql->sa, rel, 1000 /* just some fallback value */);
    1141        5660 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "storage") == 0) {
    1142         827 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of catalog */);
    1143        4833 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "db_users") == 0) {
    1144           0 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of users */);
    1145        4833 :                         } else if (f->func->lang == FUNC_LANG_MAL && strncmp(f->func->base.name, "querylog", 8) == 0) {
    1146        1081 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of querylog */);
    1147        3752 :                         } else if (f->func->lang == FUNC_LANG_MAL &&
    1148        3642 :                                            (strcmp(f->func->base.name, "queue") == 0 ||
    1149        3379 :                                                 strcmp(f->func->base.name, "optimizers") == 0 ||
    1150        3068 :                                                 strcmp(f->func->base.name, "env") == 0 ||
    1151        2777 :                                                 strcmp(f->func->base.name, "keywords") == 0 ||
    1152        2777 :                                                 strcmp(f->func->base.name, "statistics") == 0 ||
    1153        2112 :                                                 strcmp(f->func->base.name, "rejects") == 0 ||
    1154        1861 :                                                 strcmp(f->func->base.name, "schemastorage") == 0 ||
    1155        1861 :                                                 strncmp(f->func->base.name, "storage", 7) == 0 ||
    1156        1861 :                                                 strcmp(f->func->base.name, "sessions") == 0) ) {
    1157        2070 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of queue */);
    1158             :                         }
    1159             :                         /* else {
    1160             :                                 printf("%%func needs stats : %s\n", f->func->base.name);
    1161             :                         } */
    1162             :                 }
    1163             :                 break;
    1164             :         }
    1165             :         /*These relations are less important for now
    1166             :           TODO later we can tune it */
    1167             :         case op_insert:
    1168             :         case op_update:
    1169             :         case op_delete:
    1170             :         case op_truncate:
    1171             :         case op_ddl:
    1172             :         default:
    1173             :                 break;
    1174             :         }
    1175             : 
    1176             :         return rel;
    1177             : }
    1178             : 
    1179             : static sql_rel *
    1180      538896 : rel_get_statistics(visitor *v, global_props *gp, sql_rel *rel)
    1181             : {
    1182             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
    1183      538896 :         uint8_t has_special_modify = (uint8_t) gp->has_special_modify;
    1184      538896 :         v->data = &has_special_modify;
    1185      538896 :         rel = rel_visitor_bottomup(v, rel, &rel_get_statistics_);
    1186      539387 :         v->data = gp;
    1187      539387 :         return rel;
    1188             : }
    1189             : 
    1190             : run_optimizer
    1191      714728 : bind_get_statistics(visitor *v, global_props *gp)
    1192             : {
    1193      714728 :         (void) v;
    1194      714728 :         return (gp->opt_level == 1 && !gp->cnt[op_insert]) ? rel_get_statistics : NULL;
    1195             : }
    1196             : 
    1197             : 
    1198             : static bool
    1199       94163 : point_select_on_unique_column(sql_rel *rel)
    1200             : {
    1201       94163 :         if (is_select(rel->op) && !list_empty(rel->exps)) {
    1202      130326 :                 for (node *n = rel->exps->h; n ; n = n->next) {
    1203       75084 :                         sql_exp *e = n->data, *el = e->l, *er = e->r, *found = NULL;
    1204             : 
    1205       75084 :                         if (is_compare(e->type) && e->flag == cmp_equal) {
    1206       33230 :                                 if (is_numeric_upcast(el))
    1207           0 :                                         el = el->l;
    1208       33230 :                                 if (is_numeric_upcast(er))
    1209           0 :                                         er = er->l;
    1210       33230 :                                 if (is_alias(el->type) && exp_is_atom(er) && (found = rel_find_exp(rel->l, el)) &&
    1211         746 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(found) || !has_nil(er)))
    1212             :                                         return true;
    1213       32484 :                                 if (is_alias(er->type) && exp_is_atom(el) && (found = rel_find_exp(rel->l, er)) &&
    1214           0 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(el) || !has_nil(found)))
    1215             :                                         return true;
    1216             :                         }
    1217             :                 }
    1218             :         }
    1219             :         return false;
    1220             : }
    1221             : 
    1222             : /*
    1223             :  * A point select on an unique column reduces the number of rows to 1. If the same select is under a
    1224             :  * join, the opposite side's select can be pushed above the join.
    1225             :  */
    1226             : static inline sql_rel *
    1227     1234619 : rel_push_select_up(visitor *v, sql_rel *rel)
    1228             : {
    1229     1234619 :         if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) || is_semi(rel->op)) && !is_single(rel)) {
    1230      252069 :                 sql_rel *l = rel->l, *r = rel->r;
    1231      252069 :                 bool can_pushup_left = is_select(l->op) && !rel_is_ref(l) && !is_single(l) && (is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op)),
    1232      252069 :                         can_pushup_right = is_select(r->op) && !rel_is_ref(r) && !is_single(r) && (is_innerjoin(rel->op) || is_right(rel->op));
    1233             : 
    1234      252069 :                 if (can_pushup_left || can_pushup_right) {
    1235       66405 :                         if (can_pushup_left)
    1236       45087 :                                 can_pushup_left = point_select_on_unique_column(r);
    1237       66405 :                         if (can_pushup_right)
    1238       49076 :                                 can_pushup_right = point_select_on_unique_column(l);
    1239             : 
    1240             :                         /* if both selects retrieve one row each, it's not worth it to push both up */
    1241       66405 :                         if (can_pushup_left && !can_pushup_right) {
    1242         680 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1243         680 :                                 nrel->l = l->l;
    1244         680 :                                 rel = rel_inplace_select(rel, nrel, l->exps);
    1245         680 :                                 assert(is_select(rel->op));
    1246         680 :                                 v->changes++;
    1247       65725 :                         } else if (!can_pushup_left && can_pushup_right) {
    1248          14 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1249          14 :                                 nrel->r = r->l;
    1250          14 :                                 rel = rel_inplace_select(rel, nrel, r->exps);
    1251          14 :                                 assert(is_select(rel->op));
    1252          14 :                                 v->changes++;
    1253             :                         }
    1254             :                 }
    1255             :         }
    1256     1234619 :         return rel;
    1257             : }
    1258             : 
    1259             : static int
    1260       92654 : sql_class_base_score(visitor *v, sql_column *c, sql_subtype *t, bool equality_based)
    1261             : {
    1262       92654 :         int de;
    1263             : 
    1264       92654 :         if (!t)
    1265             :                 return 0;
    1266       92654 :         switch (ATOMstorage(t->type->localtype)) {
    1267             :         case TYPE_bte:
    1268             :                 return 150 - 8;
    1269        1671 :         case TYPE_sht:
    1270        1671 :                 return 150 - 16;
    1271       36762 :         case TYPE_int:
    1272       36762 :                 return 150 - 32;
    1273         515 :         case TYPE_void:
    1274             :         case TYPE_lng:
    1275         515 :                 return 150 - 64;
    1276             :         case TYPE_uuid:
    1277             : #ifdef HAVE_HGE
    1278             :         case TYPE_hge:
    1279             : #endif
    1280             :                 return 150 - 128;
    1281           2 :         case TYPE_flt:
    1282           2 :                 return 75 - 24;
    1283             :         case TYPE_dbl:
    1284             :                 return 75 - 53;
    1285       30466 :         default:
    1286       30466 :                 if (equality_based && c && v->storage_based_opt && (de = mvc_is_duplicate_eliminated(v->sql, c)))
    1287        1550 :                         return 150 - de * 8;
    1288             :                 /* strings and blobs not duplicate eliminated don't get any points here */
    1289             :                 return 0;
    1290             :         }
    1291             : }
    1292             : 
    1293             : static int
    1294       34432 : score_se_base(visitor *v, sql_rel *rel, sql_exp *e)
    1295             : {
    1296       34432 :         int res = 0;
    1297       34432 :         sql_subtype *t = exp_subtype(e);
    1298       34432 :         sql_column *c = NULL;
    1299             : 
    1300       34432 :         if (e->type == e_convert) /* keep unsafes at the end (TODO improve) */
    1301             :                 return -1000;
    1302             :         /* can we find out if the underlying table is sorted */
    1303       33831 :         if ((c = exp_find_column(rel, e, -2)) && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1304       33831 :                 res += 600;
    1305             : 
    1306             :         /* prefer the shorter var types over the longer ones */
    1307       33831 :         res += sql_class_base_score(v, c, t, is_equality_or_inequality_exp(e->flag)); /* smaller the type, better */
    1308       33831 :         return res;
    1309             : }
    1310             : 
    1311             : static int
    1312       58345 : score_se(visitor *v, sql_rel *rel, sql_exp *e)
    1313             : {
    1314       58345 :         int score = 0;
    1315       58345 :         if (e->type == e_cmp && !is_complex_exp(e->flag)) {
    1316       34432 :                 sql_exp *l = e->l;
    1317             : 
    1318       34432 :                 while (l->type == e_cmp) { /* go through nested comparisons */
    1319           2 :                         sql_exp *ll;
    1320             : 
    1321           2 :                         if (l->flag == cmp_filter || l->flag == cmp_or)
    1322           0 :                                 ll = ((list*)l->l)->h->data;
    1323             :                         else
    1324           2 :                                 ll = l->l;
    1325           2 :                         if (ll->type != e_cmp)
    1326             :                                 break;
    1327             :                         l = ll;
    1328             :                 }
    1329       34432 :                 score += score_se_base(v, rel, l);
    1330             :         }
    1331       58345 :         score += exp_keyvalue(e);
    1332       58345 :         return score;
    1333             : }
    1334             : 
    1335             : static inline sql_rel *
    1336     1234620 : rel_select_order(visitor *v, sql_rel *rel)
    1337             : {
    1338     1234620 :         int *scores = NULL;
    1339     1234620 :         sql_exp **exps = NULL;
    1340             : 
    1341     1234620 :         if (is_select(rel->op) && list_length(rel->exps) > 1) {
    1342       27292 :                 node *n;
    1343       27292 :                 int i, nexps = list_length(rel->exps);
    1344       27292 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, nexps);
    1345       27292 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, nexps);
    1346             : 
    1347       85637 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next) {
    1348       58375 :                         exps[i] = n->data;
    1349       58375 :                         if (find_prop(exps[i]->p, PROP_HASHCOL))
    1350             :                                 return rel;
    1351       58345 :                         scores[i] = score_se(v, rel, n->data);
    1352             :                 }
    1353       27262 :                 GDKqsort(scores, exps, NULL, nexps, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1354             : 
    1355       85607 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next)
    1356       58345 :                         n->data = exps[i];
    1357             :         }
    1358             : 
    1359             :         return rel;
    1360             : }
    1361             : 
    1362             : /* Compute the efficiency of using this expression early in a group by list */
    1363             : static int
    1364       58823 : score_gbe(visitor *v, sql_rel *rel, sql_exp *e)
    1365             : {
    1366       58823 :         int res = 0;
    1367       58823 :         sql_subtype *t = exp_subtype(e);
    1368       58823 :         sql_column *c = exp_find_column(rel, e, -2);
    1369             : 
    1370       58823 :         if (e->card == CARD_ATOM) /* constants are trivial to group */
    1371          38 :                 res += 1000;
    1372             :         /* can we find out if the underlying table is sorted */
    1373       58823 :         if (is_unique(e) || find_prop(e->p, PROP_HASHCOL) || (c && v->storage_based_opt && mvc_is_unique(v->sql, c))) /* distinct columns */
    1374        3216 :                 res += 700;
    1375       38412 :         if (c && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1376        3357 :                 res += 500;
    1377       58823 :         if (find_prop(e->p, PROP_HASHIDX)) /* has hash index */
    1378           0 :                 res += 200;
    1379             : 
    1380             :         /* prefer the shorter var types over the longer ones */
    1381       58823 :         res += sql_class_base_score(v, c, t, true); /* smaller the type, better */
    1382       58823 :         return res;
    1383             : }
    1384             : 
    1385             : /* reorder group by expressions */
    1386             : static inline sql_rel *
    1387     1234620 : rel_groupby_order(visitor *v, sql_rel *rel)
    1388             : {
    1389     1234620 :         int *scores = NULL;
    1390     1234620 :         sql_exp **exps = NULL;
    1391             : 
    1392     1234620 :         if (is_groupby(rel->op) && list_length(rel->r) > 1) {
    1393       25321 :                 node *n;
    1394       25321 :                 list *gbe = rel->r;
    1395       25321 :                 int i, ngbe = list_length(gbe);
    1396       25321 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, ngbe);
    1397       25321 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, ngbe);
    1398             : 
    1399             :                 /* first sorting step, give priority for integers and sorted columns */
    1400       84144 :                 for (i = 0, n = gbe->h; n; i++, n = n->next) {
    1401       58823 :                         exps[i] = n->data;
    1402       58823 :                         scores[i] = score_gbe(v, rel, exps[i]);
    1403             :                 }
    1404       25321 :                 GDKqsort(scores, exps, NULL, ngbe, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1405             : 
    1406             :                 /* second sorting step, give priority to strings with lower number of digits */
    1407       50131 :                 for (i = ngbe - 1; i && !scores[i]; i--); /* find expressions with no score from the first round */
    1408       25321 :                 if (scores[i])
    1409       24327 :                         i++;
    1410       25321 :                 if (ngbe - i > 1) {
    1411        8569 :                         for (int j = i; j < ngbe; j++) {
    1412        6570 :                                 sql_subtype *t = exp_subtype(exps[j]);
    1413        6570 :                                 scores[j] = t ? t->digits : 0;
    1414             :                         }
    1415             :                         /* the less number of digits the better, order ascending */
    1416        1999 :                         GDKqsort(scores + i, exps + i, NULL, ngbe - i, sizeof(int), sizeof(void *), TYPE_int, false, true);
    1417             :                 }
    1418             : 
    1419       84144 :                 for (i = 0, n = gbe->h; n; i++, n = n->next)
    1420       58823 :                         n->data = exps[i];
    1421             :         }
    1422             : 
    1423     1234620 :         return rel;
    1424             : }
    1425             : 
    1426             : /* This optimization loop contains optimizations that can potentially use statistics */
    1427             : static sql_rel *
    1428     1234619 : rel_final_optimization_loop_(visitor *v, sql_rel *rel)
    1429             : {
    1430             :         /* Run rel_push_select_up only once at the end to avoid an infinite optimization loop */
    1431     1234619 :         rel = rel_push_select_up(v, rel);
    1432     1234620 :         rel = rel_select_order(v, rel);
    1433             : 
    1434             :         /* TODO? Maybe later add rel_simplify_count, rel_join2semijoin, rel_simplify_fk_joins,
    1435             :            rel_distinct_project2groupby, rel_simplify_predicates, rel_simplify_math,
    1436             :            rel_distinct_aggregate_on_unique_values */
    1437             : 
    1438     1234620 :         rel = rel_groupby_order(v, rel);
    1439     1234619 :         return rel;
    1440             : }
    1441             : 
    1442             : static sql_rel *
    1443       65446 : rel_final_optimization_loop(visitor *v, global_props *gp, sql_rel *rel)
    1444             : {
    1445       65446 :         (void) gp;
    1446       65446 :         return rel_visitor_bottomup(v, rel, &rel_final_optimization_loop_);
    1447             : }
    1448             : 
    1449             : run_optimizer
    1450      715333 : bind_final_optimization_loop(visitor *v, global_props *gp)
    1451             : {
    1452      715333 :         int flag = v->sql->sql_optimizer;
    1453             :         /* At the moment, this optimizer has dependency on 3 flags */
    1454      546863 :         return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_select]) &&
    1455      780781 :                 (flag & push_select_up) && (flag & optimize_select_and_joins_topdown) && (flag & optimize_projections) ? rel_final_optimization_loop : NULL;
    1456             : }

Generated by: LCOV version 1.14