LCOV - code coverage report
Current view: top level - sql/server - rel_statistics.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 831 883 94.1 %
Date: 2024-11-13 22:44:48 Functions: 26 26 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include "rel_optimizer_private.h"
      15             : #include "rel_statistics.h"
      16             : #include "rel_basetable.h"
      17             : #include "rel_rewriter.h"
      18             : 
      19             : static sql_exp *
      20     3244857 : comparison_find_column(sql_exp *input, sql_exp *e)
      21             : {
      22     3244857 :         switch (input->type) {
      23       92278 :         case e_convert: {
      24       92278 :                 list *types = (list *)input->r;
      25       92278 :                 sql_class from = ((sql_subtype*)types->h->data)->type->eclass, to = ((sql_subtype*)types->h->next->data)->type->eclass;
      26       92278 :                 if (from == to)
      27      177800 :                         return comparison_find_column(input->l, e) ? input : NULL;
      28             :                 return NULL;
      29             :         }
      30     2871894 :         case e_column:
      31     2871894 :                 return exp_match(e, input) ? input : NULL;
      32             :         default:
      33             :                 return NULL;
      34             :         }
      35             : }
      36             : 
      37             : /* multi lo <= col <= hi, maybe still be false even if lo or hi are NULL, possibly similar for filter on multiple
      38             :  * columns */
      39             : #define comp_single_column(c) (!c->f)
      40             : 
      41             : static sql_exp *
      42     8290810 : rel_propagate_column_ref_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
      43             : {
      44     8375036 :         assert(e->type == e_column);
      45     8375036 :         if (rel) {
      46     8374957 :                 switch(rel->op) {
      47     3894597 :                 case op_left:
      48             :                 case op_right:
      49             :                 case op_full:
      50             :                 case op_join:
      51             :                 case op_select:
      52             :                 case op_anti:
      53             :                 case op_semi: {
      54     3894597 :                         bool found_without_semantics = false, found_left = false, found_right = false, still_unique = false;
      55             : 
      56     3894597 :                         if ((is_innerjoin(rel->op) || is_select(rel->op)) && list_length(rel->exps) == 1 && exp_is_false(rel->exps->h->data))
      57             :                                 return NULL; /* nothing will pass, skip */
      58             : 
      59             :                         /* propagate from the bottom first */
      60     3886834 :                         if (rel_propagate_column_ref_statistics(sql, rel->l, e))
      61             :                                 found_left = true;
      62     2498956 :                         if (!found_left && is_join(rel->op) && rel_propagate_column_ref_statistics(sql, rel->r, e))
      63     3886835 :                                 found_right = true;
      64             : 
      65     3886835 :                         if (!found_left && !found_right)
      66             :                                 return NULL;
      67     1661126 :                         if (!list_empty(rel->exps) && rel->op != op_anti) { /* if there's an or, the MIN and MAX get difficult to propagate */
      68     3273095 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
      69     1693430 :                                         sql_exp *comp = n->data, *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL, *fe = comp->f, *fne = NULL;
      70             : 
      71     1693430 :                                         if (comp->type == e_cmp) {
      72     1692734 :                                                 if (is_theta_exp(comp->flag) && ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) || (fe && (fne = comparison_find_column(fe, e))))) {
      73      113435 :                                                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
      74      113435 :                                                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
      75             : 
      76             :                                                         /* not semantics found or if explicitly filtering not null values from the column */
      77      113435 :                                                         found_without_semantics |= (!is_semantics(comp) && comp_single_column(comp)) || (comp->flag == cmp_equal && lne && is_anti(comp) && exp_is_null(re));
      78      113435 :                                                         still_unique |= comp->flag == cmp_equal && is_unique(le) && is_unique(re); /* unique if only equi-joins on unique columns are there */
      79      113435 :                                                         if (is_full(rel->op) || (is_left(rel->op) && found_left) || (is_right(rel->op) && found_right)) /* on outer joins, min and max cannot be propagated on some cases */
      80       17946 :                                                                 continue;
      81             :                                                         /* if (end2 >= start1 && start2 <= end1) then the 2 intervals are intersected */
      82       95489 :                                                         if (fe && lval_min && lval_max) { /* range case, the middle expression must intersect the other two */
      83        4622 :                                                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
      84        4622 :                                                                 int int1 = rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0,
      85        4622 :                                                                         int2 = fval_min && fval_max && atom_cmp(fval_max, lval_min) >= 0 && atom_cmp(fval_min, lval_max) <= 0,
      86        4622 :                                                                         symmetric = is_symmetric(comp);
      87             : 
      88        4622 :                                                                 if (is_anti(comp) || (!symmetric && fval_min && rval_max && atom_cmp(fval_min, rval_max) < 0)) /* for asymmetric case the re range must be after the fe range */
      89        1817 :                                                                         continue;
      90        2805 :                                                                 if (lne && int1 && int2) {
      91         104 :                                                                         if (symmetric) {
      92           0 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
      93           0 :                                                                                 atom *nmin = statistics_atom_min(sql, rval_min, fval_min), *nmax = statistics_atom_max(sql, rval_max, fval_max);
      94             :                                                                                 /* max is min from le and (max from re and fe max) */
      95           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, nmax, p2->value.pval) : nmax);
      96             :                                                                                 /* min is max from le and (min from re and fe min) */
      97           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, nmin, p1->value.pval) : nmin);
      98             :                                                                         } else {
      99         104 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
     100             :                                                                                 /* max is min from le and fe max */
     101         104 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, fval_max, p2->value.pval) : fval_max);
     102             :                                                                                 /* min is max from le and re min */
     103         104 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, rval_min, p1->value.pval) : rval_min);
     104             :                                                                         }
     105        2701 :                                                                 } else if (rne) {
     106         590 :                                                                         if (symmetric && int1 && int2) { /* min is max from le and (min from re and fe min) */
     107           0 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     108           0 :                                                                                 atom *nmin = p ? statistics_atom_min(sql, p->value.pval, fval_min) : fval_min;
     109           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, nmin, lval_min));
     110         590 :                                                                         } else if (int1) { /* min is max from le and re min */
     111         100 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     112         100 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     113             :                                                                         }
     114        2111 :                                                                 } else if (fne) {
     115         549 :                                                                         if (symmetric && int1 && int2) { /* max is min from le and (max from re and fe max) */
     116           0 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     117           0 :                                                                                 atom *nmax = p ? statistics_atom_max(sql, p->value.pval, rval_max) : rval_max;
     118           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, nmax, lval_max) : nmax);
     119         549 :                                                                         } else if (int2) { /* max is min from le and fe max */
     120         266 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     121         266 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     122             :                                                                         }
     123             :                                                                 }
     124       90867 :                                                         } else if (lval_min && lval_max && rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0) {
     125             :                                                                 /* both min and max must be set and the intervals must overlap */
     126       39794 :                                                                 switch (comp->flag) {
     127       25916 :                                                                 case cmp_equal: /* for equality reduce */
     128       25916 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_max(sql, lval_max, rval_max) : statistics_atom_min(sql, lval_max, rval_max));
     129       25916 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_min(sql, lval_min, rval_min) : statistics_atom_max(sql, lval_min, rval_min));
     130       25916 :                                                                         break;
     131        4586 :                                                                 case cmp_notequal: /* for inequality expand */
     132        4586 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_min(sql, lval_max, rval_max) : statistics_atom_max(sql, lval_max, rval_max));
     133        4586 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_max(sql, lval_min, rval_min) : statistics_atom_min(sql, lval_min, rval_min));
     134        4586 :                                                                         break;
     135        5286 :                                                                 case cmp_gt:
     136             :                                                                 case cmp_gte:
     137        9634 :                                                                         if (!is_anti(comp) && lne) { /* min is max from both min */
     138        4348 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     139        4348 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min, p->value.pval) : rval_min);
     140         938 :                                                                         } else if (!is_anti(comp)) { /* max is min from both max */
     141         938 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     142         938 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     143             :                                                                         }
     144             :                                                                         break;
     145        4006 :                                                                 case cmp_lt:
     146             :                                                                 case cmp_lte:
     147        7205 :                                                                         if (!is_anti(comp) && lne) { /* max is min from both max */
     148        3199 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     149        3199 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max, p->value.pval) : rval_max);
     150         807 :                                                                         } else if (!is_anti(comp)) { /* min is max from both min */
     151         807 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     152         807 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     153             :                                                                         }
     154             :                                                                         break;
     155             :                                                                 default: /* Maybe later I can do cmp_in and cmp_notin */
     156             :                                                                         break;
     157             :                                                                 }
     158             :                                                         }
     159             :                                                 }
     160             :                                         }
     161             :                                 }
     162             :                         }
     163     1661126 :                         if (is_full(rel->op) || (is_left(rel->op) && found_right) || (is_right(rel->op) && found_left))
     164       32645 :                                 set_has_nil(e);
     165     1661126 :                         if (!is_outerjoin(rel->op) && found_without_semantics) /* at an outer join, null values pass */
     166       85991 :                                 set_has_no_nil(e);
     167     1661126 :                         if (is_join(rel->op) && is_unique(e) && !still_unique)
     168      111462 :                                 set_not_unique(e);
     169     1661126 :                         return e;
     170             :                 }
     171     4376773 :                 case op_table:
     172             :                 case op_basetable:
     173             :                 case op_union:
     174             :                 case op_except:
     175             :                 case op_inter:
     176             :                 case op_munion:
     177             :                 case op_project:
     178             :                 case op_groupby: {
     179     4376773 :                         sql_exp *found;
     180     4376773 :                         atom *fval;
     181     4376773 :                         prop *est;
     182     4376773 :                         if ((found = rel_find_exp(rel, e))) {
     183     1982687 :                                 if (rel->op != op_table) { /* At the moment don't propagate statistics for table relations */
     184     1946118 :                                         if ((fval = find_prop_and_get(found->p, PROP_MAX)))
     185     1048990 :                                                 set_minmax_property(sql, e, PROP_MAX, fval);
     186     1946119 :                                         if ((fval = find_prop_and_get(found->p, PROP_MIN)))
     187     1056938 :                                                 set_minmax_property(sql, e, PROP_MIN, fval);
     188     1946115 :                                         if (!has_nil(found))
     189     1273822 :                                                 set_has_no_nil(e);
     190     1946115 :                                         if (is_unique(found) || (need_distinct(rel) && list_length(rel->exps) == 1) ||
     191     1568526 :                                                 (is_groupby(rel->op) && (list_empty(rel->r) || (list_length(rel->r) == 1 && exps_find_exp(rel->r, e)))))
     192      388442 :                                                 set_unique(e);
     193             :                                         /* propagate unique estimation for known cases */
     194     1946115 :                                         if (is_groupby(rel->op) && list_empty(rel->r) && !find_prop(e->p, PROP_NUNIQUES)) { /* global aggregate case */
     195        7535 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     196        7535 :                                                 p->value.dval = 1;
     197     1938580 :                                         } else if (((is_basetable(rel->op) || is_except(rel->op) || is_inter(rel->op) || is_simple_project(rel->op) ||
     198       67963 :                                                                  (is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
     199     1884839 :                                                                 (est = find_prop(found->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES))) {
     200      166725 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     201      166725 :                                                 p->value.dval = est->value.dval;
     202             :                                         }
     203             :                                 }
     204     1982685 :                                 return e;
     205             :                         }
     206             :                         return NULL;
     207             :                 }
     208       84226 :                 case op_topn:
     209             :                 case op_sample:
     210       84226 :                         return rel_propagate_column_ref_statistics(sql, rel->l, e);
     211             :                 default:
     212             :                         break;
     213             :                 }
     214             :         }
     215             :         return NULL;
     216             : }
     217             : 
     218             : static atom *
     219     4086149 : atom_from_valptr( allocator *sa, sql_subtype *tpe, ValRecord *v)
     220             : {
     221     4086149 :         atom *a = SA_NEW(sa, atom);
     222             : 
     223     4086087 :         assert(!VALisnil(v));
     224     4085994 :         *a = (atom) {.tpe = *tpe,};
     225     4085994 :         SA_VALcopy(sa, &a->data, v);
     226     4085944 :         return a;
     227             : }
     228             : 
     229             : void
     230     3596267 : sql_column_get_statistics(mvc *sql, sql_column *c, sql_exp *e)
     231             : {
     232     3596267 :         bool nonil = false, unique = false;
     233     3596267 :         double unique_est = 0.0;
     234     3596267 :         ValRecord min, max;
     235     3596267 :         int ok = mvc_col_stats(sql, c, &nonil, &unique, &unique_est, &min, &max);
     236             : 
     237     3596269 :         if (has_nil(e) && nonil)
     238     2453932 :                 set_has_no_nil(e);
     239     3596269 :         if (!is_unique(e) && unique)
     240     1055296 :                 set_unique(e);
     241     3596269 :         if (unique_est != 0.0) {
     242     2398661 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     243     2398653 :                 p->value.dval = unique_est;
     244             :         }
     245     3596261 :         unsigned int digits = 0;
     246     3596261 :         sql_subtype *et = exp_subtype(e);
     247     3596277 :         if (et->type->eclass == EC_DEC || et->type->eclass == EC_NUM)
     248     2364880 :                 digits = et->digits;
     249     3596277 :         if ((ok & 2) == 2) {
     250     2039554 :                 if (!VALisnil(&max)) {
     251     2039538 :                         prop *p = e->p = prop_create(sql->sa, PROP_MAX, e->p);
     252     2039529 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &max);
     253     2039480 :                         if (digits) {
     254     1530169 :                                 unsigned int nd = atom_digits(p->value.pval);
     255     1530213 :                                 if (nd < digits)
     256             :                                         digits = nd;
     257     1530213 :                                 if (!digits)
     258             :                                         digits = 1;
     259             :                         }
     260             :                 }
     261     2039527 :                 VALclear(&max);
     262             :         }
     263     3596232 :         if ((ok & 1) == 1) {
     264     2046890 :                 if (!VALisnil(&min)) {
     265     2046878 :                         prop *p = e->p = prop_create(sql->sa, PROP_MIN, e->p);
     266     2046884 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &min);
     267     2046847 :                         if (digits) {
     268     1538621 :                                 unsigned int nd = atom_digits(p->value.pval);
     269     1538617 :                                 if (nd > digits) /* possibly set to low by max value */
     270             :                                         digits = nd;
     271             :                                 if (!digits)
     272             :                                         digits = 1;
     273             :                         }
     274             :                 }
     275     2046838 :                 VALclear(&min);
     276             :         }
     277     3596167 :         if (digits)
     278     2364771 :                 et->digits = digits;
     279     3596167 :         if (et->type->eclass == EC_DEC && et->digits <= et->scale)
     280         216 :                 et->digits = et->scale + 1;
     281     3596167 : }
     282             : 
     283             : static void
     284      876141 : rel_basetable_column_get_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
     285             : {
     286      876141 :         if (e->p)
     287             :                 return;
     288      292010 :         sql_column *c = NULL;
     289             : 
     290      292010 :         if ((c = rel_base_find_column(rel, e->nid))) {
     291      195835 :                 sql_column_get_statistics(sql, c, e);
     292             :         }
     293             : }
     294             : 
     295             : static bool
     296        8872 : rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, sql_exp *e, int i)
     297             : {
     298        8872 :         sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
     299        8872 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     300        8872 :                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     301        8872 :         prop *est;
     302             : 
     303             :         /* for the intersection, if both expressions don't overlap, it can be pruned */
     304        8872 :         if (is_inter(rel->op) && !has_nil(le) && !has_nil(re) &&
     305         511 :                 ((rval_max && lval_min && atom_cmp(rval_max, lval_min) < 0) || (rval_min && lval_max && atom_cmp(rval_min, lval_max) > 0)))
     306          26 :                 return true;
     307             : 
     308        8846 :         if (lval_max && rval_max) {
     309        2557 :                 if (is_union(rel->op))
     310           3 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     311        2554 :                 else if (is_inter(rel->op))
     312         399 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval_max, rval_max)); /* for intersect the new max will be the min of the two */
     313             :                 else /* except */
     314        2155 :                         set_minmax_property(sql, e, PROP_MAX, lval_max);
     315             :         }
     316        8846 :         if (lval_min && rval_min) {
     317        2557 :                 if (is_union(rel->op))
     318           3 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     319        2554 :                 else if (is_inter(rel->op))
     320         399 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval_min, rval_min)); /* for intersect the new min will be the max of the two */
     321             :                 else /* except */
     322        2155 :                         set_minmax_property(sql, e, PROP_MIN, lval_min);
     323             :         }
     324             : 
     325        8846 :         if (is_union(rel->op) || is_munion(rel->op)) {
     326        5986 :                 if (!has_nil(le) && !has_nil(re))
     327         879 :                         set_has_no_nil(e);
     328        5986 :                 if (need_distinct(rel) && list_length(rel->exps) == 1)
     329           0 :                         set_unique(e);
     330        2860 :         } else if (is_inter(rel->op)) {
     331         564 :                 if (!has_nil(le) || !has_nil(re))
     332         509 :                         set_has_no_nil(e);
     333         564 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     334         345 :                         set_unique(e);
     335             :         } else {
     336        2296 :                 assert(is_except(rel->op));
     337        2296 :                 if (!has_nil(le))
     338        2233 :                         set_has_no_nil(e);
     339        2296 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     340        2009 :                         set_unique(e);
     341             :         }
     342             :         /* propagate unique estimation for known cases */
     343        8846 :         if (!is_union(rel->op) && (est = find_prop(le->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES)) {
     344         205 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     345         205 :                 p->value.dval = est->value.dval;
     346             :         }
     347             :         return false;
     348             : }
     349             : 
     350             : 
     351             : static void
     352       56927 : rel_munion_get_statistics(mvc *sql, sql_rel *rel, list *rels, sql_exp *e, int i)
     353             : {
     354       56927 :         assert(is_munion(rel->op));
     355             : 
     356       56927 :         sql_rel *l = rels->h->data;
     357       56927 :         sql_exp *le = list_fetch(l->exps, i);
     358       56927 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX);
     359       56927 :         bool has_nonil = !has_nil(le);
     360             : 
     361      160702 :         for(node *n = rels->h->next; n; n = n->next) {
     362      103775 :                 sql_rel *r = n->data;
     363      103775 :                 sql_exp *re = list_fetch(r->exps, i);
     364      103775 :                 atom *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     365             : 
     366      103775 :                 if (lval_max && rval_max) {
     367       39696 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     368       39696 :                         lval_max = find_prop_and_get(e->p, PROP_MAX);
     369             :                 }
     370      103775 :                 if (lval_min && rval_min) {
     371       39263 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     372       39263 :                         lval_min = find_prop_and_get(e->p, PROP_MIN);
     373             :                 }
     374      103775 :                 has_nonil &= !has_nil(re);
     375             : 
     376             :         }
     377             : 
     378       56927 :         if (has_nonil)
     379       40429 :                 set_has_no_nil(e);
     380             : 
     381       56927 :         if (need_distinct(rel) && list_length(rel->exps) == 1)
     382        1594 :                 set_unique(e);
     383       56927 : }
     384             : 
     385             : 
     386             : static sql_exp *
     387     3201170 : rel_propagate_statistics(visitor *v, sql_rel *rel, sql_exp *e, int depth)
     388             : {
     389     3201170 :         mvc *sql = v->sql;
     390     3201170 :         atom *lval;
     391             : 
     392     3201170 :         (void) depth;
     393     3201170 :         switch(e->type) {
     394     1987358 :         case e_column:
     395     1987358 :                 switch (rel->op) { /* set relations don't call rel_propagate_statistics */
     396      259459 :                 case op_join:
     397             :                 case op_left:
     398             :                 case op_right:
     399             :                 case op_full:
     400             :                 case op_semi:
     401             :                 case op_anti: {
     402      259459 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e);
     403      259459 :                         if (!found)
     404      130122 :                                 (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     405             :                         break;
     406             :                 }
     407     1727899 :                 case op_select:
     408             :                 case op_project:
     409             :                 case op_groupby: {
     410     1727899 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e); /* labels may be found on the same projection, ugh */
     411     1727899 :                         if (!found && is_simple_project(rel->op))
     412      117872 :                                 (void) rel_propagate_column_ref_statistics(sql, rel, e);
     413             :                         break;
     414             :                 }
     415           0 :                 case op_insert:
     416             :                 case op_update:
     417             :                 case op_delete:
     418           0 :                         (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     419           0 :                         break;
     420             :                 default:
     421             :                         break;
     422             :                 }
     423             :                 break;
     424       91799 :         case e_convert: {
     425       91799 :                 sql_subtype *to = exp_totype(e), *from = exp_fromtype(e);
     426       91799 :                 sql_exp *l = e->l;
     427       91799 :                 sql_class fr = from->type->eclass, too = to->type->eclass;
     428       91799 :                 prop *est;
     429             : 
     430       91799 :                 if (fr == too) {
     431       82893 :                         if ((lval = find_prop_and_get(l->p, PROP_MAX))) {
     432       54791 :                                 atom *res = atom_copy(sql->sa, lval);
     433       54791 :                                 if ((res = atom_cast(sql->sa, res, to)))
     434       54768 :                                         set_minmax_property(sql, e, PROP_MAX, res);
     435             :                         }
     436       82893 :                         if ((lval = find_prop_and_get(l->p, PROP_MIN))) {
     437       55408 :                                 atom *res = atom_copy(sql->sa, lval);
     438       55408 :                                 if ((res = atom_cast(sql->sa, res, to)))
     439       55385 :                                         set_minmax_property(sql, e, PROP_MIN, res);
     440             :                         }
     441             :                 }
     442             :                 /* propagating nuniques through conversions is tricky. I am adding just the general cases */
     443       91799 :                 if ((est = find_prop(l->p, PROP_NUNIQUES)) &&
     444       56543 :                         (((fr == EC_TIME || fr == EC_TIME_TZ || fr == EC_DATE) && (too == EC_TIMESTAMP || too == EC_TIMESTAMP_TZ)) ||
     445       56518 :                          ((fr == EC_NUM || fr == EC_BIT) && too == EC_NUM) || (fr == EC_DEC && too == EC_DEC) || (fr != EC_EXTERNAL && too == EC_STRING))) {
     446       53121 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     447       53121 :                         p->value.dval = est->value.dval;
     448             :                 }
     449       91799 :                 if (!has_nil(l))
     450       50907 :                         set_has_no_nil(e);
     451             :                 break;
     452             :         }
     453      326637 :         case e_aggr:
     454             :         case e_func: {
     455      326637 :                 BUN lv;
     456      326637 :                 sql_subfunc *f = e->f;
     457             : 
     458      326637 :                 if (!f->func->s) {
     459      301185 :                         int key = hash_key(f->func->base.name); /* Using hash lookup */
     460      301185 :                         sql_hash_e *he = sql_functions_lookup->buckets[key&(sql_functions_lookup->size-1)];
     461      301185 :                         lookup_function look = NULL;
     462             : 
     463      660119 :                         for (; he && !look; he = he->chain) {
     464      358934 :                                 struct function_properties* fp = (struct function_properties*) he->value;
     465             : 
     466      358934 :                                 if (!strcmp(f->func->base.name, fp->name))
     467      104855 :                                         look = fp->func;
     468             :                         }
     469      301185 :                         if (look)
     470      104855 :                                 look(sql, e);
     471             :                 }
     472             :                 /* for global aggregates with no semantics, if the left relation has values, then the output won't be NULL */
     473      326637 :                 if (!is_semantics(e) && e->l && !have_nil(e->l) &&
     474       86062 :                         (e->type != e_aggr || (is_groupby(rel->op) && list_length(rel->r)) || ((lv = get_rel_count(rel->l)) != BUN_NONE && lv > 0)))
     475       85735 :                         set_has_no_nil(e);
     476             :                 /* set properties for global aggregates */
     477      326637 :                 if (e->type == e_aggr && is_groupby(rel->op) && list_empty(rel->r)) {
     478        7868 :                         if (!find_prop(e->p, PROP_NUNIQUES)) {
     479        7868 :                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     480        7868 :                                 p->value.dval = 1;
     481             :                         }
     482        7868 :                         set_unique(e);
     483             :                 }
     484             :                 break;
     485             :         }
     486      531712 :         case e_atom:
     487      531712 :                 if (e->l) {
     488      514867 :                         atom *a = (atom*) e->l;
     489      514867 :                         if (!a->isnull) {
     490      459785 :                                 set_minmax_property(sql, e, PROP_MAX, a);
     491      459785 :                                 set_minmax_property(sql, e, PROP_MIN, a);
     492             :                         }
     493      514867 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     494      514867 :                         p->value.dval = 1;
     495       16845 :                 } else if (e->f) {
     496        3520 :                         list *vals = (list *) e->f;
     497        3520 :                         sql_exp *first = vals->h ? vals->h->data : NULL;
     498        3520 :                         atom *max = NULL, *min = NULL; /* all child values must have a valid min/max */
     499        3520 :                         int has_nil = 0;
     500             : 
     501        3520 :                         if (first) {
     502        3520 :                                 max = ((lval = find_prop_and_get(first->p, PROP_MAX))) ? lval : NULL;
     503        3520 :                                 min = ((lval = find_prop_and_get(first->p, PROP_MIN))) ? lval : NULL;
     504        3520 :                                 has_nil |= has_nil(first);
     505             :                         }
     506             : 
     507       15389 :                         for (node *n = vals->h ? vals->h->next : NULL ; n ; n = n->next) {
     508        8349 :                                 sql_exp *ee = n->data;
     509             : 
     510        8349 :                                 if (min && max) {
     511        7856 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MAX))) {
     512        7810 :                                                 max = atom_cmp(lval, max) > 0 ? lval : max;
     513             :                                         } else {
     514             :                                                 max = NULL;
     515             :                                         }
     516        7856 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MIN))) {
     517        7810 :                                                 min = atom_cmp(min, lval) > 0 ? lval : min;
     518             :                                         } else {
     519             :                                                 min = NULL;
     520             :                                         }
     521             :                                 }
     522        8349 :                                 has_nil |= has_nil(ee);
     523             :                         }
     524             : 
     525        3520 :                         if (!has_nil)
     526        3150 :                                 set_has_no_nil(e);
     527        3520 :                         if (min && max) {
     528        3108 :                                 set_minmax_property(sql, e, PROP_MAX, max);
     529        3108 :                                 set_minmax_property(sql, e, PROP_MIN, min);
     530             :                         }
     531        3520 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     532        3520 :                         p->value.dval = (dbl) list_length(vals);
     533             :                 } else {
     534       13325 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     535       13325 :                         p->value.dval = 1;
     536             :                 }
     537             :                 break;
     538      263664 :         case e_cmp:
     539             :                 /* TODO? propagating min/max/unique of booleans is not very worth it */
     540      263664 :                 if (e->flag == cmp_or || e->flag == cmp_filter) {
     541       17012 :                         if (!have_nil(e->l) && !have_nil(e->r))
     542       12832 :                                 set_has_no_nil(e);
     543      246652 :                 } else if (e->flag == cmp_in || e->flag == cmp_notin) {
     544       21114 :                         sql_exp *le = e->l;
     545       21114 :                         if (!has_nil(le) && !have_nil(e->r))
     546       18440 :                                 set_has_no_nil(e);
     547             :                 } else {
     548      225538 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     549      225538 :                         if (!has_nil(le) && !has_nil(re) && (!e->f || !has_nil(fe)))
     550      168477 :                                 set_has_no_nil(e);
     551             :                 }
     552             :                 break;
     553             :         case e_psm:
     554             :                 break;
     555             :         }
     556             : 
     557             : #ifndef NDEBUG
     558             :         {
     559             :                 /* min and max cannot be NULL and min must be <= than max, if it happens the inner relation must be empty! */
     560     3201170 :                 atom *min = find_prop_and_get(e->p, PROP_MIN), *max = find_prop_and_get(e->p, PROP_MAX);
     561             : 
     562     3201170 :                 (void) min;
     563     3201170 :                 (void) max;
     564     3201170 :                 assert(!min || !min->isnull);
     565     3201170 :                 assert(!max || !max->isnull);
     566     3201170 :                 assert(!min || !max || (min && max)/* atom_cmp(min, max) <= 0*/);
     567             :         }
     568             : #endif
     569     3201170 :         return e;
     570             : }
     571             : 
     572             : static list * /* Remove predicates always false from min/max values */
     573      120948 : rel_prune_predicates(visitor *v, sql_rel *rel)
     574             : {
     575      120948 :         if (rel->l) {
     576      120948 :                 sql_rel *l = rel->l;
     577      120948 :                 if (is_single(l))
     578        2173 :                         return rel->exps;
     579             :         }
     580      118775 :         if (!list_empty(rel->attr))
     581        2963 :                 return rel->exps;
     582      246190 :         for (node *n = rel->exps->h ; n ; n = n->next) {
     583      130378 :                 sql_exp *e = n->data;
     584             : 
     585      130378 :                 if (e->type == e_cmp && is_theta_exp(e->flag)) {
     586      105956 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     587      105956 :                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     588      105956 :                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     589      105956 :                         bool always_false = false, always_true = false;
     590             : 
     591      105956 :                         if (fe && !is_symmetric(e)) {
     592        3058 :                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
     593        3058 :                                 comp_type lower = range2lcompare(e->flag), higher = range2rcompare(e->flag);
     594        3660 :                                 int not_int1 = rval_min && lval_max && /* the middle and left intervals don't overlap */
     595        1663 :                                         (!is_anti(e) && (lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0)),
     596        4072 :                                         not_int2 = lval_min && fval_max && /* the middle and right intervals don't overlap */
     597        2485 :                                         (!is_anti(e) && (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0)),
     598        3058 :                                         not_int3 = rval_min && fval_max && /* the left interval is after the right one */
     599        1287 :                                         (!is_anti(e) && (atom_cmp(rval_min, fval_max) > 0));
     600             : 
     601        3058 :                                 always_false |= not_int1 || not_int2 || not_int3;
     602             :                                 /* for anti the middle must be before the left or after the right or the right after the left, for the other the middle must be always between the left and right intervals */
     603        4113 :                                 always_true |= !has_nil(le) && !has_nil(re) && !has_nil(fe) &&
     604        3957 :                                         lval_min && lval_max && rval_min && rval_max && fval_min && fval_max &&
     605         575 :                                         (is_anti(e) ? ((lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) || (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
     606         488 :                                          ((lower == cmp_gte ? atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0)));
     607             :                         } else if (!fe) {
     608      102880 :                                 if (!is_semantics(e)) /* trivial not null cmp null case */
     609      196110 :                                         always_false |= !is_anti(e) && ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)));
     610      102880 :                                 switch (e->flag) {
     611       88983 :                                 case cmp_equal:
     612       88983 :                                         if (lval_min && lval_max && rval_min && rval_max && (!is_semantics(e) || !has_nil(le) || !has_nil(re)))
     613      117424 :                                                 always_false |= (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     614       88983 :                                         if (is_semantics(e)) { /* prune *= NULL cases */
     615        4796 :                                                 always_false |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     616        9592 :                                                 always_true |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     617             :                                         }
     618             :                                         break;
     619        6464 :                                 case cmp_notequal:
     620        6464 :                                         if (lval_min && lval_max && rval_min && rval_max)
     621       10730 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     622        6464 :                                         if (is_semantics(e)) {
     623          29 :                                                 always_true |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     624          58 :                                                 always_false |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     625             :                                         }
     626             :                                         break;
     627        4805 :                                 case cmp_gt:
     628        4805 :                                         if (lval_max && rval_min)
     629        1789 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     630        4805 :                                         if (lval_min && rval_max)
     631        3578 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     632             :                                         break;
     633         103 :                                 case cmp_gte:
     634         103 :                                         if (lval_max && rval_min)
     635          41 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     636         103 :                                         if (lval_min && rval_max)
     637          82 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     638             :                                         break;
     639        2441 :                                 case cmp_lt:
     640        2441 :                                         if (lval_min && rval_max)
     641        1383 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     642        2441 :                                         if (lval_max && rval_min)
     643        2814 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     644             :                                         break;
     645          84 :                                 case cmp_lte:
     646          84 :                                         if (lval_min && rval_max)
     647          11 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     648          84 :                                         if (lval_max && rval_min)
     649          22 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     650             :                                         break;
     651             :                                 default: /* Maybe later I can do cmp_in and cmp_notin but don't forget to remove is_theta_exp check up there */
     652             :                                         break;
     653             :                                 }
     654             :                         }
     655      105956 :                         assert(!always_false || !always_true);
     656      105956 :                         if (always_false || always_true) {
     657        3392 :                                 sql_exp *ne = exp_atom_bool(v->sql->sa, always_true /* if always_true set then true else false */);
     658        3392 :                                 if (exp_name(e))
     659           0 :                                         exp_prop_alias(v->sql->sa, ne, e);
     660        3392 :                                 n->data = ne;
     661        3392 :                                 v->changes++;
     662             :                         }
     663             :                 }
     664             :         }
     665      115812 :         return rel->exps;
     666             : }
     667             : 
     668             : static sql_rel *
     669          14 : set_setop_side(visitor *v, sql_rel *rel, sql_rel *side)
     670             : {
     671          14 :         if (side == rel->r)
     672           0 :                 rel->r = NULL;
     673             :         else
     674          14 :                 rel->l = NULL;
     675          14 :         if (!is_simple_project(side->op) || rel_is_ref(side) || !list_empty(side->r)) {
     676           0 :                 side = rel_project(v->sql->sa, side, rel_projections(v->sql, side, NULL, 0, 1));
     677           0 :                 set_count_prop(v->sql->sa, side, get_rel_count(side->l));
     678           0 :                 side->exps = exps_exp_visitor_bottomup(v, side, side->exps, 0, &rel_propagate_statistics, false);
     679             :         }
     680          35 :         for (node *n = rel->exps->h, *m = side->exps->h ; n && m ; n = n->next, m = m->next) {
     681          21 :                 sql_exp *e1 = n->data, *e2 = m->data;
     682          21 :                 exp_prop_alias(v->sql->sa, e2, e1);
     683             :         }
     684          14 :         list_hash_clear(side->exps);
     685             : 
     686          14 :         if (need_distinct(rel))
     687           0 :                 set_distinct(side);
     688          14 :         side->p = prop_copy(v->sql->sa, rel->p);
     689          14 :         rel_destroy(rel);
     690          14 :         return side;
     691             : }
     692             : 
     693             : static BUN
     694       14897 : trivial_project_exp_card(sql_exp *e)
     695             : {
     696       15104 :         if (e->type == e_convert)
     697         207 :                 return trivial_project_exp_card(e->l);
     698       14897 :         return e->type == e_atom && e->f ? (BUN) list_length(e->f) : 1;
     699             : }
     700             : 
     701             : static BUN
     702       24956 : rel_calc_nuniques(mvc *sql, sql_rel *l, list *exps)
     703             : {
     704       24956 :         BUN lv = get_rel_count(l);
     705             : 
     706       24956 :         if (lv == 0)
     707             :                 return 0;
     708       22407 :         if (!list_empty(exps)) {
     709       22407 :                 BUN nuniques = 0;
     710             :                 /* compute the highest number of unique values */
     711       55862 :                 for (node *n = exps->h ; n && nuniques != BUN_NONE ; n = n->next) {
     712       33454 :                         sql_exp *e = n->data;
     713       33454 :                         sql_rel *bt = NULL;
     714       33454 :                         prop *p = NULL;
     715       33454 :                         BUN euniques = BUN_NONE;
     716       33454 :                         atom *min, *max, *sub = NULL;
     717       33454 :                         sql_subtype *tp = exp_subtype(e);
     718       33454 :                         sql_class ec = tp ? tp->type->eclass : EC_STRING; /* if 'e' has no type (eg parameter), use a non-number type to fail condition */
     719             : 
     720       33454 :                         if ((p = find_prop(e->p, PROP_NUNIQUES))) {
     721       24110 :                                 euniques = (BUN) p->value.dval;
     722        9344 :                         } else if (e->type == e_column && e->nid && rel_find_exp_and_corresponding_rel(l, e, false, &bt, NULL) && bt && (p = find_prop(bt->p, PROP_COUNT))) {
     723        7329 :                                 euniques = (BUN) p->value.lval;
     724             :                         }
     725             :                         /* use min to max range to compute number of possible values in the domain for number types */
     726       33454 :                         if ((EC_TEMP(ec)||ec==EC_NUM||ec==EC_MONTH||ec==EC_POS) &&
     727       23407 :                                 (min = find_prop_and_get(e->p, PROP_MIN)) && (max = find_prop_and_get(e->p, PROP_MAX))) {
     728             :                                 /* the range includes min and max, so the atom_inc call is needed */
     729             :                                 /* if 'euniques' has number of distinct values, compute min between both */
     730       18955 :                                 if ((sub = atom_sub(sql->sa, max, min)) && (sub = atom_inc(sql->sa, sub)) && (sub = atom_cast(sql->sa, sub, sql_bind_localtype("oid"))))
     731           0 :                                         euniques = MIN(euniques, (BUN) sub->data.val.oval);
     732             :                         }
     733       33455 :                         if (euniques != BUN_NONE)
     734       31440 :                                 nuniques = MAX(nuniques, euniques); /* the highest cardinality sets the estimation */
     735             :                         else
     736             :                                 nuniques = BUN_NONE;
     737             :                 }
     738       22408 :                 if (nuniques != BUN_NONE)
     739             :                         return nuniques;
     740             :         }
     741             :         return lv;
     742             : }
     743             : 
     744             : static sql_rel *
     745     2007191 : rel_get_statistics_(visitor *v, sql_rel *rel)
     746             : {
     747             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
     748     2007191 :         uint8_t has_special_modify = *(uint8_t*) v->data;
     749     2007191 :         bool can_be_pruned = !has_special_modify && v->storage_based_opt;
     750             : 
     751             :         /* Don't look at the same relation twice */
     752     2007191 :         if (are_statistics_gathered(rel->used))
     753             :                 return rel;
     754     1270875 :         rel->used |= statistics_gathered;
     755             : 
     756     1270875 :         switch (rel->op) {
     757      297259 :         case op_basetable: {
     758      297259 :                 sql_table *t = (sql_table *) rel->l;
     759      297259 :                 sqlstore *store = v->sql->session->tr->store;
     760             : 
     761      297259 :                 if (!list_empty(rel->exps)) {
     762     1173351 :                         for (node *n = rel->exps->h ; n ; n = n->next)
     763      876141 :                                 rel_basetable_column_get_statistics(v->sql, rel, n->data);
     764             :                 }
     765             :                 /* Set table row count. TODO? look for remote tables. Don't look at storage for declared tables, because it won't be cleaned */
     766      297259 :                 if (isTable(t) && t->s && !isDeclaredTable(t)) /* count active rows only */
     767      243903 :                         set_count_prop(v->sql->sa, rel, (BUN)store->storage_api.count_col(v->sql->session->tr, ol_first_node(t->columns)->data, 10));
     768             :                 break;
     769             :         }
     770        2790 :         case op_union:
     771             :         case op_inter:
     772             :         case op_except: {
     773        2790 :                 bool empty_cross = false;
     774        2790 :                 int i = 0;
     775        2790 :                 sql_rel *l = rel->l, *pl = rel->l, *r = rel->r, *pr = rel->r;
     776             : 
     777        2790 :                 while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     778           0 :                         pl = pl->l;
     779        2790 :                 while (is_sample(pr->op) || is_topn(pr->op))
     780           0 :                         pr = pr->l;
     781             :                 /* if it's not a projection, then project and propagate statistics */
     782        2790 :                 if (!is_project(pl->op) && !is_base(pl->op)) {
     783           0 :                         pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     784           0 :                         set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     785           0 :                         pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     786             :                 }
     787        2790 :                 if (!is_project(pr->op) && !is_base(pr->op)) {
     788           0 :                         pr = rel_project(v->sql->sa, pr, rel_projections(v->sql, pr, NULL, 0, 1));
     789           0 :                         set_count_prop(v->sql->sa, pr, get_rel_count(pr->l));
     790           0 :                         pr->exps = exps_exp_visitor_bottomup(v, pr, pr->exps, 0, &rel_propagate_statistics, false);
     791             :                 }
     792             : 
     793       11662 :                 for (node *n = rel->exps->h ; n ; n = n->next) {
     794        8872 :                         empty_cross |= rel_setop_get_statistics(v->sql, rel, pl->exps, pr->exps, n->data, i);
     795        8872 :                         i++;
     796             :                 }
     797             : 
     798             :                 /* propagate row count */
     799        2790 :                 if (is_union(rel->op)) {
     800         277 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     801         277 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     802             : 
     803         277 :                         if (lv == 0 && rv == 0) { /* both sides empty */
     804           2 :                                 if (can_be_pruned)
     805             :                                         empty_cross = true;
     806             :                                 else
     807           2 :                                         set_count_prop(v->sql->sa, rel, 0);
     808         275 :                         } else if (can_be_pruned && lv == 0 && !rel_is_ref(rel)) { /* left side empty */
     809           0 :                                 rel = set_setop_side(v, rel, r);
     810           0 :                                 empty_cross = false; /* don't rewrite again */
     811         275 :                         } else if (can_be_pruned && rv == 0 && !rel_is_ref(rel)) { /* right side empty */
     812           0 :                                 rel = set_setop_side(v, rel, l);
     813           0 :                                 empty_cross = false; /* don't rewrite again */
     814         275 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
     815           7 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
     816             :                         }
     817        2513 :                 } else if (is_inter(rel->op) || is_except(rel->op)) {
     818        2513 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     819        2513 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     820             : 
     821        2513 :                         if (lv == 0) { /* left side empty */
     822          62 :                                 if (can_be_pruned)
     823             :                                         empty_cross = true;
     824             :                                 else
     825           5 :                                         set_count_prop(v->sql->sa, rel, 0);
     826        2451 :                         } else if (rv == 0) { /* right side empty */
     827          17 :                                 if (is_inter(rel->op)) {
     828           3 :                                         if (can_be_pruned)
     829             :                                                 empty_cross = true;
     830             :                                         else
     831           0 :                                                 set_count_prop(v->sql->sa, rel, 0);
     832          14 :                                 } else if (can_be_pruned && !rel_is_ref(rel)) {
     833          14 :                                         rel = set_setop_side(v, rel, l);
     834          14 :                                         empty_cross = false; /* don't rewrite again */
     835             :                                 } else {
     836           0 :                                         set_count_prop(v->sql->sa, rel, lv);
     837             :                                 }
     838             :                         } else {
     839        2434 :                                 set_count_prop(v->sql->sa, rel, lv);
     840             :                         }
     841             :                 }
     842        2790 :                 if (can_be_pruned && empty_cross) {
     843          80 :                         rel_destroy(rel->l);
     844          80 :                         rel->l = NULL;
     845          80 :                         rel_destroy(rel->r);
     846          80 :                         rel->r = NULL;
     847         244 :                         for (node *n = rel->exps->h ; n ; n = n->next) {
     848         164 :                                 sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     849         164 :                                 exp_prop_alias(v->sql->sa, a, e);
     850         164 :                                 n->data = a;
     851             :                         }
     852          80 :                         list_hash_clear(rel->exps);
     853          80 :                         sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     854          80 :                         set_count_prop(v->sql->sa, l, 1);
     855          80 :                         l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     856          80 :                         set_count_prop(v->sql->sa, l, 0);
     857          80 :                         rel->op = op_project;
     858          80 :                         rel->l = l;
     859          80 :                         rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     860          80 :                         set_count_prop(v->sql->sa, rel, 0);
     861          80 :                         set_nodistinct(rel); /* set relations may have distinct flag set */
     862          80 :                         v->changes++;
     863             :                 }
     864             :                 break;
     865             :         }
     866       12221 :         case op_munion: {
     867       12221 :                 list *l = rel->l, *nrels = sa_list(v->sql->sa);
     868       12221 :                 BUN cnt = 0;
     869       12221 :                 bool needs_pruning = false;
     870             : 
     871       46040 :                 for (node *n = l->h; n; n = n->next) {
     872       33819 :                         sql_rel *r = n->data, *pl = r;
     873             : 
     874       33819 :                         while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     875           0 :                                         pl = pl->l;
     876             :                         /* if it's not a projection, then project and propagate statistics */
     877       33819 :                         if (!is_project(pl->op) && !is_base(pl->op)) {
     878           0 :                                 pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     879           0 :                                 set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     880           0 :                                 pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     881             :                         }
     882       33819 :                         nrels = append(nrels, pl);
     883             :                         /* we need new munion statistics */
     884             :                         /* propagate row count */
     885       33819 :                         BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     886             :                         /* if PROP_COUNT does not exist we assume at least a row (see get_rel_count def) */
     887       33819 :                         if (rv == BUN_NONE) {
     888        1076 :                                 cnt++;
     889        1076 :                                 continue;
     890             :                         }
     891       32743 :                         if (!rv && can_be_pruned)
     892        5613 :                                 needs_pruning = true;
     893             :                         /* overflow check */
     894       32743 :                         if (rv > (BUN_MAX - cnt))
     895       33819 :                                 rv = BUN_MAX;
     896             :                         else
     897       32743 :                                 cnt += rv;
     898             :                 }
     899       12221 :                 int i = 0;
     900       69148 :                 for (node *n = rel->exps->h ; n ; n = n->next, i++)
     901       56927 :                         rel_munion_get_statistics(v->sql, rel, nrels, n->data, i);
     902             : 
     903       12221 :                 if (needs_pruning && !rel_is_ref(rel)) {
     904        3828 :                         v->changes++;
     905        3828 :                         list *nl = sa_list(l->sa);
     906             : 
     907       13517 :                         for (node *n = nrels->h; n; n = n->next) {
     908        9689 :                                 sql_rel *r = n->data;
     909        9689 :                                 BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     910             : 
     911        9689 :                                 if (!rv) { /* keep last for now */
     912        5159 :                                         rel_destroy(r);
     913        5159 :                                         continue;
     914             :                                 }
     915        4530 :                                 nl = append(nl, r);
     916             :                         }
     917        3828 :                         rel->l = nl;
     918        3828 :                         if (list_length(nl) == 1) {
     919        3606 :                                 sql_rel *l = rel->l = nl->h->data; /* ugh */
     920        3606 :                                 rel->r = NULL;
     921        3606 :                                 rel->op = op_project;
     922             : 
     923       17555 :                                 for (node *n = rel->exps->h, *m = l->exps->h ; n && m ; n = n->next, m = m->next) {
     924       13949 :                                         sql_exp *pe = n->data, *ie = m->data;
     925       13949 :                                         sql_exp *ne = exp_ref(v->sql, ie);
     926       13949 :                                         exp_prop_alias(v->sql->sa, ne, pe);
     927       13949 :                                         n->data = ne;
     928             :                                 }
     929        3606 :                                 list_hash_clear(rel->exps);
     930         222 :                         } else if (list_empty(nl)) {
     931             :                                 /* empty select (project [ nils ] ) */
     932         454 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     933         354 :                                         sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     934         354 :                                         exp_prop_alias(v->sql->sa, a, e);
     935         354 :                                         n->data = a;
     936             :                                 }
     937         100 :                                 list_hash_clear(rel->exps);
     938         100 :                                 sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     939         100 :                                 set_count_prop(v->sql->sa, l, 1);
     940         100 :                                 l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     941         100 :                                 set_count_prop(v->sql->sa, l, 0);
     942         100 :                                 rel->op = op_project;
     943         100 :                                 rel->r = NULL;
     944         100 :                                 rel->l = l;
     945         100 :                                 rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     946         100 :                                 set_count_prop(v->sql->sa, rel, 0);
     947         100 :                                 set_nodistinct(rel); /* set relations may have distinct flag set */
     948             :                         }
     949             :                 } else {
     950        8393 :                         set_count_prop(v->sql->sa, rel, cnt);
     951             :                 }
     952             :                 break;
     953             :         }
     954      488049 :         case op_join:
     955             :         case op_left:
     956             :         case op_right:
     957             :         case op_full:
     958             :         case op_semi:
     959             :         case op_anti:
     960             :         case op_select:
     961             :         case op_project:
     962             :         case op_groupby: {
     963      488049 :                 if (is_groupby(rel->op) && !list_empty(rel->r))
     964       15762 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     965      488049 :                 rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, &rel_propagate_statistics, false);
     966      488049 :                 if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r))
     967       35962 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     968             :                 /* The following optimizations can only be applied after propagating the statistics to rel->exps */
     969      488049 :                 if (can_be_pruned && (is_join(rel->op) || is_select(rel->op)) && !list_empty(rel->exps)) {
     970      120948 :                         int changes = v->changes;
     971      120948 :                         rel->exps = rel_prune_predicates(v, rel);
     972      120948 :                         if (v->changes > changes)
     973        3359 :                                 rel = rewrite_simplify(v, 0, v->value_based_opt, rel);
     974             :                 }
     975             : 
     976             :                 /* propagate row count */
     977      488049 :                 sql_rel *l = rel->l, *r = rel->r;
     978      488049 :                 switch (rel->op) {
     979      125258 :                 case op_join:
     980             :                 case op_left:
     981             :                 case op_right:
     982             :                 case op_full: {
     983      125258 :                         BUN lv = get_rel_count(l), rv = get_rel_count(r), uniques_estimate = BUN_MAX, join_idx_estimate = BUN_MAX;
     984             : 
     985      125258 :                         if (!list_empty(rel->exps) && !is_single(rel)) {
     986      229546 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     987      117126 :                                         sql_exp *e = n->data, *el = e->l, *er = e->r;
     988             : 
     989      117126 :                                         if (find_prop(e->p, PROP_JOINIDX)) {
     990         667 :                                                 join_idx_estimate = lv>rv?lv:rv;
     991      116459 :                                         } else if (e->type == e_cmp && e->flag == cmp_equal) {
     992             :                                                 /* if one of the sides is unique, the cardinality will be that exact number, but look for nulls */
     993      112633 :                                                 if (!is_semantics(e) || !has_nil(el) || !has_nil(er)) {
     994      112484 :                                                         BUN lu = 0, ru = 0;
     995      112484 :                                                         prop *p = NULL;
     996      112484 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES)))
     997       82336 :                                                                 lu = (BUN) p->value.dval;
     998      112484 :                                                         if ((p = find_prop(er->p, PROP_NUNIQUES)))
     999       96330 :                                                                 ru = (BUN) p->value.dval;
    1000      112484 :                                                         if (is_unique(el) || lu > lv) {
    1001       67856 :                                                                 BUN ncount = (is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv;
    1002       67856 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1003       44628 :                                                         } else if (is_unique(er) || ru > rv) {
    1004       27437 :                                                                 BUN ncount = (is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv;
    1005       27437 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1006             :                                                         }
    1007             :                                                 }
    1008             :                                         }
    1009             :                                 }
    1010             :                         }
    1011      125258 :                         if (is_single(rel)) {
    1012        3513 :                                 set_count_prop(v->sql->sa, rel, lv);
    1013      121745 :                         } else if (join_idx_estimate != BUN_MAX) {
    1014         665 :                                 set_count_prop(v->sql->sa, rel, join_idx_estimate);
    1015      121080 :                         } else if (uniques_estimate != BUN_MAX) {
    1016       89117 :                                 set_count_prop(v->sql->sa, rel, uniques_estimate);
    1017       31963 :                         } else if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1018             :                                 /* corner cases for outer joins */
    1019          84 :                                 if (is_left(rel->op)) {
    1020          72 :                                         set_count_prop(v->sql->sa, rel, lv);
    1021          12 :                                 } else if (is_right(rel->op)) {
    1022           3 :                                         set_count_prop(v->sql->sa, rel, rv);
    1023           9 :                                 } else if (is_full(rel->op) && lv != BUN_NONE && rv != BUN_NONE) {
    1024           9 :                                         set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
    1025           0 :                                 } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1026           0 :                                         set_count_prop(v->sql->sa, rel, 0);
    1027             :                                 }
    1028       31879 :                         } else if (lv == 0) {
    1029        1166 :                                 set_count_prop(v->sql->sa, rel, (is_right(rel->op) || is_full(rel->op)) ? rv : 0);
    1030       31282 :                         } else if (rv == 0) {
    1031        1568 :                                 set_count_prop(v->sql->sa, rel, (is_left(rel->op) || is_full(rel->op)) ? lv : 0);
    1032       30208 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1033       17870 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX / lv)) ? BUN_MAX : (lv * rv)); /* overflow check */
    1034             :                         }
    1035             :                         break;
    1036             :                 }
    1037        2054 :                 case op_anti:
    1038        2054 :                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1039        2054 :                         break;
    1040       97157 :                 case op_semi:
    1041             :                 case op_select:
    1042             :                         /* TODO calculate cardinalities using selectivities */
    1043       97157 :                         if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1044        1739 :                                 set_count_prop(v->sql->sa, rel, 0);
    1045             :                         } else {
    1046      187596 :                                 if (!list_empty(rel->exps) && !is_single(rel)) {
    1047       92178 :                                         BUN cnt = get_rel_count(l), u = 1;
    1048      155466 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1049      101154 :                                                 sql_exp *e = n->data, *el = e->l, *er = e->r;
    1050             : 
    1051             :                                                 /* simple expressions first */
    1052      101154 :                                                 if (e->type == e_cmp && e->flag == cmp_equal && exp_is_atom(er)) {
    1053             :                                                         /* use selectivity */
    1054       50387 :                                                         prop *p;
    1055       50387 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES))) {
    1056       37866 :                                                                 u = (BUN) p->value.dval;
    1057       37866 :                                                                 break;
    1058             :                                                         }
    1059             :                                                 }
    1060             :                                         }
    1061             :                                         /* u is an *estimate*, so don't set count_prop to 0 unless cnt is 0 */
    1062       92178 :                                         set_count_prop(v->sql->sa, rel, cnt == 0 ? 0 : u == 0 || u > cnt ? 1 : cnt/u);
    1063             :                                 } else {
    1064        3240 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1065             :                                 }
    1066             :                         }
    1067             :                         break;
    1068      240036 :                 case op_project:
    1069      240036 :                         if (l) {
    1070      231213 :                                 if (need_distinct(rel)) {
    1071           0 :                                         set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->exps));
    1072             :                                 } else {
    1073      231213 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1074             :                                 }
    1075             :                         } else {
    1076        8823 :                                 BUN card = 1;
    1077             : 
    1078        8823 :                                 if (!list_empty(rel->exps)) {
    1079       22671 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1080       13848 :                                                 sql_exp *e = n->data;
    1081       13848 :                                                 card = MAX(card, trivial_project_exp_card(e));
    1082             :                                         }
    1083             :                                 }
    1084        8823 :                                 set_count_prop(v->sql->sa, rel, card);
    1085             :                         }
    1086             :                         break;
    1087       23180 :                 case op_groupby:
    1088       23180 :                         if (list_empty(rel->r)) {
    1089        7417 :                                 set_count_prop(v->sql->sa, rel, 1);
    1090             :                         } else {
    1091       15762 :                                 set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->r));
    1092             :                         }
    1093             :                         break;
    1094             :                 default:
    1095             :                         break;
    1096             :                 }
    1097             :                 break;
    1098             :         }
    1099       17016 :         case op_topn: {
    1100       17016 :                 BUN lv = get_rel_count(rel->l);
    1101             : 
    1102       17016 :                 if (lv != BUN_NONE) {
    1103       16999 :                         sql_exp *le = rel->exps->h->data, *oe = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
    1104          96 :                         if (oe && oe->l && exp_is_not_null(oe)) { /* no parameters */
    1105          96 :                                 BUN offset = (BUN) ((atom*)oe->l)->data.val.lval;
    1106          96 :                                 lv = offset >= lv ? 0 : lv - offset;
    1107             :                         }
    1108       16999 :                         if (le->l && exp_is_not_null(le)) {
    1109       16961 :                                 BUN limit = (BUN) ((atom*)le->l)->data.val.lval;
    1110       16961 :                                 lv = MIN(lv, limit);
    1111             :                         }
    1112       16999 :                         set_count_prop(v->sql->sa, rel, lv);
    1113             :                 }
    1114             :                 break;
    1115             :         }
    1116          22 :         case op_sample: {
    1117          22 :                 BUN lv = get_rel_count(rel->l);
    1118             : 
    1119          22 :                 if (lv != BUN_NONE) {
    1120           4 :                         sql_exp *se = rel->exps->h->data;
    1121           4 :                         sql_subtype *tp = exp_subtype(se);
    1122             : 
    1123           4 :                         if (se->l && tp->type->eclass == EC_NUM) { /* sample is a number of rows */
    1124           4 :                                 BUN sample = (BUN) ((atom*)se->l)->data.val.lval;
    1125           4 :                                 lv = MIN(lv, sample);
    1126           0 :                         } else if (se->l) { /* sample is a percentage of rows */
    1127           0 :                                 dbl percent = ((atom*)se->l)->data.val.dval;
    1128           0 :                                 assert(tp->type->eclass == EC_FLT);
    1129           0 :                                 lv = (BUN) ceil((dbl)lv * percent);
    1130             :                         }
    1131           4 :                         set_count_prop(v->sql->sa, rel, lv);
    1132             :                 }
    1133             :                 break;
    1134             :         }
    1135        5357 :         case op_table: {
    1136        5357 :                 sql_exp *op = rel->r;
    1137        5357 :                 if (rel->flag != TRIGGER_WRAPPER && op) {
    1138        5045 :                         sql_subfunc *f = op->f;
    1139        5045 :                         if (f->func->lang == FUNC_LANG_SQL) {
    1140          97 :                                 set_count_prop(v->sql->sa, rel, 1000 /* just some fallback value */);
    1141        4948 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "storage") == 0) {
    1142         597 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of catalog */);
    1143        4351 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "db_users") == 0) {
    1144           0 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of users */);
    1145        4351 :                         } else if (f->func->lang == FUNC_LANG_MAL && strncmp(f->func->base.name, "querylog", 8) == 0) {
    1146         965 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of querylog */);
    1147        3386 :                         } else if (f->func->lang == FUNC_LANG_MAL &&
    1148        3276 :                                            (strcmp(f->func->base.name, "queue") == 0 ||
    1149        3020 :                                                 strcmp(f->func->base.name, "optimizers") == 0 ||
    1150        2772 :                                                 strcmp(f->func->base.name, "env") == 0 ||
    1151        2488 :                                                 strcmp(f->func->base.name, "keywords") == 0 ||
    1152        2488 :                                                 strcmp(f->func->base.name, "statistics") == 0 ||
    1153        1941 :                                                 strcmp(f->func->base.name, "rejects") == 0 ||
    1154        1695 :                                                 strcmp(f->func->base.name, "schemastorage") == 0 ||
    1155        1695 :                                                 strncmp(f->func->base.name, "storage", 7) == 0 ||
    1156        1695 :                                                 strcmp(f->func->base.name, "sessions") == 0) ) {
    1157        1839 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of queue */);
    1158             :                         }
    1159             :                         /* else {
    1160             :                                 printf("%%func needs stats : %s\n", f->func->base.name);
    1161             :                         } */
    1162             :                 }
    1163             :                 break;
    1164             :         }
    1165             :         /*These relations are less important for now
    1166             :           TODO later we can tune it */
    1167             :         case op_insert:
    1168             :         case op_update:
    1169             :         case op_delete:
    1170             :         case op_truncate:
    1171             :         case op_ddl:
    1172             :         default:
    1173             :                 break;
    1174             :         }
    1175             : 
    1176             :         return rel;
    1177             : }
    1178             : 
    1179             : static sql_rel *
    1180      534110 : rel_get_statistics(visitor *v, global_props *gp, sql_rel *rel)
    1181             : {
    1182             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
    1183      534110 :         uint8_t has_special_modify = (uint8_t) gp->has_special_modify;
    1184      534110 :         v->data = &has_special_modify;
    1185      534110 :         rel = rel_visitor_bottomup(v, rel, &rel_get_statistics_);
    1186      534114 :         v->data = gp;
    1187      534114 :         return rel;
    1188             : }
    1189             : 
    1190             : run_optimizer
    1191      668215 : bind_get_statistics(visitor *v, global_props *gp)
    1192             : {
    1193      668215 :         (void) v;
    1194      668215 :         return (gp->opt_level == 1 && !gp->cnt[op_insert]) ? rel_get_statistics : NULL;
    1195             : }
    1196             : 
    1197             : 
    1198             : static bool
    1199       90736 : point_select_on_unique_column(sql_rel *rel)
    1200             : {
    1201       90736 :         if (is_select(rel->op) && !list_empty(rel->exps)) {
    1202      126015 :                 for (node *n = rel->exps->h; n ; n = n->next) {
    1203       72939 :                         sql_exp *e = n->data, *el = e->l, *er = e->r, *found = NULL;
    1204             : 
    1205       72939 :                         if (is_compare(e->type) && e->flag == cmp_equal) {
    1206       30990 :                                 if (is_numeric_upcast(el))
    1207           0 :                                         el = el->l;
    1208       30990 :                                 if (is_numeric_upcast(er))
    1209           0 :                                         er = er->l;
    1210       30990 :                                 if (is_alias(el->type) && exp_is_atom(er) && (found = rel_find_exp(rel->l, el)) &&
    1211         749 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(found) || !has_nil(er)))
    1212             :                                         return true;
    1213       30241 :                                 if (is_alias(er->type) && exp_is_atom(el) && (found = rel_find_exp(rel->l, er)) &&
    1214           0 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(el) || !has_nil(found)))
    1215             :                                         return true;
    1216             :                         }
    1217             :                 }
    1218             :         }
    1219             :         return false;
    1220             : }
    1221             : 
    1222             : /*
    1223             :  * A point select on an unique column reduces the number of rows to 1. If the same select is under a
    1224             :  * join, the opposite side's select can be pushed above the join.
    1225             :  */
    1226             : static inline sql_rel *
    1227     1188360 : rel_push_select_up(visitor *v, sql_rel *rel)
    1228             : {
    1229     1188360 :         if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) || is_semi(rel->op)) && !is_single(rel)) {
    1230      245920 :                 sql_rel *l = rel->l, *r = rel->r;
    1231      245920 :                 bool can_pushup_left = is_select(l->op) && !rel_is_ref(l) && !is_single(l) && (is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op)),
    1232      245920 :                         can_pushup_right = is_select(r->op) && !rel_is_ref(r) && !is_single(r) && (is_innerjoin(rel->op) || is_right(rel->op));
    1233             : 
    1234      245920 :                 if (can_pushup_left || can_pushup_right) {
    1235       63939 :                         if (can_pushup_left)
    1236       43434 :                                 can_pushup_left = point_select_on_unique_column(r);
    1237       63939 :                         if (can_pushup_right)
    1238       47302 :                                 can_pushup_right = point_select_on_unique_column(l);
    1239             : 
    1240             :                         /* if both selects retrieve one row each, it's not worth it to push both up */
    1241       63939 :                         if (can_pushup_left && !can_pushup_right) {
    1242         683 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1243         683 :                                 nrel->l = l->l;
    1244         683 :                                 rel = rel_inplace_select(rel, nrel, l->exps);
    1245         683 :                                 assert(is_select(rel->op));
    1246         683 :                                 v->changes++;
    1247       63256 :                         } else if (!can_pushup_left && can_pushup_right) {
    1248          14 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1249          14 :                                 nrel->r = r->l;
    1250          14 :                                 rel = rel_inplace_select(rel, nrel, r->exps);
    1251          14 :                                 assert(is_select(rel->op));
    1252          14 :                                 v->changes++;
    1253             :                         }
    1254             :                 }
    1255             :         }
    1256     1188360 :         return rel;
    1257             : }
    1258             : 
    1259             : static int
    1260       94236 : sql_class_base_score(visitor *v, sql_column *c, sql_subtype *t, bool equality_based)
    1261             : {
    1262       94236 :         int de;
    1263             : 
    1264       94236 :         if (!t)
    1265             :                 return 0;
    1266       94236 :         switch (ATOMstorage(t->type->localtype)) {
    1267             :         case TYPE_bte:
    1268             :                 return 150 - 8;
    1269        1307 :         case TYPE_sht:
    1270        1307 :                 return 150 - 16;
    1271       37399 :         case TYPE_int:
    1272       37399 :                 return 150 - 32;
    1273         518 :         case TYPE_void:
    1274             :         case TYPE_lng:
    1275         518 :                 return 150 - 64;
    1276             :         case TYPE_uuid:
    1277             : #ifdef HAVE_HGE
    1278             :         case TYPE_hge:
    1279             : #endif
    1280             :                 return 150 - 128;
    1281           2 :         case TYPE_flt:
    1282           2 :                 return 75 - 24;
    1283             :         case TYPE_dbl:
    1284             :                 return 75 - 53;
    1285       32104 :         default:
    1286       32104 :                 if (equality_based && c && v->storage_based_opt && (de = mvc_is_duplicate_eliminated(v->sql, c)))
    1287        1478 :                         return 150 - de * 8;
    1288             :                 /* strings and blobs not duplicate eliminated don't get any points here */
    1289             :                 return 0;
    1290             :         }
    1291             : }
    1292             : 
    1293             : static int
    1294       32731 : score_se_base(visitor *v, sql_rel *rel, sql_exp *e)
    1295             : {
    1296       32731 :         int res = 0;
    1297       32731 :         sql_subtype *t = exp_subtype(e);
    1298       32731 :         sql_column *c = NULL;
    1299             : 
    1300       32731 :         if (e->type == e_convert) /* keep unsafes at the end (TODO improve) */
    1301             :                 return -1000;
    1302             :         /* can we find out if the underlying table is sorted */
    1303       32130 :         if ((c = exp_find_column(rel, e, -2)) && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1304       32130 :                 res += 600;
    1305             : 
    1306             :         /* prefer the shorter var types over the longer ones */
    1307       32130 :         res += sql_class_base_score(v, c, t, is_equality_or_inequality_exp(e->flag)); /* smaller the type, better */
    1308       32130 :         return res;
    1309             : }
    1310             : 
    1311             : static int
    1312       56433 : score_se(visitor *v, sql_rel *rel, sql_exp *e)
    1313             : {
    1314       56433 :         int score = 0;
    1315       56433 :         if (e->type == e_cmp && !is_complex_exp(e->flag)) {
    1316       32731 :                 sql_exp *l = e->l;
    1317             : 
    1318       32731 :                 while (l->type == e_cmp) { /* go through nested comparisons */
    1319           2 :                         sql_exp *ll;
    1320             : 
    1321           2 :                         if (l->flag == cmp_filter || l->flag == cmp_or)
    1322           0 :                                 ll = ((list*)l->l)->h->data;
    1323             :                         else
    1324           2 :                                 ll = l->l;
    1325           2 :                         if (ll->type != e_cmp)
    1326             :                                 break;
    1327             :                         l = ll;
    1328             :                 }
    1329       32731 :                 score += score_se_base(v, rel, l);
    1330             :         }
    1331       56433 :         score += exp_keyvalue(e);
    1332       56433 :         return score;
    1333             : }
    1334             : 
    1335             : static inline sql_rel *
    1336     1188360 : rel_select_order(visitor *v, sql_rel *rel)
    1337             : {
    1338     1188360 :         int *scores = NULL;
    1339     1188360 :         sql_exp **exps = NULL;
    1340             : 
    1341     1188360 :         if (is_select(rel->op) && list_length(rel->exps) > 1) {
    1342       26503 :                 node *n;
    1343       26503 :                 int i, nexps = list_length(rel->exps);
    1344       26503 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, nexps);
    1345       26503 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, nexps);
    1346             : 
    1347       82936 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next) {
    1348       56463 :                         exps[i] = n->data;
    1349       56463 :                         if (find_prop(exps[i]->p, PROP_HASHCOL))
    1350             :                                 return rel;
    1351       56433 :                         scores[i] = score_se(v, rel, n->data);
    1352             :                 }
    1353       26473 :                 GDKqsort(scores, exps, NULL, nexps, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1354             : 
    1355       82906 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next)
    1356       56433 :                         n->data = exps[i];
    1357             :         }
    1358             : 
    1359             :         return rel;
    1360             : }
    1361             : 
    1362             : /* Compute the efficiency of using this expression early in a group by list */
    1363             : static int
    1364       62106 : score_gbe(visitor *v, sql_rel *rel, sql_exp *e)
    1365             : {
    1366       62106 :         int res = 0;
    1367       62106 :         sql_subtype *t = exp_subtype(e);
    1368       62106 :         sql_column *c = exp_find_column(rel, e, -2);
    1369             : 
    1370       62106 :         if (e->card == CARD_ATOM) /* constants are trivial to group */
    1371          38 :                 res += 1000;
    1372             :         /* can we find out if the underlying table is sorted */
    1373       62106 :         if (is_unique(e) || find_prop(e->p, PROP_HASHCOL) || (c && v->storage_based_opt && mvc_is_unique(v->sql, c))) /* distinct columns */
    1374        3396 :                 res += 700;
    1375       41123 :         if (c && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1376        3436 :                 res += 500;
    1377       62106 :         if (find_prop(e->p, PROP_HASHIDX)) /* has hash index */
    1378           0 :                 res += 200;
    1379             : 
    1380             :         /* prefer the shorter var types over the longer ones */
    1381       62106 :         res += sql_class_base_score(v, c, t, true); /* smaller the type, better */
    1382       62106 :         return res;
    1383             : }
    1384             : 
    1385             : /* reorder group by expressions */
    1386             : static inline sql_rel *
    1387     1188360 : rel_groupby_order(visitor *v, sql_rel *rel)
    1388             : {
    1389     1188360 :         int *scores = NULL;
    1390     1188360 :         sql_exp **exps = NULL;
    1391             : 
    1392     1188360 :         if (is_groupby(rel->op) && list_length(rel->r) > 1) {
    1393       26255 :                 node *n;
    1394       26255 :                 list *gbe = rel->r;
    1395       26255 :                 int i, ngbe = list_length(gbe);
    1396       26255 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, ngbe);
    1397       26255 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, ngbe);
    1398             : 
    1399             :                 /* first sorting step, give priority for integers and sorted columns */
    1400       88361 :                 for (i = 0, n = gbe->h; n; i++, n = n->next) {
    1401       62106 :                         exps[i] = n->data;
    1402       62106 :                         scores[i] = score_gbe(v, rel, exps[i]);
    1403             :                 }
    1404       26255 :                 GDKqsort(scores, exps, NULL, ngbe, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1405             : 
    1406             :                 /* second sorting step, give priority to strings with lower number of digits */
    1407       53405 :                 for (i = ngbe - 1; i && !scores[i]; i--); /* find expressions with no score from the first round */
    1408       26255 :                 if (scores[i])
    1409       25289 :                         i++;
    1410       26255 :                 if (ngbe - i > 1) {
    1411       11678 :                         for (int j = i; j < ngbe; j++) {
    1412        8795 :                                 sql_subtype *t = exp_subtype(exps[j]);
    1413        8795 :                                 scores[j] = t ? t->digits : 0;
    1414             :                         }
    1415             :                         /* the less number of digits the better, order ascending */
    1416        2883 :                         GDKqsort(scores + i, exps + i, NULL, ngbe - i, sizeof(int), sizeof(void *), TYPE_int, false, true);
    1417             :                 }
    1418             : 
    1419       88361 :                 for (i = 0, n = gbe->h; n; i++, n = n->next)
    1420       62106 :                         n->data = exps[i];
    1421             :         }
    1422             : 
    1423     1188360 :         return rel;
    1424             : }
    1425             : 
    1426             : /* This optimization loop contains optimizations that can potentially use statistics */
    1427             : static sql_rel *
    1428     1188360 : rel_final_optimization_loop_(visitor *v, sql_rel *rel)
    1429             : {
    1430             :         /* Run rel_push_select_up only once at the end to avoid an infinite optimization loop */
    1431     1188360 :         rel = rel_push_select_up(v, rel);
    1432     1188360 :         rel = rel_select_order(v, rel);
    1433             : 
    1434             :         /* TODO? Maybe later add rel_simplify_count, rel_join2semijoin, rel_simplify_fk_joins,
    1435             :            rel_distinct_project2groupby, rel_simplify_predicates, rel_simplify_math,
    1436             :            rel_distinct_aggregate_on_unique_values */
    1437             : 
    1438     1188360 :         rel = rel_groupby_order(v, rel);
    1439     1188359 :         return rel;
    1440             : }
    1441             : 
    1442             : static sql_rel *
    1443       60681 : rel_final_optimization_loop(visitor *v, global_props *gp, sql_rel *rel)
    1444             : {
    1445       60681 :         (void) gp;
    1446       60681 :         return rel_visitor_bottomup(v, rel, &rel_final_optimization_loop_);
    1447             : }
    1448             : 
    1449             : run_optimizer
    1450      668214 : bind_final_optimization_loop(visitor *v, global_props *gp)
    1451             : {
    1452      668214 :         int flag = v->sql->sql_optimizer;
    1453             :         /* At the moment, this optimizer has dependency on 3 flags */
    1454      541546 :         return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_select]) &&
    1455      728897 :                 (flag & push_select_up) && (flag & optimize_select_and_joins_topdown) && (flag & optimize_projections) ? rel_final_optimization_loop : NULL;
    1456             : }

Generated by: LCOV version 1.14