LCOV - code coverage report
Current view: top level - sql/server - rel_statistics.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 831 883 94.1 %
Date: 2024-10-03 20:03:20 Functions: 26 26 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include "rel_optimizer_private.h"
      15             : #include "rel_statistics.h"
      16             : #include "rel_basetable.h"
      17             : #include "rel_rewriter.h"
      18             : 
      19             : static sql_exp *
      20     3462283 : comparison_find_column(sql_exp *input, sql_exp *e)
      21             : {
      22     3462283 :         switch (input->type) {
      23       98263 :         case e_convert: {
      24       98263 :                 list *types = (list *)input->r;
      25       98263 :                 sql_class from = ((sql_subtype*)types->h->data)->type->eclass, to = ((sql_subtype*)types->h->next->data)->type->eclass;
      26       98263 :                 if (from == to)
      27      189472 :                         return comparison_find_column(input->l, e) ? input : NULL;
      28             :                 return NULL;
      29             :         }
      30     3051104 :         case e_column:
      31     3051104 :                 return exp_match(e, input) ? input : NULL;
      32             :         default:
      33             :                 return NULL;
      34             :         }
      35             : }
      36             : 
      37             : /* multi lo <= col <= hi, maybe still be false even if lo or hi are NULL, possibly similar for filter on multiple
      38             :  * columns */
      39             : #define comp_single_column(c) (!c->f)
      40             : 
      41             : static sql_exp *
      42     8732090 : rel_propagate_column_ref_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
      43             : {
      44     8815172 :         assert(e->type == e_column);
      45     8815172 :         if (rel) {
      46     8815098 :                 switch(rel->op) {
      47     4080719 :                 case op_left:
      48             :                 case op_right:
      49             :                 case op_full:
      50             :                 case op_join:
      51             :                 case op_select:
      52             :                 case op_anti:
      53             :                 case op_semi: {
      54     4080719 :                         bool found_without_semantics = false, found_left = false, found_right = false, still_unique = false;
      55             : 
      56     4080719 :                         if ((is_innerjoin(rel->op) || is_select(rel->op)) && list_length(rel->exps) == 1 && exp_is_false(rel->exps->h->data))
      57             :                                 return NULL; /* nothing will pass, skip */
      58             : 
      59             :                         /* propagate from the bottom first */
      60     4066605 :                         if (rel_propagate_column_ref_statistics(sql, rel->l, e))
      61             :                                 found_left = true;
      62     2585583 :                         if (!found_left && is_join(rel->op) && rel_propagate_column_ref_statistics(sql, rel->r, e))
      63     4066605 :                                 found_right = true;
      64             : 
      65     4066605 :                         if (!found_left && !found_right)
      66             :                                 return NULL;
      67     1775396 :                         if (!list_empty(rel->exps) && rel->op != op_anti) { /* if there's an or, the MIN and MAX get difficult to propagate */
      68     3493738 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
      69     1809115 :                                         sql_exp *comp = n->data, *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL, *fe = comp->f, *fne = NULL;
      70             : 
      71     1809115 :                                         if (comp->type == e_cmp) {
      72     1808113 :                                                 if (is_theta_exp(comp->flag) && ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) || (fe && (fne = comparison_find_column(fe, e))))) {
      73      125605 :                                                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
      74      125605 :                                                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
      75             : 
      76             :                                                         /* not semantics found or if explicitly filtering not null values from the column */
      77      125605 :                                                         found_without_semantics |= (!is_semantics(comp) && comp_single_column(comp)) || (comp->flag == cmp_equal && lne && is_anti(comp) && exp_is_null(re));
      78      125605 :                                                         still_unique |= comp->flag == cmp_equal && is_unique(le) && is_unique(re); /* unique if only equi-joins on unique columns are there */
      79      125605 :                                                         if (is_full(rel->op) || (is_left(rel->op) && found_left) || (is_right(rel->op) && found_right)) /* on outer joins, min and max cannot be propagated on some cases */
      80       19749 :                                                                 continue;
      81             :                                                         /* if (end2 >= start1 && start2 <= end1) then the 2 intervals are intersected */
      82      105856 :                                                         if (fe && lval_min && lval_max) { /* range case, the middle expression must intersect the other two */
      83        4622 :                                                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
      84        4622 :                                                                 int int1 = rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0,
      85        4622 :                                                                         int2 = fval_min && fval_max && atom_cmp(fval_max, lval_min) >= 0 && atom_cmp(fval_min, lval_max) <= 0,
      86        4622 :                                                                         symmetric = is_symmetric(comp);
      87             : 
      88        4622 :                                                                 if (is_anti(comp) || (!symmetric && fval_min && rval_max && atom_cmp(fval_min, rval_max) < 0)) /* for asymmetric case the re range must be after the fe range */
      89        1817 :                                                                         continue;
      90        2805 :                                                                 if (lne && int1 && int2) {
      91         104 :                                                                         if (symmetric) {
      92           0 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
      93           0 :                                                                                 atom *nmin = statistics_atom_min(sql, rval_min, fval_min), *nmax = statistics_atom_max(sql, rval_max, fval_max);
      94             :                                                                                 /* max is min from le and (max from re and fe max) */
      95           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, nmax, p2->value.pval) : nmax);
      96             :                                                                                 /* min is max from le and (min from re and fe min) */
      97           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, nmin, p1->value.pval) : nmin);
      98             :                                                                         } else {
      99         104 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
     100             :                                                                                 /* max is min from le and fe max */
     101         104 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, fval_max, p2->value.pval) : fval_max);
     102             :                                                                                 /* min is max from le and re min */
     103         104 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, rval_min, p1->value.pval) : rval_min);
     104             :                                                                         }
     105        2701 :                                                                 } else if (rne) {
     106         590 :                                                                         if (symmetric && int1 && int2) { /* min is max from le and (min from re and fe min) */
     107           0 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     108           0 :                                                                                 atom *nmin = p ? statistics_atom_min(sql, p->value.pval, fval_min) : fval_min;
     109           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, nmin, lval_min));
     110         590 :                                                                         } else if (int1) { /* min is max from le and re min */
     111         100 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     112         100 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     113             :                                                                         }
     114        2111 :                                                                 } else if (fne) {
     115         549 :                                                                         if (symmetric && int1 && int2) { /* max is min from le and (max from re and fe max) */
     116           0 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     117           0 :                                                                                 atom *nmax = p ? statistics_atom_max(sql, p->value.pval, rval_max) : rval_max;
     118           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, nmax, lval_max) : nmax);
     119         549 :                                                                         } else if (int2) { /* max is min from le and fe max */
     120         266 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     121         266 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     122             :                                                                         }
     123             :                                                                 }
     124      101234 :                                                         } else if (lval_min && lval_max && rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0) {
     125             :                                                                 /* both min and max must be set and the intervals must overlap */
     126       43126 :                                                                 switch (comp->flag) {
     127       28790 :                                                                 case cmp_equal: /* for equality reduce */
     128       28790 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_max(sql, lval_max, rval_max) : statistics_atom_min(sql, lval_max, rval_max));
     129       28790 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_min(sql, lval_min, rval_min) : statistics_atom_max(sql, lval_min, rval_min));
     130       28790 :                                                                         break;
     131        4679 :                                                                 case cmp_notequal: /* for inequality expand */
     132        4679 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_min(sql, lval_max, rval_max) : statistics_atom_max(sql, lval_max, rval_max));
     133        4679 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_max(sql, lval_min, rval_min) : statistics_atom_min(sql, lval_min, rval_min));
     134        4679 :                                                                         break;
     135        5651 :                                                                 case cmp_gt:
     136             :                                                                 case cmp_gte:
     137       10364 :                                                                         if (!is_anti(comp) && lne) { /* min is max from both min */
     138        4713 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     139        4713 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min, p->value.pval) : rval_min);
     140         938 :                                                                         } else if (!is_anti(comp)) { /* max is min from both max */
     141         938 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     142         938 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     143             :                                                                         }
     144             :                                                                         break;
     145        4006 :                                                                 case cmp_lt:
     146             :                                                                 case cmp_lte:
     147        7205 :                                                                         if (!is_anti(comp) && lne) { /* max is min from both max */
     148        3199 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     149        3199 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max, p->value.pval) : rval_max);
     150         807 :                                                                         } else if (!is_anti(comp)) { /* min is max from both min */
     151         807 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     152         807 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     153             :                                                                         }
     154             :                                                                         break;
     155             :                                                                 default: /* Maybe later I can do cmp_in and cmp_notin */
     156             :                                                                         break;
     157             :                                                                 }
     158             :                                                         }
     159             :                                                 }
     160             :                                         }
     161             :                                 }
     162             :                         }
     163     1775396 :                         if (is_full(rel->op) || (is_left(rel->op) && found_right) || (is_right(rel->op) && found_left))
     164       37930 :                                 set_has_nil(e);
     165     1775396 :                         if (!is_outerjoin(rel->op) && found_without_semantics) /* at an outer join, null values pass */
     166       94467 :                                 set_has_no_nil(e);
     167     1775396 :                         if (is_join(rel->op) && is_unique(e) && !still_unique)
     168      118627 :                                 set_not_unique(e);
     169     1775396 :                         return e;
     170             :                 }
     171     4631950 :                 case op_table:
     172             :                 case op_basetable:
     173             :                 case op_union:
     174             :                 case op_except:
     175             :                 case op_inter:
     176             :                 case op_munion:
     177             :                 case op_project:
     178             :                 case op_groupby: {
     179     4631950 :                         sql_exp *found;
     180     4631950 :                         atom *fval;
     181     4631950 :                         prop *est;
     182     4631950 :                         if ((found = rel_find_exp(rel, e))) {
     183     2173278 :                                 if (rel->op != op_table) { /* At the moment don't propagate statistics for table relations */
     184     2128849 :                                         if ((fval = find_prop_and_get(found->p, PROP_MAX)))
     185     1130712 :                                                 set_minmax_property(sql, e, PROP_MAX, fval);
     186     2128845 :                                         if ((fval = find_prop_and_get(found->p, PROP_MIN)))
     187     1138005 :                                                 set_minmax_property(sql, e, PROP_MIN, fval);
     188     2128841 :                                         if (!has_nil(found))
     189     1351406 :                                                 set_has_no_nil(e);
     190     2128841 :                                         if (is_unique(found) || (need_distinct(rel) && list_length(rel->exps) == 1) ||
     191     1718543 :                                                 (is_groupby(rel->op) && (list_empty(rel->r) || (list_length(rel->r) == 1 && exps_find_exp(rel->r, e)))))
     192      421174 :                                                 set_unique(e);
     193             :                                         /* propagate unique estimation for known cases */
     194     2128842 :                                         if (is_groupby(rel->op) && list_empty(rel->r) && !find_prop(e->p, PROP_NUNIQUES)) { /* global aggregate case */
     195        7769 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     196        7769 :                                                 p->value.dval = 1;
     197     2121073 :                                         } else if (((is_basetable(rel->op) || is_except(rel->op) || is_inter(rel->op) || is_simple_project(rel->op) ||
     198       66649 :                                                                  (is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
     199     2063427 :                                                                 (est = find_prop(found->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES))) {
     200      188124 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     201      188124 :                                                 p->value.dval = est->value.dval;
     202             :                                         }
     203             :                                 }
     204     2173273 :                                 return e;
     205             :                         }
     206             :                         return NULL;
     207             :                 }
     208       83082 :                 case op_topn:
     209             :                 case op_sample:
     210       83082 :                         return rel_propagate_column_ref_statistics(sql, rel->l, e);
     211             :                 default:
     212             :                         break;
     213             :                 }
     214             :         }
     215             :         return NULL;
     216             : }
     217             : 
     218             : static atom *
     219     4440786 : atom_from_valptr( allocator *sa, sql_subtype *tpe, ValRecord *v)
     220             : {
     221     4440786 :         atom *a = SA_NEW(sa, atom);
     222             : 
     223     4440730 :         assert(!VALisnil(v));
     224     4440595 :         *a = (atom) {.tpe = *tpe,};
     225     4440595 :         SA_VALcopy(sa, &a->data, v);
     226     4440549 :         return a;
     227             : }
     228             : 
     229             : void
     230     4053783 : sql_column_get_statistics(mvc *sql, sql_column *c, sql_exp *e)
     231             : {
     232     4053783 :         bool nonil = false, unique = false;
     233     4053783 :         double unique_est = 0.0;
     234     4053783 :         ValRecord min, max;
     235     4053783 :         int ok = mvc_col_stats(sql, c, &nonil, &unique, &unique_est, &min, &max);
     236             : 
     237     4053924 :         if (has_nil(e) && nonil)
     238     2565399 :                 set_has_no_nil(e);
     239     4053924 :         if (!is_unique(e) && unique)
     240     1098858 :                 set_unique(e);
     241     4053924 :         if (unique_est != 0.0) {
     242     2817694 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     243     2817676 :                 p->value.dval = unique_est;
     244             :         }
     245     4053906 :         unsigned int digits = 0;
     246     4053906 :         sql_subtype *et = exp_subtype(e);
     247     4053928 :         if (et->type->eclass == EC_DEC || et->type->eclass == EC_NUM)
     248     2637245 :                 digits = et->digits;
     249     4053928 :         if ((ok & 2) == 2) {
     250     2217376 :                 if (!VALisnil(&max)) {
     251     2217371 :                         prop *p = e->p = prop_create(sql->sa, PROP_MAX, e->p);
     252     2217338 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &max);
     253     2217287 :                         if (digits) {
     254     1652189 :                                 unsigned int nd = atom_digits(p->value.pval);
     255     1652211 :                                 if (nd < digits)
     256             :                                         digits = nd;
     257     1652211 :                                 if (!digits)
     258             :                                         digits = 1;
     259             :                         }
     260             :                 }
     261     2217317 :                 VALclear(&max);
     262             :         }
     263     4053847 :         if ((ok & 1) == 1) {
     264     2223686 :                 if (!VALisnil(&min)) {
     265     2223672 :                         prop *p = e->p = prop_create(sql->sa, PROP_MIN, e->p);
     266     2223658 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &min);
     267     2223632 :                         if (digits) {
     268     1659603 :                                 unsigned int nd = atom_digits(p->value.pval);
     269     1659601 :                                 if (nd > digits) /* possibly set to low by max value */
     270             :                                         digits = nd;
     271             :                                 if (!digits)
     272             :                                         digits = 1;
     273             :                         }
     274             :                 }
     275     2223626 :                 VALclear(&min);
     276             :         }
     277     4053774 :         if (digits)
     278     2637091 :                 et->digits = digits;
     279     4053774 :         if (et->type->eclass == EC_DEC && et->digits <= et->scale)
     280         216 :                 et->digits = et->scale + 1;
     281     4053774 : }
     282             : 
     283             : static void
     284      934632 : rel_basetable_column_get_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
     285             : {
     286      934632 :         if (e->p)
     287             :                 return;
     288      302290 :         sql_column *c = NULL;
     289             : 
     290      302290 :         if ((c = rel_base_find_column(rel, e->nid))) {
     291      204237 :                 sql_column_get_statistics(sql, c, e);
     292             :         }
     293             : }
     294             : 
     295             : static bool
     296        8872 : rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, sql_exp *e, int i)
     297             : {
     298        8872 :         sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
     299        8872 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     300        8872 :                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     301        8872 :         prop *est;
     302             : 
     303             :         /* for the intersection, if both expressions don't overlap, it can be pruned */
     304        8872 :         if (is_inter(rel->op) && !has_nil(le) && !has_nil(re) &&
     305         511 :                 ((rval_max && lval_min && atom_cmp(rval_max, lval_min) < 0) || (rval_min && lval_max && atom_cmp(rval_min, lval_max) > 0)))
     306          26 :                 return true;
     307             : 
     308        8846 :         if (lval_max && rval_max) {
     309        2557 :                 if (is_union(rel->op))
     310           3 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     311        2554 :                 else if (is_inter(rel->op))
     312         399 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval_max, rval_max)); /* for intersect the new max will be the min of the two */
     313             :                 else /* except */
     314        2155 :                         set_minmax_property(sql, e, PROP_MAX, lval_max);
     315             :         }
     316        8846 :         if (lval_min && rval_min) {
     317        2557 :                 if (is_union(rel->op))
     318           3 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     319        2554 :                 else if (is_inter(rel->op))
     320         399 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval_min, rval_min)); /* for intersect the new min will be the max of the two */
     321             :                 else /* except */
     322        2155 :                         set_minmax_property(sql, e, PROP_MIN, lval_min);
     323             :         }
     324             : 
     325        8846 :         if (is_union(rel->op) || is_munion(rel->op)) {
     326        5986 :                 if (!has_nil(le) && !has_nil(re))
     327         879 :                         set_has_no_nil(e);
     328        5986 :                 if (need_distinct(rel) && list_length(rel->exps) == 1)
     329           0 :                         set_unique(e);
     330        2860 :         } else if (is_inter(rel->op)) {
     331         564 :                 if (!has_nil(le) || !has_nil(re))
     332         509 :                         set_has_no_nil(e);
     333         564 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     334         345 :                         set_unique(e);
     335             :         } else {
     336        2296 :                 assert(is_except(rel->op));
     337        2296 :                 if (!has_nil(le))
     338        2233 :                         set_has_no_nil(e);
     339        2296 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     340        2009 :                         set_unique(e);
     341             :         }
     342             :         /* propagate unique estimation for known cases */
     343        8846 :         if (!is_union(rel->op) && (est = find_prop(le->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES)) {
     344         205 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     345         205 :                 p->value.dval = est->value.dval;
     346             :         }
     347             :         return false;
     348             : }
     349             : 
     350             : 
     351             : static void
     352       60906 : rel_munion_get_statistics(mvc *sql, sql_rel *rel, list *rels, sql_exp *e, int i)
     353             : {
     354       60906 :         assert(is_munion(rel->op));
     355             : 
     356       60906 :         sql_rel *l = rels->h->data;
     357       60906 :         sql_exp *le = list_fetch(l->exps, i);
     358       60906 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX);
     359       60906 :         bool has_nonil = !has_nil(le);
     360             : 
     361      176339 :         for(node *n = rels->h->next; n; n = n->next) {
     362      115433 :                 sql_rel *r = n->data;
     363      115433 :                 sql_exp *re = list_fetch(r->exps, i);
     364      115433 :                 atom *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     365             : 
     366      115433 :                 if (lval_max && rval_max) {
     367       42595 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     368       42595 :                         lval_max = find_prop_and_get(e->p, PROP_MAX);
     369             :                 }
     370      115433 :                 if (lval_min && rval_min) {
     371       42097 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     372       42097 :                         lval_min = find_prop_and_get(e->p, PROP_MIN);
     373             :                 }
     374      115433 :                 has_nonil &= !has_nil(re);
     375             : 
     376             :         }
     377             : 
     378       60906 :         if (has_nonil)
     379       40993 :                 set_has_no_nil(e);
     380             : 
     381       60906 :         if (need_distinct(rel) && list_length(rel->exps) == 1)
     382        1594 :                 set_unique(e);
     383       60906 : }
     384             : 
     385             : 
     386             : static sql_exp *
     387     3473333 : rel_propagate_statistics(visitor *v, sql_rel *rel, sql_exp *e, int depth)
     388             : {
     389     3473333 :         mvc *sql = v->sql;
     390     3473333 :         atom *lval;
     391             : 
     392     3473333 :         (void) depth;
     393     3473333 :         switch(e->type) {
     394     2178896 :         case e_column:
     395     2178896 :                 switch (rel->op) { /* set relations don't call rel_propagate_statistics */
     396      277903 :                 case op_join:
     397             :                 case op_left:
     398             :                 case op_right:
     399             :                 case op_full:
     400             :                 case op_semi:
     401             :                 case op_anti: {
     402      277903 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e);
     403      277903 :                         if (!found)
     404      139396 :                                 (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     405             :                         break;
     406             :                 }
     407     1900993 :                 case op_select:
     408             :                 case op_project:
     409             :                 case op_groupby: {
     410     1900993 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e); /* labels may be found on the same projection, ugh */
     411     1900988 :                         if (!found && is_simple_project(rel->op))
     412      124031 :                                 (void) rel_propagate_column_ref_statistics(sql, rel, e);
     413             :                         break;
     414             :                 }
     415           0 :                 case op_insert:
     416             :                 case op_update:
     417             :                 case op_delete:
     418           0 :                         (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     419           0 :                         break;
     420             :                 default:
     421             :                         break;
     422             :                 }
     423             :                 break;
     424       98450 :         case e_convert: {
     425       98450 :                 sql_subtype *to = exp_totype(e), *from = exp_fromtype(e);
     426       98450 :                 sql_exp *l = e->l;
     427       98450 :                 sql_class fr = from->type->eclass, too = to->type->eclass;
     428       98450 :                 prop *est;
     429             : 
     430       98450 :                 if (fr == too) {
     431       89427 :                         if ((lval = find_prop_and_get(l->p, PROP_MAX))) {
     432       58820 :                                 atom *res = atom_copy(sql->sa, lval);
     433       58820 :                                 if ((res = atom_cast(sql->sa, res, to)))
     434       58797 :                                         set_minmax_property(sql, e, PROP_MAX, res);
     435             :                         }
     436       89427 :                         if ((lval = find_prop_and_get(l->p, PROP_MIN))) {
     437       59434 :                                 atom *res = atom_copy(sql->sa, lval);
     438       59434 :                                 if ((res = atom_cast(sql->sa, res, to)))
     439       59411 :                                         set_minmax_property(sql, e, PROP_MIN, res);
     440             :                         }
     441             :                 }
     442             :                 /* propagating nuniques through conversions is tricky. I am adding just the general cases */
     443       98450 :                 if ((est = find_prop(l->p, PROP_NUNIQUES)) &&
     444       61046 :                         (((fr == EC_TIME || fr == EC_TIME_TZ || fr == EC_DATE) && (too == EC_TIMESTAMP || too == EC_TIMESTAMP_TZ)) ||
     445       61021 :                          ((fr == EC_NUM || fr == EC_BIT) && too == EC_NUM) || (fr == EC_DEC && too == EC_DEC) || (fr != EC_EXTERNAL && too == EC_STRING))) {
     446       57625 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     447       57625 :                         p->value.dval = est->value.dval;
     448             :                 }
     449       98450 :                 if (!has_nil(l))
     450       55382 :                         set_has_no_nil(e);
     451             :                 break;
     452             :         }
     453      341951 :         case e_aggr:
     454             :         case e_func: {
     455      341951 :                 BUN lv;
     456      341951 :                 sql_subfunc *f = e->f;
     457             : 
     458      341951 :                 if (!f->func->s) {
     459      315514 :                         int key = hash_key(f->func->base.name); /* Using hash lookup */
     460      315514 :                         sql_hash_e *he = sql_functions_lookup->buckets[key&(sql_functions_lookup->size-1)];
     461      315514 :                         lookup_function look = NULL;
     462             : 
     463      688994 :                         for (; he && !look; he = he->chain) {
     464      373480 :                                 struct function_properties* fp = (struct function_properties*) he->value;
     465             : 
     466      373480 :                                 if (!strcmp(f->func->base.name, fp->name))
     467      107858 :                                         look = fp->func;
     468             :                         }
     469      315514 :                         if (look)
     470      107858 :                                 look(sql, e);
     471             :                 }
     472             :                 /* for global aggregates with no semantics, if the left relation has values, then the output won't be NULL */
     473      341951 :                 if (!is_semantics(e) && e->l && !have_nil(e->l) &&
     474       88656 :                         (e->type != e_aggr || (is_groupby(rel->op) && list_length(rel->r)) || ((lv = get_rel_count(rel->l)) != BUN_NONE && lv > 0)))
     475       88333 :                         set_has_no_nil(e);
     476             :                 /* set properties for global aggregates */
     477      341951 :                 if (e->type == e_aggr && is_groupby(rel->op) && list_empty(rel->r)) {
     478        8091 :                         if (!find_prop(e->p, PROP_NUNIQUES)) {
     479        8091 :                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     480        8091 :                                 p->value.dval = 1;
     481             :                         }
     482        8091 :                         set_unique(e);
     483             :                 }
     484             :                 break;
     485             :         }
     486      567982 :         case e_atom:
     487      567982 :                 if (e->l) {
     488      551308 :                         atom *a = (atom*) e->l;
     489      551308 :                         if (!a->isnull) {
     490      489346 :                                 set_minmax_property(sql, e, PROP_MAX, a);
     491      489346 :                                 set_minmax_property(sql, e, PROP_MIN, a);
     492             :                         }
     493      551308 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     494      551308 :                         p->value.dval = 1;
     495       16674 :                 } else if (e->f) {
     496        3149 :                         list *vals = (list *) e->f;
     497        3149 :                         sql_exp *first = vals->h ? vals->h->data : NULL;
     498        3149 :                         atom *max = NULL, *min = NULL; /* all child values must have a valid min/max */
     499        3149 :                         int has_nil = 0;
     500             : 
     501        3149 :                         if (first) {
     502        3149 :                                 max = ((lval = find_prop_and_get(first->p, PROP_MAX))) ? lval : NULL;
     503        3149 :                                 min = ((lval = find_prop_and_get(first->p, PROP_MIN))) ? lval : NULL;
     504        3149 :                                 has_nil |= has_nil(first);
     505             :                         }
     506             : 
     507       13760 :                         for (node *n = vals->h ? vals->h->next : NULL ; n ; n = n->next) {
     508        7462 :                                 sql_exp *ee = n->data;
     509             : 
     510        7462 :                                 if (min && max) {
     511        7009 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MAX))) {
     512        6963 :                                                 max = atom_cmp(lval, max) > 0 ? lval : max;
     513             :                                         } else {
     514             :                                                 max = NULL;
     515             :                                         }
     516        7009 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MIN))) {
     517        6963 :                                                 min = atom_cmp(min, lval) > 0 ? lval : min;
     518             :                                         } else {
     519             :                                                 min = NULL;
     520             :                                         }
     521             :                                 }
     522        7462 :                                 has_nil |= has_nil(ee);
     523             :                         }
     524             : 
     525        3149 :                         if (!has_nil)
     526        2783 :                                 set_has_no_nil(e);
     527        3149 :                         if (min && max) {
     528        2760 :                                 set_minmax_property(sql, e, PROP_MAX, max);
     529        2760 :                                 set_minmax_property(sql, e, PROP_MIN, min);
     530             :                         }
     531        3149 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     532        3149 :                         p->value.dval = (dbl) list_length(vals);
     533             :                 } else {
     534       13525 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     535       13525 :                         p->value.dval = 1;
     536             :                 }
     537             :                 break;
     538      286054 :         case e_cmp:
     539             :                 /* TODO? propagating min/max/unique of booleans is not very worth it */
     540      286054 :                 if (e->flag == cmp_or || e->flag == cmp_filter) {
     541       18009 :                         if (!have_nil(e->l) && !have_nil(e->r))
     542       13554 :                                 set_has_no_nil(e);
     543      268045 :                 } else if (e->flag == cmp_in || e->flag == cmp_notin) {
     544       21193 :                         sql_exp *le = e->l;
     545       21193 :                         if (!has_nil(le) && !have_nil(e->r))
     546       17994 :                                 set_has_no_nil(e);
     547             :                 } else {
     548      246852 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     549      246852 :                         if (!has_nil(le) && !has_nil(re) && (!e->f || !has_nil(fe)))
     550      171615 :                                 set_has_no_nil(e);
     551             :                 }
     552             :                 break;
     553             :         case e_psm:
     554             :                 break;
     555             :         }
     556             : 
     557             : #ifndef NDEBUG
     558             :         {
     559             :                 /* min and max cannot be NULL and min must be <= than max, if it happens the inner relation must be empty! */
     560     3473328 :                 atom *min = find_prop_and_get(e->p, PROP_MIN), *max = find_prop_and_get(e->p, PROP_MAX);
     561             : 
     562     3473326 :                 (void) min;
     563     3473326 :                 (void) max;
     564     3473326 :                 assert(!min || !min->isnull);
     565     3473326 :                 assert(!max || !max->isnull);
     566     3473326 :                 assert(!min || !max || (min && max)/* atom_cmp(min, max) <= 0*/);
     567             :         }
     568             : #endif
     569     3473326 :         return e;
     570             : }
     571             : 
     572             : static list * /* Remove predicates always false from min/max values */
     573      138136 : rel_prune_predicates(visitor *v, sql_rel *rel)
     574             : {
     575      138136 :         if (rel->l) {
     576      138136 :                 sql_rel *l = rel->l;
     577      138136 :                 if (is_single(l))
     578        3263 :                         return rel->exps;
     579             :         }
     580      134873 :         if (!list_empty(rel->attr))
     581        2959 :                 return rel->exps;
     582      279905 :         for (node *n = rel->exps->h ; n ; n = n->next) {
     583      147991 :                 sql_exp *e = n->data;
     584             : 
     585      147991 :                 if (e->type == e_cmp && is_theta_exp(e->flag)) {
     586      121840 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     587      121840 :                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     588      121840 :                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     589      121840 :                         bool always_false = false, always_true = false;
     590             : 
     591      121840 :                         if (fe && !is_symmetric(e)) {
     592        3058 :                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
     593        3058 :                                 comp_type lower = range2lcompare(e->flag), higher = range2rcompare(e->flag);
     594        3660 :                                 int not_int1 = rval_min && lval_max && /* the middle and left intervals don't overlap */
     595        1663 :                                         (!is_anti(e) && (lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0)),
     596        4072 :                                         not_int2 = lval_min && fval_max && /* the middle and right intervals don't overlap */
     597        2485 :                                         (!is_anti(e) && (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0)),
     598        3058 :                                         not_int3 = rval_min && fval_max && /* the left interval is after the right one */
     599        1288 :                                         (!is_anti(e) && (atom_cmp(rval_min, fval_max) > 0));
     600             : 
     601        3058 :                                 always_false |= not_int1 || not_int2 || not_int3;
     602             :                                 /* for anti the middle must be before the left or after the right or the right after the left, for the other the middle must be always between the left and right intervals */
     603        4113 :                                 always_true |= !has_nil(le) && !has_nil(re) && !has_nil(fe) &&
     604        3957 :                                         lval_min && lval_max && rval_min && rval_max && fval_min && fval_max &&
     605         575 :                                         (is_anti(e) ? ((lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) || (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
     606         488 :                                          ((lower == cmp_gte ? atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0)));
     607             :                         } else if (!fe) {
     608      118764 :                                 if (!is_semantics(e)) /* trivial not null cmp null case */
     609      225762 :                                         always_false |= !is_anti(e) && ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)));
     610      118764 :                                 switch (e->flag) {
     611      103286 :                                 case cmp_equal:
     612      103286 :                                         if (lval_min && lval_max && rval_min && rval_max && (!is_semantics(e) || !has_nil(le) || !has_nil(re)))
     613      131450 :                                                 always_false |= (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     614      103286 :                                         if (is_semantics(e)) { /* prune *= NULL cases */
     615        5857 :                                                 always_false |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     616       11714 :                                                 always_true |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     617             :                                         }
     618             :                                         break;
     619        7215 :                                 case cmp_notequal:
     620        7215 :                                         if (lval_min && lval_max && rval_min && rval_max)
     621       11374 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     622        7215 :                                         if (is_semantics(e)) {
     623          26 :                                                 always_true |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     624          52 :                                                 always_false |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     625             :                                         }
     626             :                                         break;
     627        5632 :                                 case cmp_gt:
     628        5632 :                                         if (lval_max && rval_min)
     629        1947 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     630        5632 :                                         if (lval_min && rval_max)
     631        3894 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     632             :                                         break;
     633         119 :                                 case cmp_gte:
     634         119 :                                         if (lval_max && rval_min)
     635          49 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     636         119 :                                         if (lval_min && rval_max)
     637          98 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     638             :                                         break;
     639        2428 :                                 case cmp_lt:
     640        2428 :                                         if (lval_min && rval_max)
     641        1376 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     642        2428 :                                         if (lval_max && rval_min)
     643        2800 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     644             :                                         break;
     645          84 :                                 case cmp_lte:
     646          84 :                                         if (lval_min && rval_max)
     647          11 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     648          84 :                                         if (lval_max && rval_min)
     649          22 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     650             :                                         break;
     651             :                                 default: /* Maybe later I can do cmp_in and cmp_notin but don't forget to remove is_theta_exp check up there */
     652             :                                         break;
     653             :                                 }
     654             :                         }
     655      121840 :                         assert(!always_false || !always_true);
     656      121840 :                         if (always_false || always_true) {
     657        3554 :                                 sql_exp *ne = exp_atom_bool(v->sql->sa, always_true /* if always_true set then true else false */);
     658        3554 :                                 if (exp_name(e))
     659           0 :                                         exp_prop_alias(v->sql->sa, ne, e);
     660        3554 :                                 n->data = ne;
     661        3554 :                                 v->changes++;
     662             :                         }
     663             :                 }
     664             :         }
     665      131914 :         return rel->exps;
     666             : }
     667             : 
     668             : static sql_rel *
     669          14 : set_setop_side(visitor *v, sql_rel *rel, sql_rel *side)
     670             : {
     671          14 :         if (side == rel->r)
     672           0 :                 rel->r = NULL;
     673             :         else
     674          14 :                 rel->l = NULL;
     675          14 :         if (!is_simple_project(side->op) || rel_is_ref(side) || !list_empty(side->r)) {
     676           0 :                 side = rel_project(v->sql->sa, side, rel_projections(v->sql, side, NULL, 0, 1));
     677           0 :                 set_count_prop(v->sql->sa, side, get_rel_count(side->l));
     678           0 :                 side->exps = exps_exp_visitor_bottomup(v, side, side->exps, 0, &rel_propagate_statistics, false);
     679             :         }
     680          35 :         for (node *n = rel->exps->h, *m = side->exps->h ; n && m ; n = n->next, m = m->next) {
     681          21 :                 sql_exp *e1 = n->data, *e2 = m->data;
     682          21 :                 exp_prop_alias(v->sql->sa, e2, e1);
     683             :         }
     684          14 :         list_hash_clear(side->exps);
     685             : 
     686          14 :         if (need_distinct(rel))
     687           0 :                 set_distinct(side);
     688          14 :         side->p = prop_copy(v->sql->sa, rel->p);
     689          14 :         rel_destroy(rel);
     690          14 :         return side;
     691             : }
     692             : 
     693             : static BUN
     694       20332 : trivial_project_exp_card(sql_exp *e)
     695             : {
     696       20623 :         if (e->type == e_convert)
     697         291 :                 return trivial_project_exp_card(e->l);
     698       20332 :         return e->type == e_atom && e->f ? (BUN) list_length(e->f) : 1;
     699             : }
     700             : 
     701             : static BUN
     702       24248 : rel_calc_nuniques(mvc *sql, sql_rel *l, list *exps)
     703             : {
     704       24248 :         BUN lv = get_rel_count(l);
     705             : 
     706       24248 :         if (lv == 0)
     707             :                 return 0;
     708       21539 :         if (!list_empty(exps)) {
     709       21539 :                 BUN nuniques = 0;
     710             :                 /* compute the highest number of unique values */
     711       51131 :                 for (node *n = exps->h ; n && nuniques != BUN_NONE ; n = n->next) {
     712       29592 :                         sql_exp *e = n->data;
     713       29592 :                         sql_rel *bt = NULL;
     714       29592 :                         prop *p = NULL;
     715       29592 :                         BUN euniques = BUN_NONE;
     716       29592 :                         atom *min, *max, *sub = NULL;
     717       29592 :                         sql_subtype *tp = exp_subtype(e);
     718       29592 :                         sql_class ec = tp ? tp->type->eclass : EC_STRING; /* if 'e' has no type (eg parameter), use a non-number type to fail condition */
     719             : 
     720       29592 :                         if ((p = find_prop(e->p, PROP_NUNIQUES))) {
     721       20956 :                                 euniques = (BUN) p->value.dval;
     722        8636 :                         } else if (e->type == e_column && e->nid && rel_find_exp_and_corresponding_rel(l, e, false, &bt, NULL) && bt && (p = find_prop(bt->p, PROP_COUNT))) {
     723        6593 :                                 euniques = (BUN) p->value.lval;
     724             :                         }
     725             :                         /* use min to max range to compute number of possible values in the domain for number types */
     726       29592 :                         if ((EC_TEMP(ec)||ec==EC_NUM||ec==EC_MONTH||ec==EC_POS) &&
     727       22363 :                                 (min = find_prop_and_get(e->p, PROP_MIN)) && (max = find_prop_and_get(e->p, PROP_MAX))) {
     728             :                                 /* the range includes min and max, so the atom_inc call is needed */
     729             :                                 /* if 'euniques' has number of distinct values, compute min between both */
     730       17543 :                                 if ((sub = atom_sub(sql->sa, max, min)) && (sub = atom_inc(sql->sa, sub)) && (sub = atom_cast(sql->sa, sub, sql_bind_localtype("oid"))))
     731           0 :                                         euniques = MIN(euniques, (BUN) sub->data.val.oval);
     732             :                         }
     733       29592 :                         if (euniques != BUN_NONE)
     734       27549 :                                 nuniques = MAX(nuniques, euniques); /* the highest cardinality sets the estimation */
     735             :                         else
     736             :                                 nuniques = BUN_NONE;
     737             :                 }
     738       21539 :                 if (nuniques != BUN_NONE)
     739             :                         return nuniques;
     740             :         }
     741             :         return lv;
     742             : }
     743             : 
     744             : static sql_rel *
     745     2070084 : rel_get_statistics_(visitor *v, sql_rel *rel)
     746             : {
     747             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
     748     2070084 :         uint8_t has_special_modify = *(uint8_t*) v->data;
     749     2070084 :         bool can_be_pruned = !has_special_modify && v->storage_based_opt;
     750             : 
     751             :         /* Don't look at the same relation twice */
     752     2070084 :         if (are_statistics_gathered(rel->used))
     753             :                 return rel;
     754     1328861 :         rel->used |= statistics_gathered;
     755             : 
     756     1328861 :         switch (rel->op) {
     757      311866 :         case op_basetable: {
     758      311866 :                 sql_table *t = (sql_table *) rel->l;
     759      311866 :                 sqlstore *store = v->sql->session->tr->store;
     760             : 
     761      311866 :                 if (!list_empty(rel->exps)) {
     762     1246450 :                         for (node *n = rel->exps->h ; n ; n = n->next)
     763      934633 :                                 rel_basetable_column_get_statistics(v->sql, rel, n->data);
     764             :                 }
     765             :                 /* Set table row count. TODO? look for remote tables. Don't look at storage for declared tables, because it won't be cleaned */
     766      311866 :                 if (isTable(t) && t->s && !isDeclaredTable(t)) /* count active rows only */
     767      258348 :                         set_count_prop(v->sql->sa, rel, (BUN)store->storage_api.count_col(v->sql->session->tr, ol_first_node(t->columns)->data, 10));
     768             :                 break;
     769             :         }
     770        2790 :         case op_union:
     771             :         case op_inter:
     772             :         case op_except: {
     773        2790 :                 bool empty_cross = false;
     774        2790 :                 int i = 0;
     775        2790 :                 sql_rel *l = rel->l, *pl = rel->l, *r = rel->r, *pr = rel->r;
     776             : 
     777        2790 :                 while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     778           0 :                         pl = pl->l;
     779        2790 :                 while (is_sample(pr->op) || is_topn(pr->op))
     780           0 :                         pr = pr->l;
     781             :                 /* if it's not a projection, then project and propagate statistics */
     782        2790 :                 if (!is_project(pl->op) && !is_base(pl->op)) {
     783           0 :                         pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     784           0 :                         set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     785           0 :                         pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     786             :                 }
     787        2790 :                 if (!is_project(pr->op) && !is_base(pr->op)) {
     788           0 :                         pr = rel_project(v->sql->sa, pr, rel_projections(v->sql, pr, NULL, 0, 1));
     789           0 :                         set_count_prop(v->sql->sa, pr, get_rel_count(pr->l));
     790           0 :                         pr->exps = exps_exp_visitor_bottomup(v, pr, pr->exps, 0, &rel_propagate_statistics, false);
     791             :                 }
     792             : 
     793       11662 :                 for (node *n = rel->exps->h ; n ; n = n->next) {
     794        8872 :                         empty_cross |= rel_setop_get_statistics(v->sql, rel, pl->exps, pr->exps, n->data, i);
     795        8872 :                         i++;
     796             :                 }
     797             : 
     798             :                 /* propagate row count */
     799        2790 :                 if (is_union(rel->op)) {
     800         277 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     801         277 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     802             : 
     803         277 :                         if (lv == 0 && rv == 0) { /* both sides empty */
     804           2 :                                 if (can_be_pruned)
     805             :                                         empty_cross = true;
     806             :                                 else
     807           2 :                                         set_count_prop(v->sql->sa, rel, 0);
     808         275 :                         } else if (can_be_pruned && lv == 0 && !rel_is_ref(rel)) { /* left side empty */
     809           0 :                                 rel = set_setop_side(v, rel, r);
     810           0 :                                 empty_cross = false; /* don't rewrite again */
     811         275 :                         } else if (can_be_pruned && rv == 0 && !rel_is_ref(rel)) { /* right side empty */
     812           0 :                                 rel = set_setop_side(v, rel, l);
     813           0 :                                 empty_cross = false; /* don't rewrite again */
     814         275 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
     815           7 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
     816             :                         }
     817        2513 :                 } else if (is_inter(rel->op) || is_except(rel->op)) {
     818        2513 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     819        2513 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     820             : 
     821        2513 :                         if (lv == 0) { /* left side empty */
     822          62 :                                 if (can_be_pruned)
     823             :                                         empty_cross = true;
     824             :                                 else
     825           5 :                                         set_count_prop(v->sql->sa, rel, 0);
     826        2451 :                         } else if (rv == 0) { /* right side empty */
     827          17 :                                 if (is_inter(rel->op)) {
     828           3 :                                         if (can_be_pruned)
     829             :                                                 empty_cross = true;
     830             :                                         else
     831           0 :                                                 set_count_prop(v->sql->sa, rel, 0);
     832          14 :                                 } else if (can_be_pruned && !rel_is_ref(rel)) {
     833          14 :                                         rel = set_setop_side(v, rel, l);
     834          14 :                                         empty_cross = false; /* don't rewrite again */
     835             :                                 } else {
     836           0 :                                         set_count_prop(v->sql->sa, rel, lv);
     837             :                                 }
     838             :                         } else {
     839        2434 :                                 set_count_prop(v->sql->sa, rel, lv);
     840             :                         }
     841             :                 }
     842        2790 :                 if (can_be_pruned && empty_cross) {
     843          80 :                         rel_destroy(rel->l);
     844          80 :                         rel->l = NULL;
     845          80 :                         rel_destroy(rel->r);
     846          80 :                         rel->r = NULL;
     847         244 :                         for (node *n = rel->exps->h ; n ; n = n->next) {
     848         164 :                                 sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     849         164 :                                 exp_prop_alias(v->sql->sa, a, e);
     850         164 :                                 n->data = a;
     851             :                         }
     852          80 :                         list_hash_clear(rel->exps);
     853          80 :                         sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     854          80 :                         set_count_prop(v->sql->sa, l, 1);
     855          80 :                         l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     856          80 :                         set_count_prop(v->sql->sa, l, 0);
     857          80 :                         rel->op = op_project;
     858          80 :                         rel->l = l;
     859          80 :                         rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     860          80 :                         set_count_prop(v->sql->sa, rel, 0);
     861          80 :                         set_nodistinct(rel); /* set relations may have distinct flag set */
     862          80 :                         v->changes++;
     863             :                 }
     864             :                 break;
     865             :         }
     866       12886 :         case op_munion: {
     867       12886 :                 list *l = rel->l, *nrels = sa_list(v->sql->sa);
     868       12886 :                 BUN cnt = 0;
     869       12886 :                 bool needs_pruning = false;
     870             : 
     871       49266 :                 for (node *n = l->h; n; n = n->next) {
     872       36380 :                         sql_rel *r = n->data, *pl = r;
     873             : 
     874       36380 :                         while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     875           0 :                                         pl = pl->l;
     876             :                         /* if it's not a projection, then project and propagate statistics */
     877       36380 :                         if (!is_project(pl->op) && !is_base(pl->op)) {
     878           0 :                                 pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     879           0 :                                 set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     880           0 :                                 pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     881             :                         }
     882       36380 :                         nrels = append(nrels, pl);
     883             :                         /* we need new munion statistics */
     884             :                         /* propagate row count */
     885       36380 :                         BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     886             :                         /* if PROP_COUNT does not exist we assume at least a row (see get_rel_count def) */
     887       36380 :                         if (rv == BUN_NONE) {
     888        1156 :                                 cnt++;
     889        1156 :                                 continue;
     890             :                         }
     891       35224 :                         if (!rv && can_be_pruned)
     892        6625 :                                 needs_pruning = true;
     893             :                         /* overflow check */
     894       35224 :                         if (rv > (BUN_MAX - cnt))
     895       36380 :                                 rv = BUN_MAX;
     896             :                         else
     897       35224 :                                 cnt += rv;
     898             :                 }
     899       12886 :                 int i = 0;
     900       73792 :                 for (node *n = rel->exps->h ; n ; n = n->next, i++)
     901       60906 :                         rel_munion_get_statistics(v->sql, rel, nrels, n->data, i);
     902             : 
     903       12886 :                 if (needs_pruning && !rel_is_ref(rel)) {
     904        4464 :                         v->changes++;
     905        4464 :                         list *nl = sa_list(l->sa);
     906             : 
     907       16464 :                         for (node *n = nrels->h; n; n = n->next) {
     908       12000 :                                 sql_rel *r = n->data;
     909       12000 :                                 BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     910             : 
     911       12000 :                                 if (!rv) { /* keep last for now */
     912        6155 :                                         rel_destroy(r);
     913        6155 :                                         continue;
     914             :                                 }
     915        5845 :                                 nl = append(nl, r);
     916             :                         }
     917        4464 :                         rel->l = nl;
     918        4464 :                         if (list_length(nl) == 1) {
     919        4139 :                                 sql_rel *l = rel->l = nl->h->data; /* ugh */
     920        4139 :                                 rel->r = NULL;
     921        4139 :                                 rel->op = op_project;
     922             : 
     923       20389 :                                 for (node *n = rel->exps->h, *m = l->exps->h ; n && m ; n = n->next, m = m->next) {
     924       16250 :                                         sql_exp *pe = n->data, *ie = m->data;
     925       16250 :                                         sql_exp *ne = exp_ref(v->sql, ie);
     926       16250 :                                         exp_prop_alias(v->sql->sa, ne, pe);
     927       16250 :                                         n->data = ne;
     928             :                                 }
     929        4139 :                                 list_hash_clear(rel->exps);
     930         325 :                         } else if (list_empty(nl)) {
     931             :                                 /* empty select (project [ nils ] ) */
     932         454 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     933         354 :                                         sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     934         354 :                                         exp_prop_alias(v->sql->sa, a, e);
     935         354 :                                         n->data = a;
     936             :                                 }
     937         100 :                                 list_hash_clear(rel->exps);
     938         100 :                                 sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     939         100 :                                 set_count_prop(v->sql->sa, l, 1);
     940         100 :                                 l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     941         100 :                                 set_count_prop(v->sql->sa, l, 0);
     942         100 :                                 rel->op = op_project;
     943         100 :                                 rel->r = NULL;
     944         100 :                                 rel->l = l;
     945         100 :                                 rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     946         100 :                                 set_count_prop(v->sql->sa, rel, 0);
     947         100 :                                 set_nodistinct(rel); /* set relations may have distinct flag set */
     948             :                         }
     949             :                 } else {
     950        8422 :                         set_count_prop(v->sql->sa, rel, cnt);
     951             :                 }
     952             :                 break;
     953             :         }
     954      530375 :         case op_join:
     955             :         case op_left:
     956             :         case op_right:
     957             :         case op_full:
     958             :         case op_semi:
     959             :         case op_anti:
     960             :         case op_select:
     961             :         case op_project:
     962             :         case op_groupby: {
     963      530375 :                 if (is_groupby(rel->op) && !list_empty(rel->r))
     964       15531 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     965      530375 :                 rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, &rel_propagate_statistics, false);
     966      530374 :                 if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r))
     967       39124 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     968             :                 /* The following optimizations can only be applied after propagating the statistics to rel->exps */
     969      530375 :                 if (can_be_pruned && (is_join(rel->op) || is_select(rel->op)) && !list_empty(rel->exps)) {
     970      138136 :                         int changes = v->changes;
     971      138136 :                         rel->exps = rel_prune_predicates(v, rel);
     972      138136 :                         if (v->changes > changes)
     973        3521 :                                 rel = rewrite_simplify(v, 0, v->value_based_opt, rel);
     974             :                 }
     975             : 
     976             :                 /* propagate row count */
     977      530375 :                 sql_rel *l = rel->l, *r = rel->r;
     978      530375 :                 switch (rel->op) {
     979      133836 :                 case op_join:
     980             :                 case op_left:
     981             :                 case op_right:
     982             :                 case op_full: {
     983      133836 :                         BUN lv = get_rel_count(l), rv = get_rel_count(r), uniques_estimate = BUN_MAX, join_idx_estimate = BUN_MAX;
     984             : 
     985      133836 :                         if (!list_empty(rel->exps) && !is_single(rel)) {
     986      244944 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     987      125044 :                                         sql_exp *e = n->data, *el = e->l, *er = e->r;
     988             : 
     989      125044 :                                         if (find_prop(e->p, PROP_JOINIDX)) {
     990         667 :                                                 join_idx_estimate = lv>rv?lv:rv;
     991      124377 :                                         } else if (e->type == e_cmp && e->flag == cmp_equal) {
     992             :                                                 /* if one of the sides is unique, the cardinality will be that exact number, but look for nulls */
     993      120493 :                                                 if (!is_semantics(e) || !has_nil(el) || !has_nil(er)) {
     994      120097 :                                                         BUN lu = 0, ru = 0;
     995      120097 :                                                         prop *p = NULL;
     996      120097 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES)))
     997       88970 :                                                                 lu = (BUN) p->value.dval;
     998      120097 :                                                         if ((p = find_prop(er->p, PROP_NUNIQUES)))
     999      103407 :                                                                 ru = (BUN) p->value.dval;
    1000      120097 :                                                         if (is_unique(el) || lu > lv) {
    1001       73153 :                                                                 BUN ncount = (is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv;
    1002       73153 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1003       46944 :                                                         } else if (is_unique(er) || ru > rv) {
    1004       29342 :                                                                 BUN ncount = (is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv;
    1005       29342 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1006             :                                                         }
    1007             :                                                 }
    1008             :                                         }
    1009             :                                 }
    1010             :                         }
    1011      133836 :                         if (is_single(rel)) {
    1012        4603 :                                 set_count_prop(v->sql->sa, rel, lv);
    1013      129233 :                         } else if (join_idx_estimate != BUN_MAX) {
    1014         665 :                                 set_count_prop(v->sql->sa, rel, join_idx_estimate);
    1015      128568 :                         } else if (uniques_estimate != BUN_MAX) {
    1016       96072 :                                 set_count_prop(v->sql->sa, rel, uniques_estimate);
    1017       32496 :                         } else if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1018             :                                 /* corner cases for outer joins */
    1019         126 :                                 if (is_left(rel->op)) {
    1020         114 :                                         set_count_prop(v->sql->sa, rel, lv);
    1021          12 :                                 } else if (is_right(rel->op)) {
    1022           3 :                                         set_count_prop(v->sql->sa, rel, rv);
    1023           9 :                                 } else if (is_full(rel->op) && lv != BUN_NONE && rv != BUN_NONE) {
    1024           9 :                                         set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
    1025           0 :                                 } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1026           0 :                                         set_count_prop(v->sql->sa, rel, 0);
    1027             :                                 }
    1028       32370 :                         } else if (lv == 0) {
    1029        1166 :                                 set_count_prop(v->sql->sa, rel, (is_right(rel->op) || is_full(rel->op)) ? rv : 0);
    1030       31773 :                         } else if (rv == 0) {
    1031        1568 :                                 set_count_prop(v->sql->sa, rel, (is_left(rel->op) || is_full(rel->op)) ? lv : 0);
    1032       30699 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1033       18251 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX / lv)) ? BUN_MAX : (lv * rv)); /* overflow check */
    1034             :                         }
    1035             :                         break;
    1036             :                 }
    1037        2626 :                 case op_anti:
    1038        2626 :                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1039        2626 :                         break;
    1040      108189 :                 case op_semi:
    1041             :                 case op_select:
    1042             :                         /* TODO calculate cardinalities using selectivities */
    1043      108189 :                         if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1044        2588 :                                 set_count_prop(v->sql->sa, rel, 0);
    1045             :                         } else {
    1046      207954 :                                 if (!list_empty(rel->exps) && !is_single(rel)) {
    1047      102353 :                                         BUN cnt = get_rel_count(l), u = 1;
    1048      171015 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1049      112256 :                                                 sql_exp *e = n->data, *el = e->l, *er = e->r;
    1050             : 
    1051             :                                                 /* simple expressions first */
    1052      112256 :                                                 if (e->type == e_cmp && e->flag == cmp_equal && exp_is_atom(er)) {
    1053             :                                                         /* use selectivity */
    1054       56787 :                                                         prop *p;
    1055       56787 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES))) {
    1056       43594 :                                                                 u = (BUN) p->value.dval;
    1057       43594 :                                                                 break;
    1058             :                                                         }
    1059             :                                                 }
    1060             :                                         }
    1061             :                                         /* u is an *estimate*, so don't set count_prop to 0 unless cnt is 0 */
    1062      102353 :                                         set_count_prop(v->sql->sa, rel, cnt == 0 ? 0 : u == 0 || u > cnt ? 1 : cnt/u);
    1063             :                                 } else {
    1064        3248 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1065             :                                 }
    1066             :                         }
    1067             :                         break;
    1068      262311 :                 case op_project:
    1069      262311 :                         if (l) {
    1070      252407 :                                 if (need_distinct(rel)) {
    1071           0 :                                         set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->exps));
    1072             :                                 } else {
    1073      252407 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1074             :                                 }
    1075             :                         } else {
    1076        9904 :                                 BUN card = 1;
    1077             : 
    1078        9904 :                                 if (!list_empty(rel->exps)) {
    1079       28928 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1080       19024 :                                                 sql_exp *e = n->data;
    1081       19024 :                                                 card = MAX(card, trivial_project_exp_card(e));
    1082             :                                         }
    1083             :                                 }
    1084        9904 :                                 set_count_prop(v->sql->sa, rel, card);
    1085             :                         }
    1086             :                         break;
    1087       23057 :                 case op_groupby:
    1088       23057 :                         if (list_empty(rel->r)) {
    1089        7525 :                                 set_count_prop(v->sql->sa, rel, 1);
    1090             :                         } else {
    1091       15532 :                                 set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->r));
    1092             :                         }
    1093             :                         break;
    1094             :                 default:
    1095             :                         break;
    1096             :                 }
    1097             :                 break;
    1098             :         }
    1099       16788 :         case op_topn: {
    1100       16788 :                 BUN lv = get_rel_count(rel->l);
    1101             : 
    1102       16788 :                 if (lv != BUN_NONE) {
    1103       16771 :                         sql_exp *le = rel->exps->h->data, *oe = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
    1104          96 :                         if (oe && oe->l && exp_is_not_null(oe)) { /* no parameters */
    1105          96 :                                 BUN offset = (BUN) ((atom*)oe->l)->data.val.lval;
    1106          96 :                                 lv = offset >= lv ? 0 : lv - offset;
    1107             :                         }
    1108       16771 :                         if (le->l && exp_is_not_null(le)) {
    1109       16733 :                                 BUN limit = (BUN) ((atom*)le->l)->data.val.lval;
    1110       16733 :                                 lv = MIN(lv, limit);
    1111             :                         }
    1112       16771 :                         set_count_prop(v->sql->sa, rel, lv);
    1113             :                 }
    1114             :                 break;
    1115             :         }
    1116          22 :         case op_sample: {
    1117          22 :                 BUN lv = get_rel_count(rel->l);
    1118             : 
    1119          22 :                 if (lv != BUN_NONE) {
    1120           4 :                         sql_exp *se = rel->exps->h->data;
    1121           4 :                         sql_subtype *tp = exp_subtype(se);
    1122             : 
    1123           4 :                         if (se->l && tp->type->eclass == EC_NUM) { /* sample is a number of rows */
    1124           4 :                                 BUN sample = (BUN) ((atom*)se->l)->data.val.lval;
    1125           4 :                                 lv = MIN(lv, sample);
    1126           0 :                         } else if (se->l) { /* sample is a percentage of rows */
    1127           0 :                                 dbl percent = ((atom*)se->l)->data.val.dval;
    1128           0 :                                 assert(tp->type->eclass == EC_FLT);
    1129           0 :                                 lv = (BUN) ceil((dbl)lv * percent);
    1130             :                         }
    1131           4 :                         set_count_prop(v->sql->sa, rel, lv);
    1132             :                 }
    1133             :                 break;
    1134             :         }
    1135        6086 :         case op_table: {
    1136        6086 :                 sql_exp *op = rel->r;
    1137        6086 :                 if (rel->flag != TRIGGER_WRAPPER && op) {
    1138        5774 :                         sql_subfunc *f = op->f;
    1139        5774 :                         if (f->func->lang == FUNC_LANG_SQL) {
    1140          97 :                                 set_count_prop(v->sql->sa, rel, 1000 /* just some fallback value */);
    1141        5677 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "storage") == 0) {
    1142         829 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of catalog */);
    1143        4848 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "db_users") == 0) {
    1144           0 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of users */);
    1145        4848 :                         } else if (f->func->lang == FUNC_LANG_MAL && strncmp(f->func->base.name, "querylog", 8) == 0) {
    1146        1085 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of querylog */);
    1147        3763 :                         } else if (f->func->lang == FUNC_LANG_MAL &&
    1148        3653 :                                            (strcmp(f->func->base.name, "queue") == 0 ||
    1149        3389 :                                                 strcmp(f->func->base.name, "optimizers") == 0 ||
    1150        3077 :                                                 strcmp(f->func->base.name, "env") == 0 ||
    1151        2786 :                                                 strcmp(f->func->base.name, "keywords") == 0 ||
    1152        2786 :                                                 strcmp(f->func->base.name, "statistics") == 0 ||
    1153        2119 :                                                 strcmp(f->func->base.name, "rejects") == 0 ||
    1154        1867 :                                                 strcmp(f->func->base.name, "schemastorage") == 0 ||
    1155        1867 :                                                 strncmp(f->func->base.name, "storage", 7) == 0 ||
    1156        1867 :                                                 strcmp(f->func->base.name, "sessions") == 0) ) {
    1157        2076 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of queue */);
    1158             :                         }
    1159             :                         /* else {
    1160             :                                 printf("%%func needs stats : %s\n", f->func->base.name);
    1161             :                         } */
    1162             :                 }
    1163             :                 break;
    1164             :         }
    1165             :         /*These relations are less important for now
    1166             :           TODO later we can tune it */
    1167             :         case op_insert:
    1168             :         case op_update:
    1169             :         case op_delete:
    1170             :         case op_truncate:
    1171             :         case op_ddl:
    1172             :         default:
    1173             :                 break;
    1174             :         }
    1175             : 
    1176             :         return rel;
    1177             : }
    1178             : 
    1179             : static sql_rel *
    1180      539569 : rel_get_statistics(visitor *v, global_props *gp, sql_rel *rel)
    1181             : {
    1182             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
    1183      539569 :         uint8_t has_special_modify = (uint8_t) gp->has_special_modify;
    1184      539569 :         v->data = &has_special_modify;
    1185      539569 :         rel = rel_visitor_bottomup(v, rel, &rel_get_statistics_);
    1186      539570 :         v->data = gp;
    1187      539570 :         return rel;
    1188             : }
    1189             : 
    1190             : run_optimizer
    1191      705405 : bind_get_statistics(visitor *v, global_props *gp)
    1192             : {
    1193      705405 :         (void) v;
    1194      705405 :         return (gp->opt_level == 1 && !gp->cnt[op_insert]) ? rel_get_statistics : NULL;
    1195             : }
    1196             : 
    1197             : 
    1198             : static bool
    1199       94482 : point_select_on_unique_column(sql_rel *rel)
    1200             : {
    1201       94482 :         if (is_select(rel->op) && !list_empty(rel->exps)) {
    1202      130805 :                 for (node *n = rel->exps->h; n ; n = n->next) {
    1203       75366 :                         sql_exp *e = n->data, *el = e->l, *er = e->r, *found = NULL;
    1204             : 
    1205       75366 :                         if (is_compare(e->type) && e->flag == cmp_equal) {
    1206       33337 :                                 if (is_numeric_upcast(el))
    1207           0 :                                         el = el->l;
    1208       33337 :                                 if (is_numeric_upcast(er))
    1209           0 :                                         er = er->l;
    1210       33337 :                                 if (is_alias(el->type) && exp_is_atom(er) && (found = rel_find_exp(rel->l, el)) &&
    1211         749 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(found) || !has_nil(er)))
    1212             :                                         return true;
    1213       32588 :                                 if (is_alias(er->type) && exp_is_atom(el) && (found = rel_find_exp(rel->l, er)) &&
    1214           0 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(el) || !has_nil(found)))
    1215             :                                         return true;
    1216             :                         }
    1217             :                 }
    1218             :         }
    1219             :         return false;
    1220             : }
    1221             : 
    1222             : /*
    1223             :  * A point select on an unique column reduces the number of rows to 1. If the same select is under a
    1224             :  * join, the opposite side's select can be pushed above the join.
    1225             :  */
    1226             : static inline sql_rel *
    1227     1242625 : rel_push_select_up(visitor *v, sql_rel *rel)
    1228             : {
    1229     1242625 :         if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) || is_semi(rel->op)) && !is_single(rel)) {
    1230      253575 :                 sql_rel *l = rel->l, *r = rel->r;
    1231      253575 :                 bool can_pushup_left = is_select(l->op) && !rel_is_ref(l) && !is_single(l) && (is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op)),
    1232      253575 :                         can_pushup_right = is_select(r->op) && !rel_is_ref(r) && !is_single(r) && (is_innerjoin(rel->op) || is_right(rel->op));
    1233             : 
    1234      253575 :                 if (can_pushup_left || can_pushup_right) {
    1235       66625 :                         if (can_pushup_left)
    1236       45243 :                                 can_pushup_left = point_select_on_unique_column(r);
    1237       66625 :                         if (can_pushup_right)
    1238       49239 :                                 can_pushup_right = point_select_on_unique_column(l);
    1239             : 
    1240             :                         /* if both selects retrieve one row each, it's not worth it to push both up */
    1241       66625 :                         if (can_pushup_left && !can_pushup_right) {
    1242         683 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1243         683 :                                 nrel->l = l->l;
    1244         683 :                                 rel = rel_inplace_select(rel, nrel, l->exps);
    1245         683 :                                 assert(is_select(rel->op));
    1246         683 :                                 v->changes++;
    1247       65942 :                         } else if (!can_pushup_left && can_pushup_right) {
    1248          14 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1249          14 :                                 nrel->r = r->l;
    1250          14 :                                 rel = rel_inplace_select(rel, nrel, r->exps);
    1251          14 :                                 assert(is_select(rel->op));
    1252          14 :                                 v->changes++;
    1253             :                         }
    1254             :                 }
    1255             :         }
    1256     1242625 :         return rel;
    1257             : }
    1258             : 
    1259             : static int
    1260       93335 : sql_class_base_score(visitor *v, sql_column *c, sql_subtype *t, bool equality_based)
    1261             : {
    1262       93335 :         int de;
    1263             : 
    1264       93335 :         if (!t)
    1265             :                 return 0;
    1266       93335 :         switch (ATOMstorage(t->type->localtype)) {
    1267             :         case TYPE_bte:
    1268             :                 return 150 - 8;
    1269        1665 :         case TYPE_sht:
    1270        1665 :                 return 150 - 16;
    1271       37285 :         case TYPE_int:
    1272       37285 :                 return 150 - 32;
    1273         516 :         case TYPE_void:
    1274             :         case TYPE_lng:
    1275         516 :                 return 150 - 64;
    1276             :         case TYPE_uuid:
    1277             : #ifdef HAVE_HGE
    1278             :         case TYPE_hge:
    1279             : #endif
    1280             :                 return 150 - 128;
    1281           2 :         case TYPE_flt:
    1282           2 :                 return 75 - 24;
    1283             :         case TYPE_dbl:
    1284             :                 return 75 - 53;
    1285       30551 :         default:
    1286       30551 :                 if (equality_based && c && v->storage_based_opt && (de = mvc_is_duplicate_eliminated(v->sql, c)))
    1287        1554 :                         return 150 - de * 8;
    1288             :                 /* strings and blobs not duplicate eliminated don't get any points here */
    1289             :                 return 0;
    1290             :         }
    1291             : }
    1292             : 
    1293             : static int
    1294       34436 : score_se_base(visitor *v, sql_rel *rel, sql_exp *e)
    1295             : {
    1296       34436 :         int res = 0;
    1297       34436 :         sql_subtype *t = exp_subtype(e);
    1298       34436 :         sql_column *c = NULL;
    1299             : 
    1300       34436 :         if (e->type == e_convert) /* keep unsafes at the end (TODO improve) */
    1301             :                 return -1000;
    1302             :         /* can we find out if the underlying table is sorted */
    1303       33835 :         if ((c = exp_find_column(rel, e, -2)) && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1304       33835 :                 res += 600;
    1305             : 
    1306             :         /* prefer the shorter var types over the longer ones */
    1307       33835 :         res += sql_class_base_score(v, c, t, is_equality_or_inequality_exp(e->flag)); /* smaller the type, better */
    1308       33835 :         return res;
    1309             : }
    1310             : 
    1311             : static int
    1312       58510 : score_se(visitor *v, sql_rel *rel, sql_exp *e)
    1313             : {
    1314       58510 :         int score = 0;
    1315       58510 :         if (e->type == e_cmp && !is_complex_exp(e->flag)) {
    1316       34436 :                 sql_exp *l = e->l;
    1317             : 
    1318       34436 :                 while (l->type == e_cmp) { /* go through nested comparisons */
    1319           2 :                         sql_exp *ll;
    1320             : 
    1321           2 :                         if (l->flag == cmp_filter || l->flag == cmp_or)
    1322           0 :                                 ll = ((list*)l->l)->h->data;
    1323             :                         else
    1324           2 :                                 ll = l->l;
    1325           2 :                         if (ll->type != e_cmp)
    1326             :                                 break;
    1327             :                         l = ll;
    1328             :                 }
    1329       34436 :                 score += score_se_base(v, rel, l);
    1330             :         }
    1331       58510 :         score += exp_keyvalue(e);
    1332       58510 :         return score;
    1333             : }
    1334             : 
    1335             : static inline sql_rel *
    1336     1242625 : rel_select_order(visitor *v, sql_rel *rel)
    1337             : {
    1338     1242625 :         int *scores = NULL;
    1339     1242625 :         sql_exp **exps = NULL;
    1340             : 
    1341     1242625 :         if (is_select(rel->op) && list_length(rel->exps) > 1) {
    1342       27372 :                 node *n;
    1343       27372 :                 int i, nexps = list_length(rel->exps);
    1344       27372 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, nexps);
    1345       27372 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, nexps);
    1346             : 
    1347       85882 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next) {
    1348       58540 :                         exps[i] = n->data;
    1349       58540 :                         if (find_prop(exps[i]->p, PROP_HASHCOL))
    1350             :                                 return rel;
    1351       58510 :                         scores[i] = score_se(v, rel, n->data);
    1352             :                 }
    1353       27342 :                 GDKqsort(scores, exps, NULL, nexps, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1354             : 
    1355       85852 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next)
    1356       58510 :                         n->data = exps[i];
    1357             :         }
    1358             : 
    1359             :         return rel;
    1360             : }
    1361             : 
    1362             : /* Compute the efficiency of using this expression early in a group by list */
    1363             : static int
    1364       59500 : score_gbe(visitor *v, sql_rel *rel, sql_exp *e)
    1365             : {
    1366       59500 :         int res = 0;
    1367       59500 :         sql_subtype *t = exp_subtype(e);
    1368       59500 :         sql_column *c = exp_find_column(rel, e, -2);
    1369             : 
    1370       59500 :         if (e->card == CARD_ATOM) /* constants are trivial to group */
    1371          38 :                 res += 1000;
    1372             :         /* can we find out if the underlying table is sorted */
    1373       59500 :         if (is_unique(e) || find_prop(e->p, PROP_HASHCOL) || (c && v->storage_based_opt && mvc_is_unique(v->sql, c))) /* distinct columns */
    1374        3584 :                 res += 700;
    1375       38910 :         if (c && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1376        3710 :                 res += 500;
    1377       59500 :         if (find_prop(e->p, PROP_HASHIDX)) /* has hash index */
    1378           0 :                 res += 200;
    1379             : 
    1380             :         /* prefer the shorter var types over the longer ones */
    1381       59500 :         res += sql_class_base_score(v, c, t, true); /* smaller the type, better */
    1382       59500 :         return res;
    1383             : }
    1384             : 
    1385             : /* reorder group by expressions */
    1386             : static inline sql_rel *
    1387     1242625 : rel_groupby_order(visitor *v, sql_rel *rel)
    1388             : {
    1389     1242625 :         int *scores = NULL;
    1390     1242625 :         sql_exp **exps = NULL;
    1391             : 
    1392     1242625 :         if (is_groupby(rel->op) && list_length(rel->r) > 1) {
    1393       25587 :                 node *n;
    1394       25587 :                 list *gbe = rel->r;
    1395       25587 :                 int i, ngbe = list_length(gbe);
    1396       25587 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, ngbe);
    1397       25587 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, ngbe);
    1398             : 
    1399             :                 /* first sorting step, give priority for integers and sorted columns */
    1400       85087 :                 for (i = 0, n = gbe->h; n; i++, n = n->next) {
    1401       59500 :                         exps[i] = n->data;
    1402       59500 :                         scores[i] = score_gbe(v, rel, exps[i]);
    1403             :                 }
    1404       25587 :                 GDKqsort(scores, exps, NULL, ngbe, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1405             : 
    1406             :                 /* second sorting step, give priority to strings with lower number of digits */
    1407       50504 :                 for (i = ngbe - 1; i && !scores[i]; i--); /* find expressions with no score from the first round */
    1408       25587 :                 if (scores[i])
    1409       24589 :                         i++;
    1410       25587 :                 if (ngbe - i > 1) {
    1411        8604 :                         for (int j = i; j < ngbe; j++) {
    1412        6597 :                                 sql_subtype *t = exp_subtype(exps[j]);
    1413        6597 :                                 scores[j] = t ? t->digits : 0;
    1414             :                         }
    1415             :                         /* the less number of digits the better, order ascending */
    1416        2007 :                         GDKqsort(scores + i, exps + i, NULL, ngbe - i, sizeof(int), sizeof(void *), TYPE_int, false, true);
    1417             :                 }
    1418             : 
    1419       85087 :                 for (i = 0, n = gbe->h; n; i++, n = n->next)
    1420       59500 :                         n->data = exps[i];
    1421             :         }
    1422             : 
    1423     1242625 :         return rel;
    1424             : }
    1425             : 
    1426             : /* This optimization loop contains optimizations that can potentially use statistics */
    1427             : static sql_rel *
    1428     1242625 : rel_final_optimization_loop_(visitor *v, sql_rel *rel)
    1429             : {
    1430             :         /* Run rel_push_select_up only once at the end to avoid an infinite optimization loop */
    1431     1242625 :         rel = rel_push_select_up(v, rel);
    1432     1242625 :         rel = rel_select_order(v, rel);
    1433             : 
    1434             :         /* TODO? Maybe later add rel_simplify_count, rel_join2semijoin, rel_simplify_fk_joins,
    1435             :            rel_distinct_project2groupby, rel_simplify_predicates, rel_simplify_math,
    1436             :            rel_distinct_aggregate_on_unique_values */
    1437             : 
    1438     1242625 :         rel = rel_groupby_order(v, rel);
    1439     1242625 :         return rel;
    1440             : }
    1441             : 
    1442             : static sql_rel *
    1443       65868 : rel_final_optimization_loop(visitor *v, global_props *gp, sql_rel *rel)
    1444             : {
    1445       65868 :         (void) gp;
    1446       65868 :         return rel_visitor_bottomup(v, rel, &rel_final_optimization_loop_);
    1447             : }
    1448             : 
    1449             : run_optimizer
    1450      705403 : bind_final_optimization_loop(visitor *v, global_props *gp)
    1451             : {
    1452      705403 :         int flag = v->sql->sql_optimizer;
    1453             :         /* At the moment, this optimizer has dependency on 3 flags */
    1454      547158 :         return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_select]) &&
    1455      771273 :                 (flag & push_select_up) && (flag & optimize_select_and_joins_topdown) && (flag & optimize_projections) ? rel_final_optimization_loop : NULL;
    1456             : }

Generated by: LCOV version 1.14