LCOV - code coverage report
Current view: top level - sql/server - rel_statistics.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 831 883 94.1 %
Date: 2024-11-15 19:37:45 Functions: 26 26 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include "rel_optimizer_private.h"
      15             : #include "rel_statistics.h"
      16             : #include "rel_basetable.h"
      17             : #include "rel_rewriter.h"
      18             : 
      19             : static sql_exp *
      20     3244857 : comparison_find_column(sql_exp *input, sql_exp *e)
      21             : {
      22     3244857 :         switch (input->type) {
      23       92278 :         case e_convert: {
      24       92278 :                 list *types = (list *)input->r;
      25       92278 :                 sql_class from = ((sql_subtype*)types->h->data)->type->eclass, to = ((sql_subtype*)types->h->next->data)->type->eclass;
      26       92278 :                 if (from == to)
      27      177800 :                         return comparison_find_column(input->l, e) ? input : NULL;
      28             :                 return NULL;
      29             :         }
      30     2871894 :         case e_column:
      31     2871894 :                 return exp_match(e, input) ? input : NULL;
      32             :         default:
      33             :                 return NULL;
      34             :         }
      35             : }
      36             : 
      37             : /* multi lo <= col <= hi, maybe still be false even if lo or hi are NULL, possibly similar for filter on multiple
      38             :  * columns */
      39             : #define comp_single_column(c) (!c->f)
      40             : 
      41             : static sql_exp *
      42     8289866 : rel_propagate_column_ref_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
      43             : {
      44     8373139 :         assert(e->type == e_column);
      45     8373139 :         if (rel) {
      46     8373060 :                 switch(rel->op) {
      47     3894598 :                 case op_left:
      48             :                 case op_right:
      49             :                 case op_full:
      50             :                 case op_join:
      51             :                 case op_select:
      52             :                 case op_anti:
      53             :                 case op_semi: {
      54     3894598 :                         bool found_without_semantics = false, found_left = false, found_right = false, still_unique = false;
      55             : 
      56     3894598 :                         if ((is_innerjoin(rel->op) || is_select(rel->op)) && list_length(rel->exps) == 1 && exp_is_false(rel->exps->h->data))
      57             :                                 return NULL; /* nothing will pass, skip */
      58             : 
      59             :                         /* propagate from the bottom first */
      60     3886835 :                         if (rel_propagate_column_ref_statistics(sql, rel->l, e))
      61             :                                 found_left = true;
      62     2498956 :                         if (!found_left && is_join(rel->op) && rel_propagate_column_ref_statistics(sql, rel->r, e))
      63     3886835 :                                 found_right = true;
      64             : 
      65     3886835 :                         if (!found_left && !found_right)
      66             :                                 return NULL;
      67     1661126 :                         if (!list_empty(rel->exps) && rel->op != op_anti) { /* if there's an or, the MIN and MAX get difficult to propagate */
      68     3273095 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
      69     1693430 :                                         sql_exp *comp = n->data, *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL, *fe = comp->f, *fne = NULL;
      70             : 
      71     1693430 :                                         if (comp->type == e_cmp) {
      72     1692734 :                                                 if (is_theta_exp(comp->flag) && ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) || (fe && (fne = comparison_find_column(fe, e))))) {
      73      113435 :                                                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
      74      113435 :                                                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
      75             : 
      76             :                                                         /* not semantics found or if explicitly filtering not null values from the column */
      77      113435 :                                                         found_without_semantics |= (!is_semantics(comp) && comp_single_column(comp)) || (comp->flag == cmp_equal && lne && is_anti(comp) && exp_is_null(re));
      78      113435 :                                                         still_unique |= comp->flag == cmp_equal && is_unique(le) && is_unique(re); /* unique if only equi-joins on unique columns are there */
      79      113435 :                                                         if (is_full(rel->op) || (is_left(rel->op) && found_left) || (is_right(rel->op) && found_right)) /* on outer joins, min and max cannot be propagated on some cases */
      80       17946 :                                                                 continue;
      81             :                                                         /* if (end2 >= start1 && start2 <= end1) then the 2 intervals are intersected */
      82       95489 :                                                         if (fe && lval_min && lval_max) { /* range case, the middle expression must intersect the other two */
      83        4622 :                                                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
      84        4622 :                                                                 int int1 = rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0,
      85        4622 :                                                                         int2 = fval_min && fval_max && atom_cmp(fval_max, lval_min) >= 0 && atom_cmp(fval_min, lval_max) <= 0,
      86        4622 :                                                                         symmetric = is_symmetric(comp);
      87             : 
      88        4622 :                                                                 if (is_anti(comp) || (!symmetric && fval_min && rval_max && atom_cmp(fval_min, rval_max) < 0)) /* for asymmetric case the re range must be after the fe range */
      89        1817 :                                                                         continue;
      90        2805 :                                                                 if (lne && int1 && int2) {
      91         104 :                                                                         if (symmetric) {
      92           0 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
      93           0 :                                                                                 atom *nmin = statistics_atom_min(sql, rval_min, fval_min), *nmax = statistics_atom_max(sql, rval_max, fval_max);
      94             :                                                                                 /* max is min from le and (max from re and fe max) */
      95           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, nmax, p2->value.pval) : nmax);
      96             :                                                                                 /* min is max from le and (min from re and fe min) */
      97           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, nmin, p1->value.pval) : nmin);
      98             :                                                                         } else {
      99         104 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
     100             :                                                                                 /* max is min from le and fe max */
     101         104 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, fval_max, p2->value.pval) : fval_max);
     102             :                                                                                 /* min is max from le and re min */
     103         104 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, rval_min, p1->value.pval) : rval_min);
     104             :                                                                         }
     105        2701 :                                                                 } else if (rne) {
     106         590 :                                                                         if (symmetric && int1 && int2) { /* min is max from le and (min from re and fe min) */
     107           0 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     108           0 :                                                                                 atom *nmin = p ? statistics_atom_min(sql, p->value.pval, fval_min) : fval_min;
     109           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, nmin, lval_min));
     110         590 :                                                                         } else if (int1) { /* min is max from le and re min */
     111         100 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     112         100 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     113             :                                                                         }
     114        2111 :                                                                 } else if (fne) {
     115         549 :                                                                         if (symmetric && int1 && int2) { /* max is min from le and (max from re and fe max) */
     116           0 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     117           0 :                                                                                 atom *nmax = p ? statistics_atom_max(sql, p->value.pval, rval_max) : rval_max;
     118           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, nmax, lval_max) : nmax);
     119         549 :                                                                         } else if (int2) { /* max is min from le and fe max */
     120         266 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     121         266 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     122             :                                                                         }
     123             :                                                                 }
     124       90867 :                                                         } else if (lval_min && lval_max && rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0) {
     125             :                                                                 /* both min and max must be set and the intervals must overlap */
     126       39794 :                                                                 switch (comp->flag) {
     127       25916 :                                                                 case cmp_equal: /* for equality reduce */
     128       25916 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_max(sql, lval_max, rval_max) : statistics_atom_min(sql, lval_max, rval_max));
     129       25916 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_min(sql, lval_min, rval_min) : statistics_atom_max(sql, lval_min, rval_min));
     130       25916 :                                                                         break;
     131        4586 :                                                                 case cmp_notequal: /* for inequality expand */
     132        4586 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_min(sql, lval_max, rval_max) : statistics_atom_max(sql, lval_max, rval_max));
     133        4586 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_max(sql, lval_min, rval_min) : statistics_atom_min(sql, lval_min, rval_min));
     134        4586 :                                                                         break;
     135        5286 :                                                                 case cmp_gt:
     136             :                                                                 case cmp_gte:
     137        9634 :                                                                         if (!is_anti(comp) && lne) { /* min is max from both min */
     138        4348 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     139        4348 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min, p->value.pval) : rval_min);
     140         938 :                                                                         } else if (!is_anti(comp)) { /* max is min from both max */
     141         938 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     142         938 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     143             :                                                                         }
     144             :                                                                         break;
     145        4006 :                                                                 case cmp_lt:
     146             :                                                                 case cmp_lte:
     147        7205 :                                                                         if (!is_anti(comp) && lne) { /* max is min from both max */
     148        3199 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     149        3199 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max, p->value.pval) : rval_max);
     150         807 :                                                                         } else if (!is_anti(comp)) { /* min is max from both min */
     151         807 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     152         807 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     153             :                                                                         }
     154             :                                                                         break;
     155             :                                                                 default: /* Maybe later I can do cmp_in and cmp_notin */
     156             :                                                                         break;
     157             :                                                                 }
     158             :                                                         }
     159             :                                                 }
     160             :                                         }
     161             :                                 }
     162             :                         }
     163     1661126 :                         if (is_full(rel->op) || (is_left(rel->op) && found_right) || (is_right(rel->op) && found_left))
     164       32645 :                                 set_has_nil(e);
     165     1661126 :                         if (!is_outerjoin(rel->op) && found_without_semantics) /* at an outer join, null values pass */
     166       85991 :                                 set_has_no_nil(e);
     167     1661126 :                         if (is_join(rel->op) && is_unique(e) && !still_unique)
     168      111460 :                                 set_not_unique(e);
     169     1661126 :                         return e;
     170             :                 }
     171     4375828 :                 case op_table:
     172             :                 case op_basetable:
     173             :                 case op_union:
     174             :                 case op_except:
     175             :                 case op_inter:
     176             :                 case op_munion:
     177             :                 case op_project:
     178             :                 case op_groupby: {
     179     4375828 :                         sql_exp *found;
     180     4375828 :                         atom *fval;
     181     4375828 :                         prop *est;
     182     4375828 :                         if ((found = rel_find_exp(rel, e))) {
     183     1981743 :                                 if (rel->op != op_table) { /* At the moment don't propagate statistics for table relations */
     184     1945179 :                                         if ((fval = find_prop_and_get(found->p, PROP_MAX)))
     185     1048046 :                                                 set_minmax_property(sql, e, PROP_MAX, fval);
     186     1945170 :                                         if ((fval = find_prop_and_get(found->p, PROP_MIN)))
     187     1055994 :                                                 set_minmax_property(sql, e, PROP_MIN, fval);
     188     1945173 :                                         if (!has_nil(found))
     189     1273827 :                                                 set_has_no_nil(e);
     190     1945173 :                                         if (is_unique(found) || (need_distinct(rel) && list_length(rel->exps) == 1) ||
     191     1567583 :                                                 (is_groupby(rel->op) && (list_empty(rel->r) || (list_length(rel->r) == 1 && exps_find_exp(rel->r, e)))))
     192      388442 :                                                 set_unique(e);
     193             :                                         /* propagate unique estimation for known cases */
     194     1945172 :                                         if (is_groupby(rel->op) && list_empty(rel->r) && !find_prop(e->p, PROP_NUNIQUES)) { /* global aggregate case */
     195        7533 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     196        7533 :                                                 p->value.dval = 1;
     197     1937638 :                                         } else if (((is_basetable(rel->op) || is_except(rel->op) || is_inter(rel->op) || is_simple_project(rel->op) ||
     198       67962 :                                                                  (is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
     199     1883894 :                                                                 (est = find_prop(found->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES))) {
     200      166727 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     201      166727 :                                                 p->value.dval = est->value.dval;
     202             :                                         }
     203             :                                 }
     204     1981738 :                                 return e;
     205             :                         }
     206             :                         return NULL;
     207             :                 }
     208       83273 :                 case op_topn:
     209             :                 case op_sample:
     210       83273 :                         return rel_propagate_column_ref_statistics(sql, rel->l, e);
     211             :                 default:
     212             :                         break;
     213             :                 }
     214             :         }
     215             :         return NULL;
     216             : }
     217             : 
     218             : static atom *
     219     4083669 : atom_from_valptr( allocator *sa, sql_subtype *tpe, ValRecord *v)
     220             : {
     221     4083669 :         atom *a = SA_NEW(sa, atom);
     222             : 
     223     4083692 :         assert(!VALisnil(v));
     224     4083752 :         *a = (atom) {.tpe = *tpe,};
     225     4083752 :         SA_VALcopy(sa, &a->data, v);
     226     4083910 :         return a;
     227             : }
     228             : 
     229             : void
     230     3593437 : sql_column_get_statistics(mvc *sql, sql_column *c, sql_exp *e)
     231             : {
     232     3593437 :         bool nonil = false, unique = false;
     233     3593437 :         double unique_est = 0.0;
     234     3593437 :         ValRecord min, max;
     235     3593437 :         int ok = mvc_col_stats(sql, c, &nonil, &unique, &unique_est, &min, &max);
     236             : 
     237     3594709 :         if (has_nil(e) && nonil)
     238     2455526 :                 set_has_no_nil(e);
     239     3594709 :         if (!is_unique(e) && unique)
     240     1054949 :                 set_unique(e);
     241     3594709 :         if (unique_est != 0.0) {
     242     2397672 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     243     2397537 :                 p->value.dval = unique_est;
     244             :         }
     245     3594574 :         unsigned int digits = 0;
     246     3594574 :         sql_subtype *et = exp_subtype(e);
     247     3594656 :         if (et->type->eclass == EC_DEC || et->type->eclass == EC_NUM)
     248     2363359 :                 digits = et->digits;
     249     3594656 :         if ((ok & 2) == 2) {
     250     2038480 :                 if (!VALisnil(&max)) {
     251     2038449 :                         prop *p = e->p = prop_create(sql->sa, PROP_MAX, e->p);
     252     2038418 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &max);
     253     2038432 :                         if (digits) {
     254     1529160 :                                 unsigned int nd = atom_digits(p->value.pval);
     255     1529055 :                                 if (nd < digits)
     256             :                                         digits = nd;
     257     1529055 :                                 if (!digits)
     258             :                                         digits = 1;
     259             :                         }
     260             :                 }
     261     2038322 :                 VALclear(&max);
     262             :         }
     263     3594464 :         if ((ok & 1) == 1) {
     264     2045674 :                 if (!VALisnil(&min)) {
     265     2045704 :                         prop *p = e->p = prop_create(sql->sa, PROP_MIN, e->p);
     266     2045742 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &min);
     267     2045878 :                         if (digits) {
     268     1537671 :                                 unsigned int nd = atom_digits(p->value.pval);
     269     1537680 :                                 if (nd > digits) /* possibly set to low by max value */
     270             :                                         digits = nd;
     271             :                                 if (!digits)
     272             :                                         digits = 1;
     273             :                         }
     274             :                 }
     275     2045889 :                 VALclear(&min);
     276             :         }
     277     3594672 :         if (digits)
     278     2363368 :                 et->digits = digits;
     279     3594672 :         if (et->type->eclass == EC_DEC && et->digits <= et->scale)
     280         216 :                 et->digits = et->scale + 1;
     281     3594672 : }
     282             : 
     283             : static void
     284      874909 : rel_basetable_column_get_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
     285             : {
     286      874909 :         if (e->p)
     287             :                 return;
     288      291818 :         sql_column *c = NULL;
     289             : 
     290      291818 :         if ((c = rel_base_find_column(rel, e->nid))) {
     291      195842 :                 sql_column_get_statistics(sql, c, e);
     292             :         }
     293             : }
     294             : 
     295             : static bool
     296        8872 : rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, sql_exp *e, int i)
     297             : {
     298        8872 :         sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
     299        8872 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     300        8872 :                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     301        8872 :         prop *est;
     302             : 
     303             :         /* for the intersection, if both expressions don't overlap, it can be pruned */
     304        8872 :         if (is_inter(rel->op) && !has_nil(le) && !has_nil(re) &&
     305         511 :                 ((rval_max && lval_min && atom_cmp(rval_max, lval_min) < 0) || (rval_min && lval_max && atom_cmp(rval_min, lval_max) > 0)))
     306          26 :                 return true;
     307             : 
     308        8846 :         if (lval_max && rval_max) {
     309        2557 :                 if (is_union(rel->op))
     310           3 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     311        2554 :                 else if (is_inter(rel->op))
     312         399 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval_max, rval_max)); /* for intersect the new max will be the min of the two */
     313             :                 else /* except */
     314        2155 :                         set_minmax_property(sql, e, PROP_MAX, lval_max);
     315             :         }
     316        8846 :         if (lval_min && rval_min) {
     317        2557 :                 if (is_union(rel->op))
     318           3 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     319        2554 :                 else if (is_inter(rel->op))
     320         399 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval_min, rval_min)); /* for intersect the new min will be the max of the two */
     321             :                 else /* except */
     322        2155 :                         set_minmax_property(sql, e, PROP_MIN, lval_min);
     323             :         }
     324             : 
     325        8846 :         if (is_union(rel->op) || is_munion(rel->op)) {
     326        5986 :                 if (!has_nil(le) && !has_nil(re))
     327         879 :                         set_has_no_nil(e);
     328        5986 :                 if (need_distinct(rel) && list_length(rel->exps) == 1)
     329           0 :                         set_unique(e);
     330        2860 :         } else if (is_inter(rel->op)) {
     331         564 :                 if (!has_nil(le) || !has_nil(re))
     332         509 :                         set_has_no_nil(e);
     333         564 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     334         345 :                         set_unique(e);
     335             :         } else {
     336        2296 :                 assert(is_except(rel->op));
     337        2296 :                 if (!has_nil(le))
     338        2233 :                         set_has_no_nil(e);
     339        2296 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     340        2009 :                         set_unique(e);
     341             :         }
     342             :         /* propagate unique estimation for known cases */
     343        8846 :         if (!is_union(rel->op) && (est = find_prop(le->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES)) {
     344         205 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     345         205 :                 p->value.dval = est->value.dval;
     346             :         }
     347             :         return false;
     348             : }
     349             : 
     350             : 
     351             : static void
     352       56927 : rel_munion_get_statistics(mvc *sql, sql_rel *rel, list *rels, sql_exp *e, int i)
     353             : {
     354       56927 :         assert(is_munion(rel->op));
     355             : 
     356       56927 :         sql_rel *l = rels->h->data;
     357       56927 :         sql_exp *le = list_fetch(l->exps, i);
     358       56927 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX);
     359       56927 :         bool has_nonil = !has_nil(le);
     360             : 
     361      160702 :         for(node *n = rels->h->next; n; n = n->next) {
     362      103775 :                 sql_rel *r = n->data;
     363      103775 :                 sql_exp *re = list_fetch(r->exps, i);
     364      103775 :                 atom *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     365             : 
     366      103775 :                 if (lval_max && rval_max) {
     367       39696 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     368       39696 :                         lval_max = find_prop_and_get(e->p, PROP_MAX);
     369             :                 }
     370      103775 :                 if (lval_min && rval_min) {
     371       39263 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     372       39263 :                         lval_min = find_prop_and_get(e->p, PROP_MIN);
     373             :                 }
     374      103775 :                 has_nonil &= !has_nil(re);
     375             : 
     376             :         }
     377             : 
     378       56927 :         if (has_nonil)
     379       40429 :                 set_has_no_nil(e);
     380             : 
     381       56927 :         if (need_distinct(rel) && list_length(rel->exps) == 1)
     382        1594 :                 set_unique(e);
     383       56927 : }
     384             : 
     385             : 
     386             : static sql_exp *
     387     3200227 : rel_propagate_statistics(visitor *v, sql_rel *rel, sql_exp *e, int depth)
     388             : {
     389     3200227 :         mvc *sql = v->sql;
     390     3200227 :         atom *lval;
     391             : 
     392     3200227 :         (void) depth;
     393     3200227 :         switch(e->type) {
     394     1986419 :         case e_column:
     395     1986419 :                 switch (rel->op) { /* set relations don't call rel_propagate_statistics */
     396      259459 :                 case op_join:
     397             :                 case op_left:
     398             :                 case op_right:
     399             :                 case op_full:
     400             :                 case op_semi:
     401             :                 case op_anti: {
     402      259459 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e);
     403      259459 :                         if (!found)
     404      130122 :                                 (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     405             :                         break;
     406             :                 }
     407     1726960 :                 case op_select:
     408             :                 case op_project:
     409             :                 case op_groupby: {
     410     1726960 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e); /* labels may be found on the same projection, ugh */
     411     1726949 :                         if (!found && is_simple_project(rel->op))
     412      117872 :                                 (void) rel_propagate_column_ref_statistics(sql, rel, e);
     413             :                         break;
     414             :                 }
     415           0 :                 case op_insert:
     416             :                 case op_update:
     417             :                 case op_delete:
     418           0 :                         (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     419           0 :                         break;
     420             :                 default:
     421             :                         break;
     422             :                 }
     423             :                 break;
     424       91797 :         case e_convert: {
     425       91797 :                 sql_subtype *to = exp_totype(e), *from = exp_fromtype(e);
     426       91797 :                 sql_exp *l = e->l;
     427       91797 :                 sql_class fr = from->type->eclass, too = to->type->eclass;
     428       91797 :                 prop *est;
     429             : 
     430       91797 :                 if (fr == too) {
     431       82891 :                         if ((lval = find_prop_and_get(l->p, PROP_MAX))) {
     432       54790 :                                 atom *res = atom_copy(sql->sa, lval);
     433       54790 :                                 if ((res = atom_cast(sql->sa, res, to)))
     434       54767 :                                         set_minmax_property(sql, e, PROP_MAX, res);
     435             :                         }
     436       82891 :                         if ((lval = find_prop_and_get(l->p, PROP_MIN))) {
     437       55407 :                                 atom *res = atom_copy(sql->sa, lval);
     438       55408 :                                 if ((res = atom_cast(sql->sa, res, to)))
     439       55385 :                                         set_minmax_property(sql, e, PROP_MIN, res);
     440             :                         }
     441             :                 }
     442             :                 /* propagating nuniques through conversions is tricky. I am adding just the general cases */
     443       91798 :                 if ((est = find_prop(l->p, PROP_NUNIQUES)) &&
     444       56542 :                         (((fr == EC_TIME || fr == EC_TIME_TZ || fr == EC_DATE) && (too == EC_TIMESTAMP || too == EC_TIMESTAMP_TZ)) ||
     445       56517 :                          ((fr == EC_NUM || fr == EC_BIT) && too == EC_NUM) || (fr == EC_DEC && too == EC_DEC) || (fr != EC_EXTERNAL && too == EC_STRING))) {
     446       53120 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     447       53120 :                         p->value.dval = est->value.dval;
     448             :                 }
     449       91798 :                 if (!has_nil(l))
     450       50907 :                         set_has_no_nil(e);
     451             :                 break;
     452             :         }
     453      326636 :         case e_aggr:
     454             :         case e_func: {
     455      326636 :                 BUN lv;
     456      326636 :                 sql_subfunc *f = e->f;
     457             : 
     458      326636 :                 if (!f->func->s) {
     459      301184 :                         int key = hash_key(f->func->base.name); /* Using hash lookup */
     460      301184 :                         sql_hash_e *he = sql_functions_lookup->buckets[key&(sql_functions_lookup->size-1)];
     461      301184 :                         lookup_function look = NULL;
     462             : 
     463      660114 :                         for (; he && !look; he = he->chain) {
     464      358930 :                                 struct function_properties* fp = (struct function_properties*) he->value;
     465             : 
     466      358930 :                                 if (!strcmp(f->func->base.name, fp->name))
     467      104855 :                                         look = fp->func;
     468             :                         }
     469      301184 :                         if (look)
     470      104854 :                                 look(sql, e);
     471             :                 }
     472             :                 /* for global aggregates with no semantics, if the left relation has values, then the output won't be NULL */
     473      326635 :                 if (!is_semantics(e) && e->l && !have_nil(e->l) &&
     474       86062 :                         (e->type != e_aggr || (is_groupby(rel->op) && list_length(rel->r)) || ((lv = get_rel_count(rel->l)) != BUN_NONE && lv > 0)))
     475       85735 :                         set_has_no_nil(e);
     476             :                 /* set properties for global aggregates */
     477      326635 :                 if (e->type == e_aggr && is_groupby(rel->op) && list_empty(rel->r)) {
     478        7868 :                         if (!find_prop(e->p, PROP_NUNIQUES)) {
     479        7868 :                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     480        7868 :                                 p->value.dval = 1;
     481             :                         }
     482        7868 :                         set_unique(e);
     483             :                 }
     484             :                 break;
     485             :         }
     486      531711 :         case e_atom:
     487      531711 :                 if (e->l) {
     488      514866 :                         atom *a = (atom*) e->l;
     489      514866 :                         if (!a->isnull) {
     490      459784 :                                 set_minmax_property(sql, e, PROP_MAX, a);
     491      459785 :                                 set_minmax_property(sql, e, PROP_MIN, a);
     492             :                         }
     493      514867 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     494      514865 :                         p->value.dval = 1;
     495       16845 :                 } else if (e->f) {
     496        3520 :                         list *vals = (list *) e->f;
     497        3520 :                         sql_exp *first = vals->h ? vals->h->data : NULL;
     498        3520 :                         atom *max = NULL, *min = NULL; /* all child values must have a valid min/max */
     499        3520 :                         int has_nil = 0;
     500             : 
     501        3520 :                         if (first) {
     502        3520 :                                 max = ((lval = find_prop_and_get(first->p, PROP_MAX))) ? lval : NULL;
     503        3520 :                                 min = ((lval = find_prop_and_get(first->p, PROP_MIN))) ? lval : NULL;
     504        3520 :                                 has_nil |= has_nil(first);
     505             :                         }
     506             : 
     507       15389 :                         for (node *n = vals->h ? vals->h->next : NULL ; n ; n = n->next) {
     508        8349 :                                 sql_exp *ee = n->data;
     509             : 
     510        8349 :                                 if (min && max) {
     511        7856 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MAX))) {
     512        7810 :                                                 max = atom_cmp(lval, max) > 0 ? lval : max;
     513             :                                         } else {
     514             :                                                 max = NULL;
     515             :                                         }
     516        7856 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MIN))) {
     517        7810 :                                                 min = atom_cmp(min, lval) > 0 ? lval : min;
     518             :                                         } else {
     519             :                                                 min = NULL;
     520             :                                         }
     521             :                                 }
     522        8349 :                                 has_nil |= has_nil(ee);
     523             :                         }
     524             : 
     525        3520 :                         if (!has_nil)
     526        3150 :                                 set_has_no_nil(e);
     527        3520 :                         if (min && max) {
     528        3108 :                                 set_minmax_property(sql, e, PROP_MAX, max);
     529        3108 :                                 set_minmax_property(sql, e, PROP_MIN, min);
     530             :                         }
     531        3520 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     532        3520 :                         p->value.dval = (dbl) list_length(vals);
     533             :                 } else {
     534       13325 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     535       13325 :                         p->value.dval = 1;
     536             :                 }
     537             :                 break;
     538      263664 :         case e_cmp:
     539             :                 /* TODO? propagating min/max/unique of booleans is not very worth it */
     540      263664 :                 if (e->flag == cmp_or || e->flag == cmp_filter) {
     541       17012 :                         if (!have_nil(e->l) && !have_nil(e->r))
     542       12832 :                                 set_has_no_nil(e);
     543      246652 :                 } else if (e->flag == cmp_in || e->flag == cmp_notin) {
     544       21114 :                         sql_exp *le = e->l;
     545       21114 :                         if (!has_nil(le) && !have_nil(e->r))
     546       18440 :                                 set_has_no_nil(e);
     547             :                 } else {
     548      225538 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     549      225538 :                         if (!has_nil(le) && !has_nil(re) && (!e->f || !has_nil(fe)))
     550      168477 :                                 set_has_no_nil(e);
     551             :                 }
     552             :                 break;
     553             :         case e_psm:
     554             :                 break;
     555             :         }
     556             : 
     557             : #ifndef NDEBUG
     558             :         {
     559             :                 /* min and max cannot be NULL and min must be <= than max, if it happens the inner relation must be empty! */
     560     3200215 :                 atom *min = find_prop_and_get(e->p, PROP_MIN), *max = find_prop_and_get(e->p, PROP_MAX);
     561             : 
     562     3200219 :                 (void) min;
     563     3200219 :                 (void) max;
     564     3200219 :                 assert(!min || !min->isnull);
     565     3200219 :                 assert(!max || !max->isnull);
     566     3200219 :                 assert(!min || !max || (min && max)/* atom_cmp(min, max) <= 0*/);
     567             :         }
     568             : #endif
     569     3200219 :         return e;
     570             : }
     571             : 
     572             : static list * /* Remove predicates always false from min/max values */
     573      120948 : rel_prune_predicates(visitor *v, sql_rel *rel)
     574             : {
     575      120948 :         if (rel->l) {
     576      120948 :                 sql_rel *l = rel->l;
     577      120948 :                 if (is_single(l))
     578        2173 :                         return rel->exps;
     579             :         }
     580      118775 :         if (!list_empty(rel->attr))
     581        2963 :                 return rel->exps;
     582      246190 :         for (node *n = rel->exps->h ; n ; n = n->next) {
     583      130378 :                 sql_exp *e = n->data;
     584             : 
     585      130378 :                 if (e->type == e_cmp && is_theta_exp(e->flag)) {
     586      105956 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     587      105956 :                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     588      105956 :                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     589      105956 :                         bool always_false = false, always_true = false;
     590             : 
     591      105956 :                         if (fe && !is_symmetric(e)) {
     592        3058 :                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
     593        3058 :                                 comp_type lower = range2lcompare(e->flag), higher = range2rcompare(e->flag);
     594        3660 :                                 int not_int1 = rval_min && lval_max && /* the middle and left intervals don't overlap */
     595        1663 :                                         (!is_anti(e) && (lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0)),
     596        4072 :                                         not_int2 = lval_min && fval_max && /* the middle and right intervals don't overlap */
     597        2485 :                                         (!is_anti(e) && (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0)),
     598        3058 :                                         not_int3 = rval_min && fval_max && /* the left interval is after the right one */
     599        1287 :                                         (!is_anti(e) && (atom_cmp(rval_min, fval_max) > 0));
     600             : 
     601        3058 :                                 always_false |= not_int1 || not_int2 || not_int3;
     602             :                                 /* for anti the middle must be before the left or after the right or the right after the left, for the other the middle must be always between the left and right intervals */
     603        4113 :                                 always_true |= !has_nil(le) && !has_nil(re) && !has_nil(fe) &&
     604        3957 :                                         lval_min && lval_max && rval_min && rval_max && fval_min && fval_max &&
     605         575 :                                         (is_anti(e) ? ((lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) || (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
     606         488 :                                          ((lower == cmp_gte ? atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0)));
     607             :                         } else if (!fe) {
     608      102880 :                                 if (!is_semantics(e)) /* trivial not null cmp null case */
     609      196110 :                                         always_false |= !is_anti(e) && ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)));
     610      102880 :                                 switch (e->flag) {
     611       88983 :                                 case cmp_equal:
     612       88983 :                                         if (lval_min && lval_max && rval_min && rval_max && (!is_semantics(e) || !has_nil(le) || !has_nil(re)))
     613      117424 :                                                 always_false |= (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     614       88983 :                                         if (is_semantics(e)) { /* prune *= NULL cases */
     615        4796 :                                                 always_false |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     616        9592 :                                                 always_true |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     617             :                                         }
     618             :                                         break;
     619        6464 :                                 case cmp_notequal:
     620        6464 :                                         if (lval_min && lval_max && rval_min && rval_max)
     621       10730 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     622        6464 :                                         if (is_semantics(e)) {
     623          29 :                                                 always_true |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     624          58 :                                                 always_false |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     625             :                                         }
     626             :                                         break;
     627        4805 :                                 case cmp_gt:
     628        4805 :                                         if (lval_max && rval_min)
     629        1789 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     630        4805 :                                         if (lval_min && rval_max)
     631        3578 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     632             :                                         break;
     633         103 :                                 case cmp_gte:
     634         103 :                                         if (lval_max && rval_min)
     635          41 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     636         103 :                                         if (lval_min && rval_max)
     637          82 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     638             :                                         break;
     639        2441 :                                 case cmp_lt:
     640        2441 :                                         if (lval_min && rval_max)
     641        1383 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     642        2441 :                                         if (lval_max && rval_min)
     643        2814 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     644             :                                         break;
     645          84 :                                 case cmp_lte:
     646          84 :                                         if (lval_min && rval_max)
     647          11 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     648          84 :                                         if (lval_max && rval_min)
     649          22 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     650             :                                         break;
     651             :                                 default: /* Maybe later I can do cmp_in and cmp_notin but don't forget to remove is_theta_exp check up there */
     652             :                                         break;
     653             :                                 }
     654             :                         }
     655      105956 :                         assert(!always_false || !always_true);
     656      105956 :                         if (always_false || always_true) {
     657        3392 :                                 sql_exp *ne = exp_atom_bool(v->sql->sa, always_true /* if always_true set then true else false */);
     658        3392 :                                 if (exp_name(e))
     659           0 :                                         exp_prop_alias(v->sql->sa, ne, e);
     660        3392 :                                 n->data = ne;
     661        3392 :                                 v->changes++;
     662             :                         }
     663             :                 }
     664             :         }
     665      115812 :         return rel->exps;
     666             : }
     667             : 
     668             : static sql_rel *
     669          14 : set_setop_side(visitor *v, sql_rel *rel, sql_rel *side)
     670             : {
     671          14 :         if (side == rel->r)
     672           0 :                 rel->r = NULL;
     673             :         else
     674          14 :                 rel->l = NULL;
     675          14 :         if (!is_simple_project(side->op) || rel_is_ref(side) || !list_empty(side->r)) {
     676           0 :                 side = rel_project(v->sql->sa, side, rel_projections(v->sql, side, NULL, 0, 1));
     677           0 :                 set_count_prop(v->sql->sa, side, get_rel_count(side->l));
     678           0 :                 side->exps = exps_exp_visitor_bottomup(v, side, side->exps, 0, &rel_propagate_statistics, false);
     679             :         }
     680          35 :         for (node *n = rel->exps->h, *m = side->exps->h ; n && m ; n = n->next, m = m->next) {
     681          21 :                 sql_exp *e1 = n->data, *e2 = m->data;
     682          21 :                 exp_prop_alias(v->sql->sa, e2, e1);
     683             :         }
     684          14 :         list_hash_clear(side->exps);
     685             : 
     686          14 :         if (need_distinct(rel))
     687           0 :                 set_distinct(side);
     688          14 :         side->p = prop_copy(v->sql->sa, rel->p);
     689          14 :         rel_destroy(rel);
     690          14 :         return side;
     691             : }
     692             : 
     693             : static BUN
     694       14897 : trivial_project_exp_card(sql_exp *e)
     695             : {
     696       15104 :         if (e->type == e_convert)
     697         207 :                 return trivial_project_exp_card(e->l);
     698       14897 :         return e->type == e_atom && e->f ? (BUN) list_length(e->f) : 1;
     699             : }
     700             : 
     701             : static BUN
     702       24957 : rel_calc_nuniques(mvc *sql, sql_rel *l, list *exps)
     703             : {
     704       24957 :         BUN lv = get_rel_count(l);
     705             : 
     706       24956 :         if (lv == 0)
     707             :                 return 0;
     708       22407 :         if (!list_empty(exps)) {
     709       22407 :                 BUN nuniques = 0;
     710             :                 /* compute the highest number of unique values */
     711       55862 :                 for (node *n = exps->h ; n && nuniques != BUN_NONE ; n = n->next) {
     712       33454 :                         sql_exp *e = n->data;
     713       33454 :                         sql_rel *bt = NULL;
     714       33454 :                         prop *p = NULL;
     715       33454 :                         BUN euniques = BUN_NONE;
     716       33454 :                         atom *min, *max, *sub = NULL;
     717       33454 :                         sql_subtype *tp = exp_subtype(e);
     718       33454 :                         sql_class ec = tp ? tp->type->eclass : EC_STRING; /* if 'e' has no type (eg parameter), use a non-number type to fail condition */
     719             : 
     720       33454 :                         if ((p = find_prop(e->p, PROP_NUNIQUES))) {
     721       24110 :                                 euniques = (BUN) p->value.dval;
     722        9344 :                         } else if (e->type == e_column && e->nid && rel_find_exp_and_corresponding_rel(l, e, false, &bt, NULL) && bt && (p = find_prop(bt->p, PROP_COUNT))) {
     723        7329 :                                 euniques = (BUN) p->value.lval;
     724             :                         }
     725             :                         /* use min to max range to compute number of possible values in the domain for number types */
     726       33454 :                         if ((EC_TEMP(ec)||ec==EC_NUM||ec==EC_MONTH||ec==EC_POS) &&
     727       23407 :                                 (min = find_prop_and_get(e->p, PROP_MIN)) && (max = find_prop_and_get(e->p, PROP_MAX))) {
     728             :                                 /* the range includes min and max, so the atom_inc call is needed */
     729             :                                 /* if 'euniques' has number of distinct values, compute min between both */
     730       18954 :                                 if ((sub = atom_sub(sql->sa, max, min)) && (sub = atom_inc(sql->sa, sub)) && (sub = atom_cast(sql->sa, sub, sql_bind_localtype("oid"))))
     731           0 :                                         euniques = MIN(euniques, (BUN) sub->data.val.oval);
     732             :                         }
     733       33455 :                         if (euniques != BUN_NONE)
     734       31440 :                                 nuniques = MAX(nuniques, euniques); /* the highest cardinality sets the estimation */
     735             :                         else
     736             :                                 nuniques = BUN_NONE;
     737             :                 }
     738       22408 :                 if (nuniques != BUN_NONE)
     739             :                         return nuniques;
     740             :         }
     741             :         return lv;
     742             : }
     743             : 
     744             : static sql_rel *
     745     2006003 : rel_get_statistics_(visitor *v, sql_rel *rel)
     746             : {
     747             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
     748     2006003 :         uint8_t has_special_modify = *(uint8_t*) v->data;
     749     2006003 :         bool can_be_pruned = !has_special_modify && v->storage_based_opt;
     750             : 
     751             :         /* Don't look at the same relation twice */
     752     2006003 :         if (are_statistics_gathered(rel->used))
     753             :                 return rel;
     754     1269687 :         rel->used |= statistics_gathered;
     755             : 
     756     1269687 :         switch (rel->op) {
     757      296569 :         case op_basetable: {
     758      296569 :                 sql_table *t = (sql_table *) rel->l;
     759      296569 :                 sqlstore *store = v->sql->session->tr->store;
     760             : 
     761      296569 :                 if (!list_empty(rel->exps)) {
     762     1171761 :                         for (node *n = rel->exps->h ; n ; n = n->next)
     763      875040 :                                 rel_basetable_column_get_statistics(v->sql, rel, n->data);
     764             :                 }
     765             :                 /* Set table row count. TODO? look for remote tables. Don't look at storage for declared tables, because it won't be cleaned */
     766      296717 :                 if (isTable(t) && t->s && !isDeclaredTable(t)) /* count active rows only */
     767      243458 :                         set_count_prop(v->sql->sa, rel, (BUN)store->storage_api.count_col(v->sql->session->tr, ol_first_node(t->columns)->data, 10));
     768             :                 break;
     769             :         }
     770        2790 :         case op_union:
     771             :         case op_inter:
     772             :         case op_except: {
     773        2790 :                 bool empty_cross = false;
     774        2790 :                 int i = 0;
     775        2790 :                 sql_rel *l = rel->l, *pl = rel->l, *r = rel->r, *pr = rel->r;
     776             : 
     777        2790 :                 while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     778           0 :                         pl = pl->l;
     779        2790 :                 while (is_sample(pr->op) || is_topn(pr->op))
     780           0 :                         pr = pr->l;
     781             :                 /* if it's not a projection, then project and propagate statistics */
     782        2790 :                 if (!is_project(pl->op) && !is_base(pl->op)) {
     783           0 :                         pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     784           0 :                         set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     785           0 :                         pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     786             :                 }
     787        2790 :                 if (!is_project(pr->op) && !is_base(pr->op)) {
     788           0 :                         pr = rel_project(v->sql->sa, pr, rel_projections(v->sql, pr, NULL, 0, 1));
     789           0 :                         set_count_prop(v->sql->sa, pr, get_rel_count(pr->l));
     790           0 :                         pr->exps = exps_exp_visitor_bottomup(v, pr, pr->exps, 0, &rel_propagate_statistics, false);
     791             :                 }
     792             : 
     793       11662 :                 for (node *n = rel->exps->h ; n ; n = n->next) {
     794        8872 :                         empty_cross |= rel_setop_get_statistics(v->sql, rel, pl->exps, pr->exps, n->data, i);
     795        8872 :                         i++;
     796             :                 }
     797             : 
     798             :                 /* propagate row count */
     799        2790 :                 if (is_union(rel->op)) {
     800         277 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     801         277 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     802             : 
     803         277 :                         if (lv == 0 && rv == 0) { /* both sides empty */
     804           2 :                                 if (can_be_pruned)
     805             :                                         empty_cross = true;
     806             :                                 else
     807           2 :                                         set_count_prop(v->sql->sa, rel, 0);
     808         275 :                         } else if (can_be_pruned && lv == 0 && !rel_is_ref(rel)) { /* left side empty */
     809           0 :                                 rel = set_setop_side(v, rel, r);
     810           0 :                                 empty_cross = false; /* don't rewrite again */
     811         275 :                         } else if (can_be_pruned && rv == 0 && !rel_is_ref(rel)) { /* right side empty */
     812           0 :                                 rel = set_setop_side(v, rel, l);
     813           0 :                                 empty_cross = false; /* don't rewrite again */
     814         275 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
     815           7 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
     816             :                         }
     817        2513 :                 } else if (is_inter(rel->op) || is_except(rel->op)) {
     818        2513 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     819        2513 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     820             : 
     821        2513 :                         if (lv == 0) { /* left side empty */
     822          62 :                                 if (can_be_pruned)
     823             :                                         empty_cross = true;
     824             :                                 else
     825           5 :                                         set_count_prop(v->sql->sa, rel, 0);
     826        2451 :                         } else if (rv == 0) { /* right side empty */
     827          17 :                                 if (is_inter(rel->op)) {
     828           3 :                                         if (can_be_pruned)
     829             :                                                 empty_cross = true;
     830             :                                         else
     831           0 :                                                 set_count_prop(v->sql->sa, rel, 0);
     832          14 :                                 } else if (can_be_pruned && !rel_is_ref(rel)) {
     833          14 :                                         rel = set_setop_side(v, rel, l);
     834          14 :                                         empty_cross = false; /* don't rewrite again */
     835             :                                 } else {
     836           0 :                                         set_count_prop(v->sql->sa, rel, lv);
     837             :                                 }
     838             :                         } else {
     839        2434 :                                 set_count_prop(v->sql->sa, rel, lv);
     840             :                         }
     841             :                 }
     842        2790 :                 if (can_be_pruned && empty_cross) {
     843          80 :                         rel_destroy(rel->l);
     844          80 :                         rel->l = NULL;
     845          80 :                         rel_destroy(rel->r);
     846          80 :                         rel->r = NULL;
     847         244 :                         for (node *n = rel->exps->h ; n ; n = n->next) {
     848         164 :                                 sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     849         164 :                                 exp_prop_alias(v->sql->sa, a, e);
     850         164 :                                 n->data = a;
     851             :                         }
     852          80 :                         list_hash_clear(rel->exps);
     853          80 :                         sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     854          80 :                         set_count_prop(v->sql->sa, l, 1);
     855          80 :                         l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     856          80 :                         set_count_prop(v->sql->sa, l, 0);
     857          80 :                         rel->op = op_project;
     858          80 :                         rel->l = l;
     859          80 :                         rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     860          80 :                         set_count_prop(v->sql->sa, rel, 0);
     861          80 :                         set_nodistinct(rel); /* set relations may have distinct flag set */
     862          80 :                         v->changes++;
     863             :                 }
     864             :                 break;
     865             :         }
     866       12221 :         case op_munion: {
     867       12221 :                 list *l = rel->l, *nrels = sa_list(v->sql->sa);
     868       12221 :                 BUN cnt = 0;
     869       12221 :                 bool needs_pruning = false;
     870             : 
     871       46040 :                 for (node *n = l->h; n; n = n->next) {
     872       33819 :                         sql_rel *r = n->data, *pl = r;
     873             : 
     874       33819 :                         while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     875           0 :                                         pl = pl->l;
     876             :                         /* if it's not a projection, then project and propagate statistics */
     877       33819 :                         if (!is_project(pl->op) && !is_base(pl->op)) {
     878           0 :                                 pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     879           0 :                                 set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     880           0 :                                 pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     881             :                         }
     882       33819 :                         nrels = append(nrels, pl);
     883             :                         /* we need new munion statistics */
     884             :                         /* propagate row count */
     885       33819 :                         BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     886             :                         /* if PROP_COUNT does not exist we assume at least a row (see get_rel_count def) */
     887       33819 :                         if (rv == BUN_NONE) {
     888        1076 :                                 cnt++;
     889        1076 :                                 continue;
     890             :                         }
     891       32743 :                         if (!rv && can_be_pruned)
     892        5613 :                                 needs_pruning = true;
     893             :                         /* overflow check */
     894       32743 :                         if (rv > (BUN_MAX - cnt))
     895       33819 :                                 rv = BUN_MAX;
     896             :                         else
     897       32743 :                                 cnt += rv;
     898             :                 }
     899       12221 :                 int i = 0;
     900       69148 :                 for (node *n = rel->exps->h ; n ; n = n->next, i++)
     901       56927 :                         rel_munion_get_statistics(v->sql, rel, nrels, n->data, i);
     902             : 
     903       12221 :                 if (needs_pruning && !rel_is_ref(rel)) {
     904        3828 :                         v->changes++;
     905        3828 :                         list *nl = sa_list(l->sa);
     906             : 
     907       13517 :                         for (node *n = nrels->h; n; n = n->next) {
     908        9689 :                                 sql_rel *r = n->data;
     909        9689 :                                 BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     910             : 
     911        9689 :                                 if (!rv) { /* keep last for now */
     912        5159 :                                         rel_destroy(r);
     913        5159 :                                         continue;
     914             :                                 }
     915        4530 :                                 nl = append(nl, r);
     916             :                         }
     917        3828 :                         rel->l = nl;
     918        3828 :                         if (list_length(nl) == 1) {
     919        3606 :                                 sql_rel *l = rel->l = nl->h->data; /* ugh */
     920        3606 :                                 rel->r = NULL;
     921        3606 :                                 rel->op = op_project;
     922             : 
     923       17555 :                                 for (node *n = rel->exps->h, *m = l->exps->h ; n && m ; n = n->next, m = m->next) {
     924       13949 :                                         sql_exp *pe = n->data, *ie = m->data;
     925       13949 :                                         sql_exp *ne = exp_ref(v->sql, ie);
     926       13949 :                                         exp_prop_alias(v->sql->sa, ne, pe);
     927       13949 :                                         n->data = ne;
     928             :                                 }
     929        3606 :                                 list_hash_clear(rel->exps);
     930         222 :                         } else if (list_empty(nl)) {
     931             :                                 /* empty select (project [ nils ] ) */
     932         454 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     933         354 :                                         sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     934         354 :                                         exp_prop_alias(v->sql->sa, a, e);
     935         354 :                                         n->data = a;
     936             :                                 }
     937         100 :                                 list_hash_clear(rel->exps);
     938         100 :                                 sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     939         100 :                                 set_count_prop(v->sql->sa, l, 1);
     940         100 :                                 l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     941         100 :                                 set_count_prop(v->sql->sa, l, 0);
     942         100 :                                 rel->op = op_project;
     943         100 :                                 rel->r = NULL;
     944         100 :                                 rel->l = l;
     945         100 :                                 rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     946         100 :                                 set_count_prop(v->sql->sa, rel, 0);
     947         100 :                                 set_nodistinct(rel); /* set relations may have distinct flag set */
     948             :                         }
     949             :                 } else {
     950        8393 :                         set_count_prop(v->sql->sa, rel, cnt);
     951             :                 }
     952             :                 break;
     953             :         }
     954      487862 :         case op_join:
     955             :         case op_left:
     956             :         case op_right:
     957             :         case op_full:
     958             :         case op_semi:
     959             :         case op_anti:
     960             :         case op_select:
     961             :         case op_project:
     962             :         case op_groupby: {
     963      487862 :                 if (is_groupby(rel->op) && !list_empty(rel->r))
     964       15762 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     965      487861 :                 rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, &rel_propagate_statistics, false);
     966      487853 :                 if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r))
     967       35963 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     968             :                 /* The following optimizations can only be applied after propagating the statistics to rel->exps */
     969      487854 :                 if (can_be_pruned && (is_join(rel->op) || is_select(rel->op)) && !list_empty(rel->exps)) {
     970      120948 :                         int changes = v->changes;
     971      120948 :                         rel->exps = rel_prune_predicates(v, rel);
     972      120948 :                         if (v->changes > changes)
     973        3359 :                                 rel = rewrite_simplify(v, 0, v->value_based_opt, rel);
     974             :                 }
     975             : 
     976             :                 /* propagate row count */
     977      487854 :                 sql_rel *l = rel->l, *r = rel->r;
     978      487854 :                 switch (rel->op) {
     979      125258 :                 case op_join:
     980             :                 case op_left:
     981             :                 case op_right:
     982             :                 case op_full: {
     983      125258 :                         BUN lv = get_rel_count(l), rv = get_rel_count(r), uniques_estimate = BUN_MAX, join_idx_estimate = BUN_MAX;
     984             : 
     985      125258 :                         if (!list_empty(rel->exps) && !is_single(rel)) {
     986      229546 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     987      117126 :                                         sql_exp *e = n->data, *el = e->l, *er = e->r;
     988             : 
     989      117126 :                                         if (find_prop(e->p, PROP_JOINIDX)) {
     990         667 :                                                 join_idx_estimate = lv>rv?lv:rv;
     991      116459 :                                         } else if (e->type == e_cmp && e->flag == cmp_equal) {
     992             :                                                 /* if one of the sides is unique, the cardinality will be that exact number, but look for nulls */
     993      112633 :                                                 if (!is_semantics(e) || !has_nil(el) || !has_nil(er)) {
     994      112484 :                                                         BUN lu = 0, ru = 0;
     995      112484 :                                                         prop *p = NULL;
     996      112484 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES)))
     997       82336 :                                                                 lu = (BUN) p->value.dval;
     998      112484 :                                                         if ((p = find_prop(er->p, PROP_NUNIQUES)))
     999       96330 :                                                                 ru = (BUN) p->value.dval;
    1000      112484 :                                                         if (is_unique(el) || lu > lv) {
    1001       67857 :                                                                 BUN ncount = (is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv;
    1002       67857 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1003       44627 :                                                         } else if (is_unique(er) || ru > rv) {
    1004       27436 :                                                                 BUN ncount = (is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv;
    1005       27436 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1006             :                                                         }
    1007             :                                                 }
    1008             :                                         }
    1009             :                                 }
    1010             :                         }
    1011      125258 :                         if (is_single(rel)) {
    1012        3513 :                                 set_count_prop(v->sql->sa, rel, lv);
    1013      121745 :                         } else if (join_idx_estimate != BUN_MAX) {
    1014         665 :                                 set_count_prop(v->sql->sa, rel, join_idx_estimate);
    1015      121080 :                         } else if (uniques_estimate != BUN_MAX) {
    1016       89117 :                                 set_count_prop(v->sql->sa, rel, uniques_estimate);
    1017       31963 :                         } else if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1018             :                                 /* corner cases for outer joins */
    1019          84 :                                 if (is_left(rel->op)) {
    1020          72 :                                         set_count_prop(v->sql->sa, rel, lv);
    1021          12 :                                 } else if (is_right(rel->op)) {
    1022           3 :                                         set_count_prop(v->sql->sa, rel, rv);
    1023           9 :                                 } else if (is_full(rel->op) && lv != BUN_NONE && rv != BUN_NONE) {
    1024           9 :                                         set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
    1025           0 :                                 } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1026           0 :                                         set_count_prop(v->sql->sa, rel, 0);
    1027             :                                 }
    1028       31879 :                         } else if (lv == 0) {
    1029        1166 :                                 set_count_prop(v->sql->sa, rel, (is_right(rel->op) || is_full(rel->op)) ? rv : 0);
    1030       31282 :                         } else if (rv == 0) {
    1031        1568 :                                 set_count_prop(v->sql->sa, rel, (is_left(rel->op) || is_full(rel->op)) ? lv : 0);
    1032       30208 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1033       17870 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX / lv)) ? BUN_MAX : (lv * rv)); /* overflow check */
    1034             :                         }
    1035             :                         break;
    1036             :                 }
    1037        2054 :                 case op_anti:
    1038        2054 :                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1039        2054 :                         break;
    1040       97157 :                 case op_semi:
    1041             :                 case op_select:
    1042             :                         /* TODO calculate cardinalities using selectivities */
    1043       97157 :                         if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1044        1739 :                                 set_count_prop(v->sql->sa, rel, 0);
    1045             :                         } else {
    1046      187596 :                                 if (!list_empty(rel->exps) && !is_single(rel)) {
    1047       92178 :                                         BUN cnt = get_rel_count(l), u = 1;
    1048      155466 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1049      101154 :                                                 sql_exp *e = n->data, *el = e->l, *er = e->r;
    1050             : 
    1051             :                                                 /* simple expressions first */
    1052      101154 :                                                 if (e->type == e_cmp && e->flag == cmp_equal && exp_is_atom(er)) {
    1053             :                                                         /* use selectivity */
    1054       50387 :                                                         prop *p;
    1055       50387 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES))) {
    1056       37866 :                                                                 u = (BUN) p->value.dval;
    1057       37866 :                                                                 break;
    1058             :                                                         }
    1059             :                                                 }
    1060             :                                         }
    1061             :                                         /* u is an *estimate*, so don't set count_prop to 0 unless cnt is 0 */
    1062       92178 :                                         set_count_prop(v->sql->sa, rel, cnt == 0 ? 0 : u == 0 || u > cnt ? 1 : cnt/u);
    1063             :                                 } else {
    1064        3240 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1065             :                                 }
    1066             :                         }
    1067             :                         break;
    1068      239842 :                 case op_project:
    1069      239842 :                         if (l) {
    1070      231019 :                                 if (need_distinct(rel)) {
    1071           0 :                                         set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->exps));
    1072             :                                 } else {
    1073      231019 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1074             :                                 }
    1075             :                         } else {
    1076        8823 :                                 BUN card = 1;
    1077             : 
    1078        8823 :                                 if (!list_empty(rel->exps)) {
    1079       22671 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1080       13848 :                                                 sql_exp *e = n->data;
    1081       13848 :                                                 card = MAX(card, trivial_project_exp_card(e));
    1082             :                                         }
    1083             :                                 }
    1084        8823 :                                 set_count_prop(v->sql->sa, rel, card);
    1085             :                         }
    1086             :                         break;
    1087       23179 :                 case op_groupby:
    1088       23179 :                         if (list_empty(rel->r)) {
    1089        7415 :                                 set_count_prop(v->sql->sa, rel, 1);
    1090             :                         } else {
    1091       15763 :                                 set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->r));
    1092             :                         }
    1093             :                         break;
    1094             :                 default:
    1095             :                         break;
    1096             :                 }
    1097             :                 break;
    1098             :         }
    1099       16828 :         case op_topn: {
    1100       16828 :                 BUN lv = get_rel_count(rel->l);
    1101             : 
    1102       16828 :                 if (lv != BUN_NONE) {
    1103       16811 :                         sql_exp *le = rel->exps->h->data, *oe = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
    1104          96 :                         if (oe && oe->l && exp_is_not_null(oe)) { /* no parameters */
    1105          96 :                                 BUN offset = (BUN) ((atom*)oe->l)->data.val.lval;
    1106          96 :                                 lv = offset >= lv ? 0 : lv - offset;
    1107             :                         }
    1108       16811 :                         if (le->l && exp_is_not_null(le)) {
    1109       16773 :                                 BUN limit = (BUN) ((atom*)le->l)->data.val.lval;
    1110       16773 :                                 lv = MIN(lv, limit);
    1111             :                         }
    1112       16811 :                         set_count_prop(v->sql->sa, rel, lv);
    1113             :                 }
    1114             :                 break;
    1115             :         }
    1116          22 :         case op_sample: {
    1117          22 :                 BUN lv = get_rel_count(rel->l);
    1118             : 
    1119          22 :                 if (lv != BUN_NONE) {
    1120           4 :                         sql_exp *se = rel->exps->h->data;
    1121           4 :                         sql_subtype *tp = exp_subtype(se);
    1122             : 
    1123           4 :                         if (se->l && tp->type->eclass == EC_NUM) { /* sample is a number of rows */
    1124           4 :                                 BUN sample = (BUN) ((atom*)se->l)->data.val.lval;
    1125           4 :                                 lv = MIN(lv, sample);
    1126           0 :                         } else if (se->l) { /* sample is a percentage of rows */
    1127           0 :                                 dbl percent = ((atom*)se->l)->data.val.dval;
    1128           0 :                                 assert(tp->type->eclass == EC_FLT);
    1129           0 :                                 lv = (BUN) ceil((dbl)lv * percent);
    1130             :                         }
    1131           4 :                         set_count_prop(v->sql->sa, rel, lv);
    1132             :                 }
    1133             :                 break;
    1134             :         }
    1135        5357 :         case op_table: {
    1136        5357 :                 sql_exp *op = rel->r;
    1137        5357 :                 if (rel->flag != TRIGGER_WRAPPER && op) {
    1138        5045 :                         sql_subfunc *f = op->f;
    1139        5045 :                         if (f->func->lang == FUNC_LANG_SQL) {
    1140          97 :                                 set_count_prop(v->sql->sa, rel, 1000 /* just some fallback value */);
    1141        4948 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "storage") == 0) {
    1142         597 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of catalog */);
    1143        4351 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "db_users") == 0) {
    1144           0 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of users */);
    1145        4351 :                         } else if (f->func->lang == FUNC_LANG_MAL && strncmp(f->func->base.name, "querylog", 8) == 0) {
    1146         965 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of querylog */);
    1147        3386 :                         } else if (f->func->lang == FUNC_LANG_MAL &&
    1148        3276 :                                            (strcmp(f->func->base.name, "queue") == 0 ||
    1149        3020 :                                                 strcmp(f->func->base.name, "optimizers") == 0 ||
    1150        2772 :                                                 strcmp(f->func->base.name, "env") == 0 ||
    1151        2488 :                                                 strcmp(f->func->base.name, "keywords") == 0 ||
    1152        2488 :                                                 strcmp(f->func->base.name, "statistics") == 0 ||
    1153        1941 :                                                 strcmp(f->func->base.name, "rejects") == 0 ||
    1154        1695 :                                                 strcmp(f->func->base.name, "schemastorage") == 0 ||
    1155        1695 :                                                 strncmp(f->func->base.name, "storage", 7) == 0 ||
    1156        1695 :                                                 strcmp(f->func->base.name, "sessions") == 0) ) {
    1157        1839 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of queue */);
    1158             :                         }
    1159             :                         /* else {
    1160             :                                 printf("%%func needs stats : %s\n", f->func->base.name);
    1161             :                         } */
    1162             :                 }
    1163             :                 break;
    1164             :         }
    1165             :         /*These relations are less important for now
    1166             :           TODO later we can tune it */
    1167             :         case op_insert:
    1168             :         case op_update:
    1169             :         case op_delete:
    1170             :         case op_truncate:
    1171             :         case op_ddl:
    1172             :         default:
    1173             :                 break;
    1174             :         }
    1175             : 
    1176             :         return rel;
    1177             : }
    1178             : 
    1179             : static sql_rel *
    1180      533348 : rel_get_statistics(visitor *v, global_props *gp, sql_rel *rel)
    1181             : {
    1182             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
    1183      533348 :         uint8_t has_special_modify = (uint8_t) gp->has_special_modify;
    1184      533348 :         v->data = &has_special_modify;
    1185      533348 :         rel = rel_visitor_bottomup(v, rel, &rel_get_statistics_);
    1186      533848 :         v->data = gp;
    1187      533848 :         return rel;
    1188             : }
    1189             : 
    1190             : run_optimizer
    1191      666872 : bind_get_statistics(visitor *v, global_props *gp)
    1192             : {
    1193      666872 :         (void) v;
    1194      666872 :         return (gp->opt_level == 1 && !gp->cnt[op_insert]) ? rel_get_statistics : NULL;
    1195             : }
    1196             : 
    1197             : 
    1198             : static bool
    1199       90736 : point_select_on_unique_column(sql_rel *rel)
    1200             : {
    1201       90736 :         if (is_select(rel->op) && !list_empty(rel->exps)) {
    1202      126015 :                 for (node *n = rel->exps->h; n ; n = n->next) {
    1203       72939 :                         sql_exp *e = n->data, *el = e->l, *er = e->r, *found = NULL;
    1204             : 
    1205       72939 :                         if (is_compare(e->type) && e->flag == cmp_equal) {
    1206       30990 :                                 if (is_numeric_upcast(el))
    1207           0 :                                         el = el->l;
    1208       30990 :                                 if (is_numeric_upcast(er))
    1209           0 :                                         er = er->l;
    1210       30990 :                                 if (is_alias(el->type) && exp_is_atom(er) && (found = rel_find_exp(rel->l, el)) &&
    1211         749 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(found) || !has_nil(er)))
    1212             :                                         return true;
    1213       30241 :                                 if (is_alias(er->type) && exp_is_atom(el) && (found = rel_find_exp(rel->l, er)) &&
    1214           0 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(el) || !has_nil(found)))
    1215             :                                         return true;
    1216             :                         }
    1217             :                 }
    1218             :         }
    1219             :         return false;
    1220             : }
    1221             : 
    1222             : /*
    1223             :  * A point select on an unique column reduces the number of rows to 1. If the same select is under a
    1224             :  * join, the opposite side's select can be pushed above the join.
    1225             :  */
    1226             : static inline sql_rel *
    1227     1188360 : rel_push_select_up(visitor *v, sql_rel *rel)
    1228             : {
    1229     1188360 :         if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) || is_semi(rel->op)) && !is_single(rel)) {
    1230      245920 :                 sql_rel *l = rel->l, *r = rel->r;
    1231      245920 :                 bool can_pushup_left = is_select(l->op) && !rel_is_ref(l) && !is_single(l) && (is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op)),
    1232      245920 :                         can_pushup_right = is_select(r->op) && !rel_is_ref(r) && !is_single(r) && (is_innerjoin(rel->op) || is_right(rel->op));
    1233             : 
    1234      245920 :                 if (can_pushup_left || can_pushup_right) {
    1235       63939 :                         if (can_pushup_left)
    1236       43434 :                                 can_pushup_left = point_select_on_unique_column(r);
    1237       63939 :                         if (can_pushup_right)
    1238       47302 :                                 can_pushup_right = point_select_on_unique_column(l);
    1239             : 
    1240             :                         /* if both selects retrieve one row each, it's not worth it to push both up */
    1241       63939 :                         if (can_pushup_left && !can_pushup_right) {
    1242         683 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1243         683 :                                 nrel->l = l->l;
    1244         683 :                                 rel = rel_inplace_select(rel, nrel, l->exps);
    1245         683 :                                 assert(is_select(rel->op));
    1246         683 :                                 v->changes++;
    1247       63256 :                         } else if (!can_pushup_left && can_pushup_right) {
    1248          14 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1249          14 :                                 nrel->r = r->l;
    1250          14 :                                 rel = rel_inplace_select(rel, nrel, r->exps);
    1251          14 :                                 assert(is_select(rel->op));
    1252          14 :                                 v->changes++;
    1253             :                         }
    1254             :                 }
    1255             :         }
    1256     1188360 :         return rel;
    1257             : }
    1258             : 
    1259             : static int
    1260       94236 : sql_class_base_score(visitor *v, sql_column *c, sql_subtype *t, bool equality_based)
    1261             : {
    1262       94236 :         int de;
    1263             : 
    1264       94236 :         if (!t)
    1265             :                 return 0;
    1266       94236 :         switch (ATOMstorage(t->type->localtype)) {
    1267             :         case TYPE_bte:
    1268             :                 return 150 - 8;
    1269        1307 :         case TYPE_sht:
    1270        1307 :                 return 150 - 16;
    1271       37399 :         case TYPE_int:
    1272       37399 :                 return 150 - 32;
    1273         518 :         case TYPE_void:
    1274             :         case TYPE_lng:
    1275         518 :                 return 150 - 64;
    1276             :         case TYPE_uuid:
    1277             : #ifdef HAVE_HGE
    1278             :         case TYPE_hge:
    1279             : #endif
    1280             :                 return 150 - 128;
    1281           2 :         case TYPE_flt:
    1282           2 :                 return 75 - 24;
    1283             :         case TYPE_dbl:
    1284             :                 return 75 - 53;
    1285       32104 :         default:
    1286       32104 :                 if (equality_based && c && v->storage_based_opt && (de = mvc_is_duplicate_eliminated(v->sql, c)))
    1287        1478 :                         return 150 - de * 8;
    1288             :                 /* strings and blobs not duplicate eliminated don't get any points here */
    1289             :                 return 0;
    1290             :         }
    1291             : }
    1292             : 
    1293             : static int
    1294       32731 : score_se_base(visitor *v, sql_rel *rel, sql_exp *e)
    1295             : {
    1296       32731 :         int res = 0;
    1297       32731 :         sql_subtype *t = exp_subtype(e);
    1298       32731 :         sql_column *c = NULL;
    1299             : 
    1300       32731 :         if (e->type == e_convert) /* keep unsafes at the end (TODO improve) */
    1301             :                 return -1000;
    1302             :         /* can we find out if the underlying table is sorted */
    1303       32130 :         if ((c = exp_find_column(rel, e, -2)) && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1304       32130 :                 res += 600;
    1305             : 
    1306             :         /* prefer the shorter var types over the longer ones */
    1307       32130 :         res += sql_class_base_score(v, c, t, is_equality_or_inequality_exp(e->flag)); /* smaller the type, better */
    1308       32130 :         return res;
    1309             : }
    1310             : 
    1311             : static int
    1312       56433 : score_se(visitor *v, sql_rel *rel, sql_exp *e)
    1313             : {
    1314       56433 :         int score = 0;
    1315       56433 :         if (e->type == e_cmp && !is_complex_exp(e->flag)) {
    1316       32731 :                 sql_exp *l = e->l;
    1317             : 
    1318       32731 :                 while (l->type == e_cmp) { /* go through nested comparisons */
    1319           2 :                         sql_exp *ll;
    1320             : 
    1321           2 :                         if (l->flag == cmp_filter || l->flag == cmp_or)
    1322           0 :                                 ll = ((list*)l->l)->h->data;
    1323             :                         else
    1324           2 :                                 ll = l->l;
    1325           2 :                         if (ll->type != e_cmp)
    1326             :                                 break;
    1327             :                         l = ll;
    1328             :                 }
    1329       32731 :                 score += score_se_base(v, rel, l);
    1330             :         }
    1331       56433 :         score += exp_keyvalue(e);
    1332       56433 :         return score;
    1333             : }
    1334             : 
    1335             : static inline sql_rel *
    1336     1188360 : rel_select_order(visitor *v, sql_rel *rel)
    1337             : {
    1338     1188360 :         int *scores = NULL;
    1339     1188360 :         sql_exp **exps = NULL;
    1340             : 
    1341     1188360 :         if (is_select(rel->op) && list_length(rel->exps) > 1) {
    1342       26503 :                 node *n;
    1343       26503 :                 int i, nexps = list_length(rel->exps);
    1344       26503 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, nexps);
    1345       26503 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, nexps);
    1346             : 
    1347       82936 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next) {
    1348       56463 :                         exps[i] = n->data;
    1349       56463 :                         if (find_prop(exps[i]->p, PROP_HASHCOL))
    1350             :                                 return rel;
    1351       56433 :                         scores[i] = score_se(v, rel, n->data);
    1352             :                 }
    1353       26473 :                 GDKqsort(scores, exps, NULL, nexps, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1354             : 
    1355       82906 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next)
    1356       56433 :                         n->data = exps[i];
    1357             :         }
    1358             : 
    1359             :         return rel;
    1360             : }
    1361             : 
    1362             : /* Compute the efficiency of using this expression early in a group by list */
    1363             : static int
    1364       62106 : score_gbe(visitor *v, sql_rel *rel, sql_exp *e)
    1365             : {
    1366       62106 :         int res = 0;
    1367       62106 :         sql_subtype *t = exp_subtype(e);
    1368       62106 :         sql_column *c = exp_find_column(rel, e, -2);
    1369             : 
    1370       62106 :         if (e->card == CARD_ATOM) /* constants are trivial to group */
    1371          38 :                 res += 1000;
    1372             :         /* can we find out if the underlying table is sorted */
    1373       62106 :         if (is_unique(e) || find_prop(e->p, PROP_HASHCOL) || (c && v->storage_based_opt && mvc_is_unique(v->sql, c))) /* distinct columns */
    1374        3396 :                 res += 700;
    1375       41123 :         if (c && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1376        3436 :                 res += 500;
    1377       62106 :         if (find_prop(e->p, PROP_HASHIDX)) /* has hash index */
    1378           0 :                 res += 200;
    1379             : 
    1380             :         /* prefer the shorter var types over the longer ones */
    1381       62106 :         res += sql_class_base_score(v, c, t, true); /* smaller the type, better */
    1382       62106 :         return res;
    1383             : }
    1384             : 
    1385             : /* reorder group by expressions */
    1386             : static inline sql_rel *
    1387     1188360 : rel_groupby_order(visitor *v, sql_rel *rel)
    1388             : {
    1389     1188360 :         int *scores = NULL;
    1390     1188360 :         sql_exp **exps = NULL;
    1391             : 
    1392     1188360 :         if (is_groupby(rel->op) && list_length(rel->r) > 1) {
    1393       26255 :                 node *n;
    1394       26255 :                 list *gbe = rel->r;
    1395       26255 :                 int i, ngbe = list_length(gbe);
    1396       26255 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, ngbe);
    1397       26255 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, ngbe);
    1398             : 
    1399             :                 /* first sorting step, give priority for integers and sorted columns */
    1400       88361 :                 for (i = 0, n = gbe->h; n; i++, n = n->next) {
    1401       62106 :                         exps[i] = n->data;
    1402       62106 :                         scores[i] = score_gbe(v, rel, exps[i]);
    1403             :                 }
    1404       26255 :                 GDKqsort(scores, exps, NULL, ngbe, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1405             : 
    1406             :                 /* second sorting step, give priority to strings with lower number of digits */
    1407       53405 :                 for (i = ngbe - 1; i && !scores[i]; i--); /* find expressions with no score from the first round */
    1408       26255 :                 if (scores[i])
    1409       25289 :                         i++;
    1410       26255 :                 if (ngbe - i > 1) {
    1411       11678 :                         for (int j = i; j < ngbe; j++) {
    1412        8795 :                                 sql_subtype *t = exp_subtype(exps[j]);
    1413        8795 :                                 scores[j] = t ? t->digits : 0;
    1414             :                         }
    1415             :                         /* the less number of digits the better, order ascending */
    1416        2883 :                         GDKqsort(scores + i, exps + i, NULL, ngbe - i, sizeof(int), sizeof(void *), TYPE_int, false, true);
    1417             :                 }
    1418             : 
    1419       88361 :                 for (i = 0, n = gbe->h; n; i++, n = n->next)
    1420       62106 :                         n->data = exps[i];
    1421             :         }
    1422             : 
    1423     1188360 :         return rel;
    1424             : }
    1425             : 
    1426             : /* This optimization loop contains optimizations that can potentially use statistics */
    1427             : static sql_rel *
    1428     1188360 : rel_final_optimization_loop_(visitor *v, sql_rel *rel)
    1429             : {
    1430             :         /* Run rel_push_select_up only once at the end to avoid an infinite optimization loop */
    1431     1188360 :         rel = rel_push_select_up(v, rel);
    1432     1188360 :         rel = rel_select_order(v, rel);
    1433             : 
    1434             :         /* TODO? Maybe later add rel_simplify_count, rel_join2semijoin, rel_simplify_fk_joins,
    1435             :            rel_distinct_project2groupby, rel_simplify_predicates, rel_simplify_math,
    1436             :            rel_distinct_aggregate_on_unique_values */
    1437             : 
    1438     1188360 :         rel = rel_groupby_order(v, rel);
    1439     1188360 :         return rel;
    1440             : }
    1441             : 
    1442             : static sql_rel *
    1443       60682 : rel_final_optimization_loop(visitor *v, global_props *gp, sql_rel *rel)
    1444             : {
    1445       60682 :         (void) gp;
    1446       60682 :         return rel_visitor_bottomup(v, rel, &rel_final_optimization_loop_);
    1447             : }
    1448             : 
    1449             : run_optimizer
    1450      667494 : bind_final_optimization_loop(visitor *v, global_props *gp)
    1451             : {
    1452      667494 :         int flag = v->sql->sql_optimizer;
    1453             :         /* At the moment, this optimizer has dependency on 3 flags */
    1454      541249 :         return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_select]) &&
    1455      728178 :                 (flag & push_select_up) && (flag & optimize_select_and_joins_topdown) && (flag & optimize_projections) ? rel_final_optimization_loop : NULL;
    1456             : }

Generated by: LCOV version 1.14