LCOV - code coverage report
Current view: top level - sql/server - rel_statistics.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 831 883 94.1 %
Date: 2024-11-13 19:37:10 Functions: 26 26 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include "rel_optimizer_private.h"
      15             : #include "rel_statistics.h"
      16             : #include "rel_basetable.h"
      17             : #include "rel_rewriter.h"
      18             : 
      19             : static sql_exp *
      20     3462911 : comparison_find_column(sql_exp *input, sql_exp *e)
      21             : {
      22     3462911 :         switch (input->type) {
      23       98422 :         case e_convert: {
      24       98422 :                 list *types = (list *)input->r;
      25       98422 :                 sql_class from = ((sql_subtype*)types->h->data)->type->eclass, to = ((sql_subtype*)types->h->next->data)->type->eclass;
      26       98422 :                 if (from == to)
      27      189776 :                         return comparison_find_column(input->l, e) ? input : NULL;
      28             :                 return NULL;
      29             :         }
      30     3052820 :         case e_column:
      31     3052820 :                 return exp_match(e, input) ? input : NULL;
      32             :         default:
      33             :                 return NULL;
      34             :         }
      35             : }
      36             : 
      37             : /* multi lo <= col <= hi, maybe still be false even if lo or hi are NULL, possibly similar for filter on multiple
      38             :  * columns */
      39             : #define comp_single_column(c) (!c->f)
      40             : 
      41             : static sql_exp *
      42     8744252 : rel_propagate_column_ref_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
      43             : {
      44     8827329 :         assert(e->type == e_column);
      45     8827329 :         if (rel) {
      46     8827250 :                 switch(rel->op) {
      47     4087964 :                 case op_left:
      48             :                 case op_right:
      49             :                 case op_full:
      50             :                 case op_join:
      51             :                 case op_select:
      52             :                 case op_anti:
      53             :                 case op_semi: {
      54     4087964 :                         bool found_without_semantics = false, found_left = false, found_right = false, still_unique = false;
      55             : 
      56     4087964 :                         if ((is_innerjoin(rel->op) || is_select(rel->op)) && list_length(rel->exps) == 1 && exp_is_false(rel->exps->h->data))
      57             :                                 return NULL; /* nothing will pass, skip */
      58             : 
      59             :                         /* propagate from the bottom first */
      60     4072411 :                         if (rel_propagate_column_ref_statistics(sql, rel->l, e))
      61             :                                 found_left = true;
      62     2590746 :                         if (!found_left && is_join(rel->op) && rel_propagate_column_ref_statistics(sql, rel->r, e))
      63     4072411 :                                 found_right = true;
      64             : 
      65     4072411 :                         if (!found_left && !found_right)
      66             :                                 return NULL;
      67     1776981 :                         if (!list_empty(rel->exps) && rel->op != op_anti) { /* if there's an or, the MIN and MAX get difficult to propagate */
      68     3495151 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
      69     1809468 :                                         sql_exp *comp = n->data, *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL, *fe = comp->f, *fne = NULL;
      70             : 
      71     1809468 :                                         if (comp->type == e_cmp) {
      72     1808466 :                                                 if (is_theta_exp(comp->flag) && ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) || (fe && (fne = comparison_find_column(fe, e))))) {
      73      125119 :                                                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
      74      125119 :                                                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
      75             : 
      76             :                                                         /* not semantics found or if explicitly filtering not null values from the column */
      77      125119 :                                                         found_without_semantics |= (!is_semantics(comp) && comp_single_column(comp)) || (comp->flag == cmp_equal && lne && is_anti(comp) && exp_is_null(re));
      78      125119 :                                                         still_unique |= comp->flag == cmp_equal && is_unique(le) && is_unique(re); /* unique if only equi-joins on unique columns are there */
      79      125119 :                                                         if (is_full(rel->op) || (is_left(rel->op) && found_left) || (is_right(rel->op) && found_right)) /* on outer joins, min and max cannot be propagated on some cases */
      80       19791 :                                                                 continue;
      81             :                                                         /* if (end2 >= start1 && start2 <= end1) then the 2 intervals are intersected */
      82      105328 :                                                         if (fe && lval_min && lval_max) { /* range case, the middle expression must intersect the other two */
      83        4622 :                                                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
      84        4622 :                                                                 int int1 = rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0,
      85        4622 :                                                                         int2 = fval_min && fval_max && atom_cmp(fval_max, lval_min) >= 0 && atom_cmp(fval_min, lval_max) <= 0,
      86        4622 :                                                                         symmetric = is_symmetric(comp);
      87             : 
      88        4622 :                                                                 if (is_anti(comp) || (!symmetric && fval_min && rval_max && atom_cmp(fval_min, rval_max) < 0)) /* for asymmetric case the re range must be after the fe range */
      89        1817 :                                                                         continue;
      90        2805 :                                                                 if (lne && int1 && int2) {
      91         104 :                                                                         if (symmetric) {
      92           0 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
      93           0 :                                                                                 atom *nmin = statistics_atom_min(sql, rval_min, fval_min), *nmax = statistics_atom_max(sql, rval_max, fval_max);
      94             :                                                                                 /* max is min from le and (max from re and fe max) */
      95           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, nmax, p2->value.pval) : nmax);
      96             :                                                                                 /* min is max from le and (min from re and fe min) */
      97           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, nmin, p1->value.pval) : nmin);
      98             :                                                                         } else {
      99         104 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
     100             :                                                                                 /* max is min from le and fe max */
     101         104 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, fval_max, p2->value.pval) : fval_max);
     102             :                                                                                 /* min is max from le and re min */
     103         104 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, rval_min, p1->value.pval) : rval_min);
     104             :                                                                         }
     105        2701 :                                                                 } else if (rne) {
     106         590 :                                                                         if (symmetric && int1 && int2) { /* min is max from le and (min from re and fe min) */
     107           0 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     108           0 :                                                                                 atom *nmin = p ? statistics_atom_min(sql, p->value.pval, fval_min) : fval_min;
     109           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, nmin, lval_min));
     110         590 :                                                                         } else if (int1) { /* min is max from le and re min */
     111         100 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     112         100 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     113             :                                                                         }
     114        2111 :                                                                 } else if (fne) {
     115         549 :                                                                         if (symmetric && int1 && int2) { /* max is min from le and (max from re and fe max) */
     116           0 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     117           0 :                                                                                 atom *nmax = p ? statistics_atom_max(sql, p->value.pval, rval_max) : rval_max;
     118           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, nmax, lval_max) : nmax);
     119         549 :                                                                         } else if (int2) { /* max is min from le and fe max */
     120         266 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     121         266 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     122             :                                                                         }
     123             :                                                                 }
     124      100706 :                                                         } else if (lval_min && lval_max && rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0) {
     125             :                                                                 /* both min and max must be set and the intervals must overlap */
     126       43314 :                                                                 switch (comp->flag) {
     127       28978 :                                                                 case cmp_equal: /* for equality reduce */
     128       28978 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_max(sql, lval_max, rval_max) : statistics_atom_min(sql, lval_max, rval_max));
     129       28978 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_min(sql, lval_min, rval_min) : statistics_atom_max(sql, lval_min, rval_min));
     130       28978 :                                                                         break;
     131        4679 :                                                                 case cmp_notequal: /* for inequality expand */
     132        4679 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_min(sql, lval_max, rval_max) : statistics_atom_max(sql, lval_max, rval_max));
     133        4679 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_max(sql, lval_min, rval_min) : statistics_atom_min(sql, lval_min, rval_min));
     134        4679 :                                                                         break;
     135        5651 :                                                                 case cmp_gt:
     136             :                                                                 case cmp_gte:
     137       10364 :                                                                         if (!is_anti(comp) && lne) { /* min is max from both min */
     138        4713 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     139        4713 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min, p->value.pval) : rval_min);
     140         938 :                                                                         } else if (!is_anti(comp)) { /* max is min from both max */
     141         938 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     142         938 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     143             :                                                                         }
     144             :                                                                         break;
     145        4006 :                                                                 case cmp_lt:
     146             :                                                                 case cmp_lte:
     147        7205 :                                                                         if (!is_anti(comp) && lne) { /* max is min from both max */
     148        3199 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     149        3199 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max, p->value.pval) : rval_max);
     150         807 :                                                                         } else if (!is_anti(comp)) { /* min is max from both min */
     151         807 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     152         807 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     153             :                                                                         }
     154             :                                                                         break;
     155             :                                                                 default: /* Maybe later I can do cmp_in and cmp_notin */
     156             :                                                                         break;
     157             :                                                                 }
     158             :                                                         }
     159             :                                                 }
     160             :                                         }
     161             :                                 }
     162             :                         }
     163     1776981 :                         if (is_full(rel->op) || (is_left(rel->op) && found_right) || (is_right(rel->op) && found_left))
     164       38014 :                                 set_has_nil(e);
     165     1776981 :                         if (!is_outerjoin(rel->op) && found_without_semantics) /* at an outer join, null values pass */
     166       94272 :                                 set_has_no_nil(e);
     167     1776981 :                         if (is_join(rel->op) && is_unique(e) && !still_unique)
     168      118915 :                                 set_not_unique(e);
     169     1776981 :                         return e;
     170             :                 }
     171     4636862 :                 case op_table:
     172             :                 case op_basetable:
     173             :                 case op_union:
     174             :                 case op_except:
     175             :                 case op_inter:
     176             :                 case op_munion:
     177             :                 case op_project:
     178             :                 case op_groupby: {
     179     4636862 :                         sql_exp *found;
     180     4636862 :                         atom *fval;
     181     4636862 :                         prop *est;
     182     4636862 :                         if ((found = rel_find_exp(rel, e))) {
     183     2175093 :                                 if (rel->op != op_table) { /* At the moment don't propagate statistics for table relations */
     184     2130783 :                                         if ((fval = find_prop_and_get(found->p, PROP_MAX)))
     185     1133067 :                                                 set_minmax_property(sql, e, PROP_MAX, fval);
     186     2130779 :                                         if ((fval = find_prop_and_get(found->p, PROP_MIN)))
     187     1140374 :                                                 set_minmax_property(sql, e, PROP_MIN, fval);
     188     2130785 :                                         if (!has_nil(found))
     189     1377434 :                                                 set_has_no_nil(e);
     190     2130785 :                                         if (is_unique(found) || (need_distinct(rel) && list_length(rel->exps) == 1) ||
     191     1720382 :                                                 (is_groupby(rel->op) && (list_empty(rel->r) || (list_length(rel->r) == 1 && exps_find_exp(rel->r, e)))))
     192      421278 :                                                 set_unique(e);
     193             :                                         /* propagate unique estimation for known cases */
     194     2130784 :                                         if (is_groupby(rel->op) && list_empty(rel->r) && !find_prop(e->p, PROP_NUNIQUES)) { /* global aggregate case */
     195        7538 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     196        7538 :                                                 p->value.dval = 1;
     197     2123246 :                                         } else if (((is_basetable(rel->op) || is_except(rel->op) || is_inter(rel->op) || is_simple_project(rel->op) ||
     198       66648 :                                                                  (is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
     199     2065733 :                                                                 (est = find_prop(found->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES))) {
     200      187991 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     201      187991 :                                                 p->value.dval = est->value.dval;
     202             :                                         }
     203             :                                 }
     204     2175099 :                                 return e;
     205             :                         }
     206             :                         return NULL;
     207             :                 }
     208       83077 :                 case op_topn:
     209             :                 case op_sample:
     210       83077 :                         return rel_propagate_column_ref_statistics(sql, rel->l, e);
     211             :                 default:
     212             :                         break;
     213             :                 }
     214             :         }
     215             :         return NULL;
     216             : }
     217             : 
     218             : static atom *
     219     4451390 : atom_from_valptr( allocator *sa, sql_subtype *tpe, ValRecord *v)
     220             : {
     221     4451390 :         atom *a = SA_NEW(sa, atom);
     222             : 
     223     4451375 :         assert(!VALisnil(v));
     224     4451410 :         *a = (atom) {.tpe = *tpe,};
     225     4451410 :         SA_VALcopy(sa, &a->data, v);
     226     4451447 :         return a;
     227             : }
     228             : 
     229             : void
     230     4058648 : sql_column_get_statistics(mvc *sql, sql_column *c, sql_exp *e)
     231             : {
     232     4058648 :         bool nonil = false, unique = false;
     233     4058648 :         double unique_est = 0.0;
     234     4058648 :         ValRecord min, max;
     235     4058648 :         int ok = mvc_col_stats(sql, c, &nonil, &unique, &unique_est, &min, &max);
     236             : 
     237     4059801 :         if (has_nil(e) && nonil)
     238     2654932 :                 set_has_no_nil(e);
     239     4059801 :         if (!is_unique(e) && unique)
     240     1100600 :                 set_unique(e);
     241     4059801 :         if (unique_est != 0.0) {
     242     2823637 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     243     2823508 :                 p->value.dval = unique_est;
     244             :         }
     245     4059672 :         unsigned int digits = 0;
     246     4059672 :         sql_subtype *et = exp_subtype(e);
     247     4059784 :         if (et->type->eclass == EC_DEC || et->type->eclass == EC_NUM)
     248     2640950 :                 digits = et->digits;
     249     4059784 :         if ((ok & 2) == 2) {
     250     2222783 :                 if (!VALisnil(&max)) {
     251     2222729 :                         prop *p = e->p = prop_create(sql->sa, PROP_MAX, e->p);
     252     2222667 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &max);
     253     2222650 :                         if (digits) {
     254     1655602 :                                 unsigned int nd = atom_digits(p->value.pval);
     255     1655565 :                                 if (nd < digits)
     256             :                                         digits = nd;
     257     1655565 :                                 if (!digits)
     258             :                                         digits = 1;
     259             :                         }
     260             :                 }
     261     2222575 :                 VALclear(&max);
     262             :         }
     263     4059567 :         if ((ok & 1) == 1) {
     264     2228988 :                 if (!VALisnil(&min)) {
     265     2229006 :                         prop *p = e->p = prop_create(sql->sa, PROP_MIN, e->p);
     266     2229088 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &min);
     267     2229193 :                         if (digits) {
     268     1663219 :                                 unsigned int nd = atom_digits(p->value.pval);
     269     1663219 :                                 if (nd > digits) /* possibly set to low by max value */
     270             :                                         digits = nd;
     271             :                                 if (!digits)
     272             :                                         digits = 1;
     273             :                         }
     274             :                 }
     275     2229183 :                 VALclear(&min);
     276             :         }
     277     4059778 :         if (digits)
     278     2640916 :                 et->digits = digits;
     279     4059778 :         if (et->type->eclass == EC_DEC && et->digits <= et->scale)
     280         216 :                 et->digits = et->scale + 1;
     281     4059778 : }
     282             : 
     283             : static void
     284      935414 : rel_basetable_column_get_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
     285             : {
     286      935414 :         if (e->p)
     287             :                 return;
     288      302070 :         sql_column *c = NULL;
     289             : 
     290      302070 :         if ((c = rel_base_find_column(rel, e->nid))) {
     291      204204 :                 sql_column_get_statistics(sql, c, e);
     292             :         }
     293             : }
     294             : 
     295             : static bool
     296        8872 : rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, sql_exp *e, int i)
     297             : {
     298        8872 :         sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
     299        8872 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     300        8872 :                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     301        8872 :         prop *est;
     302             : 
     303             :         /* for the intersection, if both expressions don't overlap, it can be pruned */
     304        8872 :         if (is_inter(rel->op) && !has_nil(le) && !has_nil(re) &&
     305         511 :                 ((rval_max && lval_min && atom_cmp(rval_max, lval_min) < 0) || (rval_min && lval_max && atom_cmp(rval_min, lval_max) > 0)))
     306          26 :                 return true;
     307             : 
     308        8846 :         if (lval_max && rval_max) {
     309        2557 :                 if (is_union(rel->op))
     310           3 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     311        2554 :                 else if (is_inter(rel->op))
     312         399 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval_max, rval_max)); /* for intersect the new max will be the min of the two */
     313             :                 else /* except */
     314        2155 :                         set_minmax_property(sql, e, PROP_MAX, lval_max);
     315             :         }
     316        8846 :         if (lval_min && rval_min) {
     317        2557 :                 if (is_union(rel->op))
     318           3 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     319        2554 :                 else if (is_inter(rel->op))
     320         399 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval_min, rval_min)); /* for intersect the new min will be the max of the two */
     321             :                 else /* except */
     322        2155 :                         set_minmax_property(sql, e, PROP_MIN, lval_min);
     323             :         }
     324             : 
     325        8846 :         if (is_union(rel->op) || is_munion(rel->op)) {
     326        5986 :                 if (!has_nil(le) && !has_nil(re))
     327         879 :                         set_has_no_nil(e);
     328        5986 :                 if (need_distinct(rel) && list_length(rel->exps) == 1)
     329           0 :                         set_unique(e);
     330        2860 :         } else if (is_inter(rel->op)) {
     331         564 :                 if (!has_nil(le) || !has_nil(re))
     332         509 :                         set_has_no_nil(e);
     333         564 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     334         345 :                         set_unique(e);
     335             :         } else {
     336        2296 :                 assert(is_except(rel->op));
     337        2296 :                 if (!has_nil(le))
     338        2233 :                         set_has_no_nil(e);
     339        2296 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     340        2009 :                         set_unique(e);
     341             :         }
     342             :         /* propagate unique estimation for known cases */
     343        8846 :         if (!is_union(rel->op) && (est = find_prop(le->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES)) {
     344         205 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     345         205 :                 p->value.dval = est->value.dval;
     346             :         }
     347             :         return false;
     348             : }
     349             : 
     350             : 
     351             : static void
     352       60685 : rel_munion_get_statistics(mvc *sql, sql_rel *rel, list *rels, sql_exp *e, int i)
     353             : {
     354       60685 :         assert(is_munion(rel->op));
     355             : 
     356       60685 :         sql_rel *l = rels->h->data;
     357       60685 :         sql_exp *le = list_fetch(l->exps, i);
     358       60685 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX);
     359       60685 :         bool has_nonil = !has_nil(le);
     360             : 
     361      175898 :         for(node *n = rels->h->next; n; n = n->next) {
     362      115213 :                 sql_rel *r = n->data;
     363      115213 :                 sql_exp *re = list_fetch(r->exps, i);
     364      115213 :                 atom *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     365             : 
     366      115213 :                 if (lval_max && rval_max) {
     367       42562 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     368       42562 :                         lval_max = find_prop_and_get(e->p, PROP_MAX);
     369             :                 }
     370      115213 :                 if (lval_min && rval_min) {
     371       42063 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     372       42063 :                         lval_min = find_prop_and_get(e->p, PROP_MIN);
     373             :                 }
     374      115213 :                 has_nonil &= !has_nil(re);
     375             : 
     376             :         }
     377             : 
     378       60685 :         if (has_nonil)
     379       41540 :                 set_has_no_nil(e);
     380             : 
     381       60685 :         if (need_distinct(rel) && list_length(rel->exps) == 1)
     382        1594 :                 set_unique(e);
     383       60685 : }
     384             : 
     385             : 
     386             : static sql_exp *
     387     3478426 : rel_propagate_statistics(visitor *v, sql_rel *rel, sql_exp *e, int depth)
     388             : {
     389     3478426 :         mvc *sql = v->sql;
     390     3478426 :         atom *lval;
     391             : 
     392     3478426 :         (void) depth;
     393     3478426 :         switch(e->type) {
     394     2180916 :         case e_column:
     395     2180916 :                 switch (rel->op) { /* set relations don't call rel_propagate_statistics */
     396      278805 :                 case op_join:
     397             :                 case op_left:
     398             :                 case op_right:
     399             :                 case op_full:
     400             :                 case op_semi:
     401             :                 case op_anti: {
     402      278805 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e);
     403      278805 :                         if (!found)
     404      139847 :                                 (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     405             :                         break;
     406             :                 }
     407     1902111 :                 case op_select:
     408             :                 case op_project:
     409             :                 case op_groupby: {
     410     1902111 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e); /* labels may be found on the same projection, ugh */
     411     1902112 :                         if (!found && is_simple_project(rel->op))
     412      125476 :                                 (void) rel_propagate_column_ref_statistics(sql, rel, e);
     413             :                         break;
     414             :                 }
     415           0 :                 case op_insert:
     416             :                 case op_update:
     417             :                 case op_delete:
     418           0 :                         (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     419           0 :                         break;
     420             :                 default:
     421             :                         break;
     422             :                 }
     423             :                 break;
     424       98453 :         case e_convert: {
     425       98453 :                 sql_subtype *to = exp_totype(e), *from = exp_fromtype(e);
     426       98453 :                 sql_exp *l = e->l;
     427       98453 :                 sql_class fr = from->type->eclass, too = to->type->eclass;
     428       98453 :                 prop *est;
     429             : 
     430       98453 :                 if (fr == too) {
     431       89381 :                         if ((lval = find_prop_and_get(l->p, PROP_MAX))) {
     432       58826 :                                 atom *res = atom_copy(sql->sa, lval);
     433       58826 :                                 if ((res = atom_cast(sql->sa, res, to)))
     434       58803 :                                         set_minmax_property(sql, e, PROP_MAX, res);
     435             :                         }
     436       89381 :                         if ((lval = find_prop_and_get(l->p, PROP_MIN))) {
     437       59443 :                                 atom *res = atom_copy(sql->sa, lval);
     438       59443 :                                 if ((res = atom_cast(sql->sa, res, to)))
     439       59420 :                                         set_minmax_property(sql, e, PROP_MIN, res);
     440             :                         }
     441             :                 }
     442             :                 /* propagating nuniques through conversions is tricky. I am adding just the general cases */
     443       98453 :                 if ((est = find_prop(l->p, PROP_NUNIQUES)) &&
     444       61091 :                         (((fr == EC_TIME || fr == EC_TIME_TZ || fr == EC_DATE) && (too == EC_TIMESTAMP || too == EC_TIMESTAMP_TZ)) ||
     445       61066 :                          ((fr == EC_NUM || fr == EC_BIT) && too == EC_NUM) || (fr == EC_DEC && too == EC_DEC) || (fr != EC_EXTERNAL && too == EC_STRING))) {
     446       57637 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     447       57637 :                         p->value.dval = est->value.dval;
     448             :                 }
     449       98453 :                 if (!has_nil(l))
     450       55516 :                         set_has_no_nil(e);
     451             :                 break;
     452             :         }
     453      341627 :         case e_aggr:
     454             :         case e_func: {
     455      341627 :                 BUN lv;
     456      341627 :                 sql_subfunc *f = e->f;
     457             : 
     458      341627 :                 if (!f->func->s) {
     459      315154 :                         int key = hash_key(f->func->base.name); /* Using hash lookup */
     460      315154 :                         sql_hash_e *he = sql_functions_lookup->buckets[key&(sql_functions_lookup->size-1)];
     461      315154 :                         lookup_function look = NULL;
     462             : 
     463      688375 :                         for (; he && !look; he = he->chain) {
     464      373221 :                                 struct function_properties* fp = (struct function_properties*) he->value;
     465             : 
     466      373221 :                                 if (!strcmp(f->func->base.name, fp->name))
     467      107712 :                                         look = fp->func;
     468             :                         }
     469      315154 :                         if (look)
     470      107712 :                                 look(sql, e);
     471             :                 }
     472             :                 /* for global aggregates with no semantics, if the left relation has values, then the output won't be NULL */
     473      341627 :                 if (!is_semantics(e) && e->l && !have_nil(e->l) &&
     474       89129 :                         (e->type != e_aggr || (is_groupby(rel->op) && list_length(rel->r)) || ((lv = get_rel_count(rel->l)) != BUN_NONE && lv > 0)))
     475       88806 :                         set_has_no_nil(e);
     476             :                 /* set properties for global aggregates */
     477      341627 :                 if (e->type == e_aggr && is_groupby(rel->op) && list_empty(rel->r)) {
     478        7871 :                         if (!find_prop(e->p, PROP_NUNIQUES)) {
     479        7871 :                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     480        7871 :                                 p->value.dval = 1;
     481             :                         }
     482        7871 :                         set_unique(e);
     483             :                 }
     484             :                 break;
     485             :         }
     486      570278 :         case e_atom:
     487      570278 :                 if (e->l) {
     488      553559 :                         atom *a = (atom*) e->l;
     489      553559 :                         if (!a->isnull) {
     490      490243 :                                 set_minmax_property(sql, e, PROP_MAX, a);
     491      490243 :                                 set_minmax_property(sql, e, PROP_MIN, a);
     492             :                         }
     493      553559 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     494      553558 :                         p->value.dval = 1;
     495       16719 :                 } else if (e->f) {
     496        3194 :                         list *vals = (list *) e->f;
     497        3194 :                         sql_exp *first = vals->h ? vals->h->data : NULL;
     498        3194 :                         atom *max = NULL, *min = NULL; /* all child values must have a valid min/max */
     499        3194 :                         int has_nil = 0;
     500             : 
     501        3194 :                         if (first) {
     502        3194 :                                 max = ((lval = find_prop_and_get(first->p, PROP_MAX))) ? lval : NULL;
     503        3194 :                                 min = ((lval = find_prop_and_get(first->p, PROP_MIN))) ? lval : NULL;
     504        3194 :                                 has_nil |= has_nil(first);
     505             :                         }
     506             : 
     507       13934 :                         for (node *n = vals->h ? vals->h->next : NULL ; n ; n = n->next) {
     508        7546 :                                 sql_exp *ee = n->data;
     509             : 
     510        7546 :                                 if (min && max) {
     511        7093 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MAX))) {
     512        7047 :                                                 max = atom_cmp(lval, max) > 0 ? lval : max;
     513             :                                         } else {
     514             :                                                 max = NULL;
     515             :                                         }
     516        7093 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MIN))) {
     517        7047 :                                                 min = atom_cmp(min, lval) > 0 ? lval : min;
     518             :                                         } else {
     519             :                                                 min = NULL;
     520             :                                         }
     521             :                                 }
     522        7546 :                                 has_nil |= has_nil(ee);
     523             :                         }
     524             : 
     525        3194 :                         if (!has_nil)
     526        2828 :                                 set_has_no_nil(e);
     527        3194 :                         if (min && max) {
     528        2805 :                                 set_minmax_property(sql, e, PROP_MAX, max);
     529        2805 :                                 set_minmax_property(sql, e, PROP_MIN, min);
     530             :                         }
     531        3194 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     532        3194 :                         p->value.dval = (dbl) list_length(vals);
     533             :                 } else {
     534       13525 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     535       13525 :                         p->value.dval = 1;
     536             :                 }
     537             :                 break;
     538      287152 :         case e_cmp:
     539             :                 /* TODO? propagating min/max/unique of booleans is not very worth it */
     540      287152 :                 if (e->flag == cmp_or || e->flag == cmp_filter) {
     541       18058 :                         if (!have_nil(e->l) && !have_nil(e->r))
     542       13588 :                                 set_has_no_nil(e);
     543      269094 :                 } else if (e->flag == cmp_in || e->flag == cmp_notin) {
     544       21237 :                         sql_exp *le = e->l;
     545       21237 :                         if (!has_nil(le) && !have_nil(e->r))
     546       18194 :                                 set_has_no_nil(e);
     547             :                 } else {
     548      247857 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     549      247857 :                         if (!has_nil(le) && !has_nil(re) && (!e->f || !has_nil(fe)))
     550      177921 :                                 set_has_no_nil(e);
     551             :                 }
     552             :                 break;
     553             :         case e_psm:
     554             :                 break;
     555             :         }
     556             : 
     557             : #ifndef NDEBUG
     558             :         {
     559             :                 /* min and max cannot be NULL and min must be <= than max, if it happens the inner relation must be empty! */
     560     3478426 :                 atom *min = find_prop_and_get(e->p, PROP_MIN), *max = find_prop_and_get(e->p, PROP_MAX);
     561             : 
     562     3478419 :                 (void) min;
     563     3478419 :                 (void) max;
     564     3478419 :                 assert(!min || !min->isnull);
     565     3478419 :                 assert(!max || !max->isnull);
     566     3478419 :                 assert(!min || !max || (min && max)/* atom_cmp(min, max) <= 0*/);
     567             :         }
     568             : #endif
     569     3478419 :         return e;
     570             : }
     571             : 
     572             : static list * /* Remove predicates always false from min/max values */
     573      139266 : rel_prune_predicates(visitor *v, sql_rel *rel)
     574             : {
     575      139266 :         if (rel->l) {
     576      139266 :                 sql_rel *l = rel->l;
     577      139266 :                 if (is_single(l))
     578        3391 :                         return rel->exps;
     579             :         }
     580      135875 :         if (!list_empty(rel->attr))
     581        2960 :                 return rel->exps;
     582      281935 :         for (node *n = rel->exps->h ; n ; n = n->next) {
     583      149020 :                 sql_exp *e = n->data;
     584             : 
     585      149020 :                 if (e->type == e_cmp && is_theta_exp(e->flag)) {
     586      122629 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     587      122629 :                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     588      122629 :                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     589      122629 :                         bool always_false = false, always_true = false;
     590             : 
     591      122629 :                         if (fe && !is_symmetric(e)) {
     592        3058 :                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
     593        3058 :                                 comp_type lower = range2lcompare(e->flag), higher = range2rcompare(e->flag);
     594        3660 :                                 int not_int1 = rval_min && lval_max && /* the middle and left intervals don't overlap */
     595        1663 :                                         (!is_anti(e) && (lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0)),
     596        4072 :                                         not_int2 = lval_min && fval_max && /* the middle and right intervals don't overlap */
     597        2485 :                                         (!is_anti(e) && (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0)),
     598        3058 :                                         not_int3 = rval_min && fval_max && /* the left interval is after the right one */
     599        1288 :                                         (!is_anti(e) && (atom_cmp(rval_min, fval_max) > 0));
     600             : 
     601        3058 :                                 always_false |= not_int1 || not_int2 || not_int3;
     602             :                                 /* for anti the middle must be before the left or after the right or the right after the left, for the other the middle must be always between the left and right intervals */
     603        4113 :                                 always_true |= !has_nil(le) && !has_nil(re) && !has_nil(fe) &&
     604        3957 :                                         lval_min && lval_max && rval_min && rval_max && fval_min && fval_max &&
     605         575 :                                         (is_anti(e) ? ((lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) || (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
     606         488 :                                          ((lower == cmp_gte ? atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0)));
     607             :                         } else if (!fe) {
     608      119553 :                                 if (!is_semantics(e)) /* trivial not null cmp null case */
     609      227682 :                                         always_false |= !is_anti(e) && ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)));
     610      119553 :                                 switch (e->flag) {
     611      104083 :                                 case cmp_equal:
     612      104083 :                                         if (lval_min && lval_max && rval_min && rval_max && (!is_semantics(e) || !has_nil(le) || !has_nil(re)))
     613      133444 :                                                 always_false |= (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     614      104083 :                                         if (is_semantics(e)) { /* prune *= NULL cases */
     615        5686 :                                                 always_false |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     616       11372 :                                                 always_true |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     617             :                                         }
     618             :                                         break;
     619        7207 :                                 case cmp_notequal:
     620        7207 :                                         if (lval_min && lval_max && rval_min && rval_max)
     621       11348 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     622        7207 :                                         if (is_semantics(e)) {
     623          26 :                                                 always_true |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     624          52 :                                                 always_false |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     625             :                                         }
     626             :                                         break;
     627        5632 :                                 case cmp_gt:
     628        5632 :                                         if (lval_max && rval_min)
     629        1947 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     630        5632 :                                         if (lval_min && rval_max)
     631        3894 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     632             :                                         break;
     633         119 :                                 case cmp_gte:
     634         119 :                                         if (lval_max && rval_min)
     635          49 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     636         119 :                                         if (lval_min && rval_max)
     637          98 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     638             :                                         break;
     639        2428 :                                 case cmp_lt:
     640        2428 :                                         if (lval_min && rval_max)
     641        1376 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     642        2428 :                                         if (lval_max && rval_min)
     643        2800 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     644             :                                         break;
     645          84 :                                 case cmp_lte:
     646          84 :                                         if (lval_min && rval_max)
     647          11 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     648          84 :                                         if (lval_max && rval_min)
     649          22 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     650             :                                         break;
     651             :                                 default: /* Maybe later I can do cmp_in and cmp_notin but don't forget to remove is_theta_exp check up there */
     652             :                                         break;
     653             :                                 }
     654             :                         }
     655      122629 :                         assert(!always_false || !always_true);
     656      122629 :                         if (always_false || always_true) {
     657        3651 :                                 sql_exp *ne = exp_atom_bool(v->sql->sa, always_true /* if always_true set then true else false */);
     658        3651 :                                 if (exp_name(e))
     659           0 :                                         exp_prop_alias(v->sql->sa, ne, e);
     660        3651 :                                 n->data = ne;
     661        3651 :                                 v->changes++;
     662             :                         }
     663             :                 }
     664             :         }
     665      132915 :         return rel->exps;
     666             : }
     667             : 
     668             : static sql_rel *
     669          14 : set_setop_side(visitor *v, sql_rel *rel, sql_rel *side)
     670             : {
     671          14 :         if (side == rel->r)
     672           0 :                 rel->r = NULL;
     673             :         else
     674          14 :                 rel->l = NULL;
     675          14 :         if (!is_simple_project(side->op) || rel_is_ref(side) || !list_empty(side->r)) {
     676           0 :                 side = rel_project(v->sql->sa, side, rel_projections(v->sql, side, NULL, 0, 1));
     677           0 :                 set_count_prop(v->sql->sa, side, get_rel_count(side->l));
     678           0 :                 side->exps = exps_exp_visitor_bottomup(v, side, side->exps, 0, &rel_propagate_statistics, false);
     679             :         }
     680          35 :         for (node *n = rel->exps->h, *m = side->exps->h ; n && m ; n = n->next, m = m->next) {
     681          21 :                 sql_exp *e1 = n->data, *e2 = m->data;
     682          21 :                 exp_prop_alias(v->sql->sa, e2, e1);
     683             :         }
     684          14 :         list_hash_clear(side->exps);
     685             : 
     686          14 :         if (need_distinct(rel))
     687           0 :                 set_distinct(side);
     688          14 :         side->p = prop_copy(v->sql->sa, rel->p);
     689          14 :         rel_destroy(rel);
     690          14 :         return side;
     691             : }
     692             : 
     693             : static BUN
     694       21876 : trivial_project_exp_card(sql_exp *e)
     695             : {
     696       22170 :         if (e->type == e_convert)
     697         294 :                 return trivial_project_exp_card(e->l);
     698       21876 :         return e->type == e_atom && e->f ? (BUN) list_length(e->f) : 1;
     699             : }
     700             : 
     701             : static BUN
     702       24248 : rel_calc_nuniques(mvc *sql, sql_rel *l, list *exps)
     703             : {
     704       24248 :         BUN lv = get_rel_count(l);
     705             : 
     706       24247 :         if (lv == 0)
     707             :                 return 0;
     708       21538 :         if (!list_empty(exps)) {
     709       21538 :                 BUN nuniques = 0;
     710             :                 /* compute the highest number of unique values */
     711       51130 :                 for (node *n = exps->h ; n && nuniques != BUN_NONE ; n = n->next) {
     712       29591 :                         sql_exp *e = n->data;
     713       29591 :                         sql_rel *bt = NULL;
     714       29591 :                         prop *p = NULL;
     715       29591 :                         BUN euniques = BUN_NONE;
     716       29591 :                         atom *min, *max, *sub = NULL;
     717       29591 :                         sql_subtype *tp = exp_subtype(e);
     718       29591 :                         sql_class ec = tp ? tp->type->eclass : EC_STRING; /* if 'e' has no type (eg parameter), use a non-number type to fail condition */
     719             : 
     720       29591 :                         if ((p = find_prop(e->p, PROP_NUNIQUES))) {
     721       20955 :                                 euniques = (BUN) p->value.dval;
     722        8636 :                         } else if (e->type == e_column && e->nid && rel_find_exp_and_corresponding_rel(l, e, false, &bt, NULL) && bt && (p = find_prop(bt->p, PROP_COUNT))) {
     723        6593 :                                 euniques = (BUN) p->value.lval;
     724             :                         }
     725             :                         /* use min to max range to compute number of possible values in the domain for number types */
     726       29591 :                         if ((EC_TEMP(ec)||ec==EC_NUM||ec==EC_MONTH||ec==EC_POS) &&
     727       22362 :                                 (min = find_prop_and_get(e->p, PROP_MIN)) && (max = find_prop_and_get(e->p, PROP_MAX))) {
     728             :                                 /* the range includes min and max, so the atom_inc call is needed */
     729             :                                 /* if 'euniques' has number of distinct values, compute min between both */
     730       17543 :                                 if ((sub = atom_sub(sql->sa, max, min)) && (sub = atom_inc(sql->sa, sub)) && (sub = atom_cast(sql->sa, sub, sql_bind_localtype("oid"))))
     731           0 :                                         euniques = MIN(euniques, (BUN) sub->data.val.oval);
     732             :                         }
     733       29592 :                         if (euniques != BUN_NONE)
     734       27549 :                                 nuniques = MAX(nuniques, euniques); /* the highest cardinality sets the estimation */
     735             :                         else
     736             :                                 nuniques = BUN_NONE;
     737             :                 }
     738       21539 :                 if (nuniques != BUN_NONE)
     739             :                         return nuniques;
     740             :         }
     741             :         return lv;
     742             : }
     743             : 
     744             : static sql_rel *
     745     2071789 : rel_get_statistics_(visitor *v, sql_rel *rel)
     746             : {
     747             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
     748     2071789 :         uint8_t has_special_modify = *(uint8_t*) v->data;
     749     2071789 :         bool can_be_pruned = !has_special_modify && v->storage_based_opt;
     750             : 
     751             :         /* Don't look at the same relation twice */
     752     2071789 :         if (are_statistics_gathered(rel->used))
     753             :                 return rel;
     754     1330571 :         rel->used |= statistics_gathered;
     755             : 
     756     1330571 :         switch (rel->op) {
     757      312090 :         case op_basetable: {
     758      312090 :                 sql_table *t = (sql_table *) rel->l;
     759      312090 :                 sqlstore *store = v->sql->session->tr->store;
     760             : 
     761      312090 :                 if (!list_empty(rel->exps)) {
     762     1247607 :                         for (node *n = rel->exps->h ; n ; n = n->next)
     763      935380 :                                 rel_basetable_column_get_statistics(v->sql, rel, n->data);
     764             :                 }
     765             :                 /* Set table row count. TODO? look for remote tables. Don't look at storage for declared tables, because it won't be cleaned */
     766      312220 :                 if (isTable(t) && t->s && !isDeclaredTable(t)) /* count active rows only */
     767      258801 :                         set_count_prop(v->sql->sa, rel, (BUN)store->storage_api.count_col(v->sql->session->tr, ol_first_node(t->columns)->data, 10));
     768             :                 break;
     769             :         }
     770        2790 :         case op_union:
     771             :         case op_inter:
     772             :         case op_except: {
     773        2790 :                 bool empty_cross = false;
     774        2790 :                 int i = 0;
     775        2790 :                 sql_rel *l = rel->l, *pl = rel->l, *r = rel->r, *pr = rel->r;
     776             : 
     777        2790 :                 while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     778           0 :                         pl = pl->l;
     779        2790 :                 while (is_sample(pr->op) || is_topn(pr->op))
     780           0 :                         pr = pr->l;
     781             :                 /* if it's not a projection, then project and propagate statistics */
     782        2790 :                 if (!is_project(pl->op) && !is_base(pl->op)) {
     783           0 :                         pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     784           0 :                         set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     785           0 :                         pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     786             :                 }
     787        2790 :                 if (!is_project(pr->op) && !is_base(pr->op)) {
     788           0 :                         pr = rel_project(v->sql->sa, pr, rel_projections(v->sql, pr, NULL, 0, 1));
     789           0 :                         set_count_prop(v->sql->sa, pr, get_rel_count(pr->l));
     790           0 :                         pr->exps = exps_exp_visitor_bottomup(v, pr, pr->exps, 0, &rel_propagate_statistics, false);
     791             :                 }
     792             : 
     793       11662 :                 for (node *n = rel->exps->h ; n ; n = n->next) {
     794        8872 :                         empty_cross |= rel_setop_get_statistics(v->sql, rel, pl->exps, pr->exps, n->data, i);
     795        8872 :                         i++;
     796             :                 }
     797             : 
     798             :                 /* propagate row count */
     799        2790 :                 if (is_union(rel->op)) {
     800         277 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     801         277 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     802             : 
     803         277 :                         if (lv == 0 && rv == 0) { /* both sides empty */
     804           2 :                                 if (can_be_pruned)
     805             :                                         empty_cross = true;
     806             :                                 else
     807           2 :                                         set_count_prop(v->sql->sa, rel, 0);
     808         275 :                         } else if (can_be_pruned && lv == 0 && !rel_is_ref(rel)) { /* left side empty */
     809           0 :                                 rel = set_setop_side(v, rel, r);
     810           0 :                                 empty_cross = false; /* don't rewrite again */
     811         275 :                         } else if (can_be_pruned && rv == 0 && !rel_is_ref(rel)) { /* right side empty */
     812           0 :                                 rel = set_setop_side(v, rel, l);
     813           0 :                                 empty_cross = false; /* don't rewrite again */
     814         275 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
     815           7 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
     816             :                         }
     817        2513 :                 } else if (is_inter(rel->op) || is_except(rel->op)) {
     818        2513 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     819        2513 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     820             : 
     821        2513 :                         if (lv == 0) { /* left side empty */
     822          62 :                                 if (can_be_pruned)
     823             :                                         empty_cross = true;
     824             :                                 else
     825           5 :                                         set_count_prop(v->sql->sa, rel, 0);
     826        2451 :                         } else if (rv == 0) { /* right side empty */
     827          17 :                                 if (is_inter(rel->op)) {
     828           3 :                                         if (can_be_pruned)
     829             :                                                 empty_cross = true;
     830             :                                         else
     831           0 :                                                 set_count_prop(v->sql->sa, rel, 0);
     832          14 :                                 } else if (can_be_pruned && !rel_is_ref(rel)) {
     833          14 :                                         rel = set_setop_side(v, rel, l);
     834          14 :                                         empty_cross = false; /* don't rewrite again */
     835             :                                 } else {
     836           0 :                                         set_count_prop(v->sql->sa, rel, lv);
     837             :                                 }
     838             :                         } else {
     839        2434 :                                 set_count_prop(v->sql->sa, rel, lv);
     840             :                         }
     841             :                 }
     842        2790 :                 if (can_be_pruned && empty_cross) {
     843          80 :                         rel_destroy(rel->l);
     844          80 :                         rel->l = NULL;
     845          80 :                         rel_destroy(rel->r);
     846          80 :                         rel->r = NULL;
     847         244 :                         for (node *n = rel->exps->h ; n ; n = n->next) {
     848         164 :                                 sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     849         164 :                                 exp_prop_alias(v->sql->sa, a, e);
     850         164 :                                 n->data = a;
     851             :                         }
     852          80 :                         list_hash_clear(rel->exps);
     853          80 :                         sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     854          80 :                         set_count_prop(v->sql->sa, l, 1);
     855          80 :                         l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     856          80 :                         set_count_prop(v->sql->sa, l, 0);
     857          80 :                         rel->op = op_project;
     858          80 :                         rel->l = l;
     859          80 :                         rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     860          80 :                         set_count_prop(v->sql->sa, rel, 0);
     861          80 :                         set_nodistinct(rel); /* set relations may have distinct flag set */
     862          80 :                         v->changes++;
     863             :                 }
     864             :                 break;
     865             :         }
     866       12863 :         case op_munion: {
     867       12863 :                 list *l = rel->l, *nrels = sa_list(v->sql->sa);
     868       12863 :                 BUN cnt = 0;
     869       12863 :                 bool needs_pruning = false;
     870             : 
     871       49198 :                 for (node *n = l->h; n; n = n->next) {
     872       36335 :                         sql_rel *r = n->data, *pl = r;
     873             : 
     874       36335 :                         while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     875           0 :                                         pl = pl->l;
     876             :                         /* if it's not a projection, then project and propagate statistics */
     877       36335 :                         if (!is_project(pl->op) && !is_base(pl->op)) {
     878           0 :                                 pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     879           0 :                                 set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     880           0 :                                 pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     881             :                         }
     882       36335 :                         nrels = append(nrels, pl);
     883             :                         /* we need new munion statistics */
     884             :                         /* propagate row count */
     885       36335 :                         BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     886             :                         /* if PROP_COUNT does not exist we assume at least a row (see get_rel_count def) */
     887       36335 :                         if (rv == BUN_NONE) {
     888        1156 :                                 cnt++;
     889        1156 :                                 continue;
     890             :                         }
     891       35179 :                         if (!rv && can_be_pruned)
     892        6616 :                                 needs_pruning = true;
     893             :                         /* overflow check */
     894       35179 :                         if (rv > (BUN_MAX - cnt))
     895       36335 :                                 rv = BUN_MAX;
     896             :                         else
     897       35179 :                                 cnt += rv;
     898             :                 }
     899       12863 :                 int i = 0;
     900       73548 :                 for (node *n = rel->exps->h ; n ; n = n->next, i++)
     901       60685 :                         rel_munion_get_statistics(v->sql, rel, nrels, n->data, i);
     902             : 
     903       12863 :                 if (needs_pruning && !rel_is_ref(rel)) {
     904        4455 :                         v->changes++;
     905        4455 :                         list *nl = sa_list(l->sa);
     906             : 
     907       16438 :                         for (node *n = nrels->h; n; n = n->next) {
     908       11983 :                                 sql_rel *r = n->data;
     909       11983 :                                 BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     910             : 
     911       11983 :                                 if (!rv) { /* keep last for now */
     912        6146 :                                         rel_destroy(r);
     913        6146 :                                         continue;
     914             :                                 }
     915        5837 :                                 nl = append(nl, r);
     916             :                         }
     917        4455 :                         rel->l = nl;
     918        4455 :                         if (list_length(nl) == 1) {
     919        4129 :                                 sql_rel *l = rel->l = nl->h->data; /* ugh */
     920        4129 :                                 rel->r = NULL;
     921        4129 :                                 rel->op = op_project;
     922             : 
     923       20283 :                                 for (node *n = rel->exps->h, *m = l->exps->h ; n && m ; n = n->next, m = m->next) {
     924       16154 :                                         sql_exp *pe = n->data, *ie = m->data;
     925       16154 :                                         sql_exp *ne = exp_ref(v->sql, ie);
     926       16154 :                                         exp_prop_alias(v->sql->sa, ne, pe);
     927       16154 :                                         n->data = ne;
     928             :                                 }
     929        4129 :                                 list_hash_clear(rel->exps);
     930         326 :                         } else if (list_empty(nl)) {
     931             :                                 /* empty select (project [ nils ] ) */
     932         454 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     933         354 :                                         sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     934         354 :                                         exp_prop_alias(v->sql->sa, a, e);
     935         354 :                                         n->data = a;
     936             :                                 }
     937         100 :                                 list_hash_clear(rel->exps);
     938         100 :                                 sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     939         100 :                                 set_count_prop(v->sql->sa, l, 1);
     940         100 :                                 l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     941         100 :                                 set_count_prop(v->sql->sa, l, 0);
     942         100 :                                 rel->op = op_project;
     943         100 :                                 rel->r = NULL;
     944         100 :                                 rel->l = l;
     945         100 :                                 rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     946         100 :                                 set_count_prop(v->sql->sa, rel, 0);
     947         100 :                                 set_nodistinct(rel); /* set relations may have distinct flag set */
     948             :                         }
     949             :                 } else {
     950        8408 :                         set_count_prop(v->sql->sa, rel, cnt);
     951             :                 }
     952             :                 break;
     953             :         }
     954      532042 :         case op_join:
     955             :         case op_left:
     956             :         case op_right:
     957             :         case op_full:
     958             :         case op_semi:
     959             :         case op_anti:
     960             :         case op_select:
     961             :         case op_project:
     962             :         case op_groupby: {
     963      532042 :                 if (is_groupby(rel->op) && !list_empty(rel->r))
     964       15531 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     965      532042 :                 rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, &rel_propagate_statistics, false);
     966      532031 :                 if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r))
     967       39252 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     968             :                 /* The following optimizations can only be applied after propagating the statistics to rel->exps */
     969      532032 :                 if (can_be_pruned && (is_join(rel->op) || is_select(rel->op)) && !list_empty(rel->exps)) {
     970      139266 :                         int changes = v->changes;
     971      139266 :                         rel->exps = rel_prune_predicates(v, rel);
     972      139266 :                         if (v->changes > changes)
     973        3618 :                                 rel = rewrite_simplify(v, 0, v->value_based_opt, rel);
     974             :                 }
     975             : 
     976             :                 /* propagate row count */
     977      532032 :                 sql_rel *l = rel->l, *r = rel->r;
     978      532032 :                 switch (rel->op) {
     979      134379 :                 case op_join:
     980             :                 case op_left:
     981             :                 case op_right:
     982             :                 case op_full: {
     983      134379 :                         BUN lv = get_rel_count(l), rv = get_rel_count(r), uniques_estimate = BUN_MAX, join_idx_estimate = BUN_MAX;
     984             : 
     985      134379 :                         if (!list_empty(rel->exps) && !is_single(rel)) {
     986      245795 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     987      125480 :                                         sql_exp *e = n->data, *el = e->l, *er = e->r;
     988             : 
     989      125480 :                                         if (find_prop(e->p, PROP_JOINIDX)) {
     990         667 :                                                 join_idx_estimate = lv>rv?lv:rv;
     991      124813 :                                         } else if (e->type == e_cmp && e->flag == cmp_equal) {
     992             :                                                 /* if one of the sides is unique, the cardinality will be that exact number, but look for nulls */
     993      120929 :                                                 if (!is_semantics(e) || !has_nil(el) || !has_nil(er)) {
     994      120533 :                                                         BUN lu = 0, ru = 0;
     995      120533 :                                                         prop *p = NULL;
     996      120533 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES)))
     997       89458 :                                                                 lu = (BUN) p->value.dval;
     998      120533 :                                                         if ((p = find_prop(er->p, PROP_NUNIQUES)))
     999      103843 :                                                                 ru = (BUN) p->value.dval;
    1000      120533 :                                                         if (is_unique(el) || lu > lv) {
    1001       73582 :                                                                 BUN ncount = (is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv;
    1002       73582 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1003       46951 :                                                         } else if (is_unique(er) || ru > rv) {
    1004       29324 :                                                                 BUN ncount = (is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv;
    1005       29324 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1006             :                                                         }
    1007             :                                                 }
    1008             :                                         }
    1009             :                                 }
    1010             :                         }
    1011      134379 :                         if (is_single(rel)) {
    1012        4731 :                                 set_count_prop(v->sql->sa, rel, lv);
    1013      129648 :                         } else if (join_idx_estimate != BUN_MAX) {
    1014         665 :                                 set_count_prop(v->sql->sa, rel, join_idx_estimate);
    1015      128983 :                         } else if (uniques_estimate != BUN_MAX) {
    1016       96461 :                                 set_count_prop(v->sql->sa, rel, uniques_estimate);
    1017       32522 :                         } else if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1018             :                                 /* corner cases for outer joins */
    1019         126 :                                 if (is_left(rel->op)) {
    1020         114 :                                         set_count_prop(v->sql->sa, rel, lv);
    1021          12 :                                 } else if (is_right(rel->op)) {
    1022           3 :                                         set_count_prop(v->sql->sa, rel, rv);
    1023           9 :                                 } else if (is_full(rel->op) && lv != BUN_NONE && rv != BUN_NONE) {
    1024           9 :                                         set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
    1025           0 :                                 } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1026           0 :                                         set_count_prop(v->sql->sa, rel, 0);
    1027             :                                 }
    1028       32396 :                         } else if (lv == 0) {
    1029        1166 :                                 set_count_prop(v->sql->sa, rel, (is_right(rel->op) || is_full(rel->op)) ? rv : 0);
    1030       31799 :                         } else if (rv == 0) {
    1031        1568 :                                 set_count_prop(v->sql->sa, rel, (is_left(rel->op) || is_full(rel->op)) ? lv : 0);
    1032       30725 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1033       18276 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX / lv)) ? BUN_MAX : (lv * rv)); /* overflow check */
    1034             :                         }
    1035             :                         break;
    1036             :                 }
    1037        2635 :                 case op_anti:
    1038        2635 :                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1039        2635 :                         break;
    1040      108868 :                 case op_semi:
    1041             :                 case op_select:
    1042             :                         /* TODO calculate cardinalities using selectivities */
    1043      108868 :                         if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1044        2743 :                                 set_count_prop(v->sql->sa, rel, 0);
    1045             :                         } else {
    1046      209002 :                                 if (!list_empty(rel->exps) && !is_single(rel)) {
    1047      102877 :                                         BUN cnt = get_rel_count(l), u = 1;
    1048      171629 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1049      112717 :                                                 sql_exp *e = n->data, *el = e->l, *er = e->r;
    1050             : 
    1051             :                                                 /* simple expressions first */
    1052      112717 :                                                 if (e->type == e_cmp && e->flag == cmp_equal && exp_is_atom(er)) {
    1053             :                                                         /* use selectivity */
    1054       57154 :                                                         prop *p;
    1055       57154 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES))) {
    1056       43965 :                                                                 u = (BUN) p->value.dval;
    1057       43965 :                                                                 break;
    1058             :                                                         }
    1059             :                                                 }
    1060             :                                         }
    1061             :                                         /* u is an *estimate*, so don't set count_prop to 0 unless cnt is 0 */
    1062      102877 :                                         set_count_prop(v->sql->sa, rel, cnt == 0 ? 0 : u == 0 || u > cnt ? 1 : cnt/u);
    1063             :                                 } else {
    1064        3248 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1065             :                                 }
    1066             :                         }
    1067             :                         break;
    1068      262792 :                 case op_project:
    1069      262792 :                         if (l) {
    1070      252678 :                                 if (need_distinct(rel)) {
    1071           0 :                                         set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->exps));
    1072             :                                 } else {
    1073      252678 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1074             :                                 }
    1075             :                         } else {
    1076       10114 :                                 BUN card = 1;
    1077             : 
    1078       10114 :                                 if (!list_empty(rel->exps)) {
    1079       30661 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1080       20547 :                                                 sql_exp *e = n->data;
    1081       20547 :                                                 card = MAX(card, trivial_project_exp_card(e));
    1082             :                                         }
    1083             :                                 }
    1084       10114 :                                 set_count_prop(v->sql->sa, rel, card);
    1085             :                         }
    1086             :                         break;
    1087       22953 :                 case op_groupby:
    1088       22953 :                         if (list_empty(rel->r)) {
    1089        7421 :                                 set_count_prop(v->sql->sa, rel, 1);
    1090             :                         } else {
    1091       15532 :                                 set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->r));
    1092             :                         }
    1093             :                         break;
    1094             :                 default:
    1095             :                         break;
    1096             :                 }
    1097             :                 break;
    1098             :         }
    1099       16788 :         case op_topn: {
    1100       16788 :                 BUN lv = get_rel_count(rel->l);
    1101             : 
    1102       16787 :                 if (lv != BUN_NONE) {
    1103       16770 :                         sql_exp *le = rel->exps->h->data, *oe = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
    1104          96 :                         if (oe && oe->l && exp_is_not_null(oe)) { /* no parameters */
    1105          96 :                                 BUN offset = (BUN) ((atom*)oe->l)->data.val.lval;
    1106          96 :                                 lv = offset >= lv ? 0 : lv - offset;
    1107             :                         }
    1108       16771 :                         if (le->l && exp_is_not_null(le)) {
    1109       16733 :                                 BUN limit = (BUN) ((atom*)le->l)->data.val.lval;
    1110       16733 :                                 lv = MIN(lv, limit);
    1111             :                         }
    1112       16771 :                         set_count_prop(v->sql->sa, rel, lv);
    1113             :                 }
    1114             :                 break;
    1115             :         }
    1116          22 :         case op_sample: {
    1117          22 :                 BUN lv = get_rel_count(rel->l);
    1118             : 
    1119          22 :                 if (lv != BUN_NONE) {
    1120           4 :                         sql_exp *se = rel->exps->h->data;
    1121           4 :                         sql_subtype *tp = exp_subtype(se);
    1122             : 
    1123           4 :                         if (se->l && tp->type->eclass == EC_NUM) { /* sample is a number of rows */
    1124           4 :                                 BUN sample = (BUN) ((atom*)se->l)->data.val.lval;
    1125           4 :                                 lv = MIN(lv, sample);
    1126           0 :                         } else if (se->l) { /* sample is a percentage of rows */
    1127           0 :                                 dbl percent = ((atom*)se->l)->data.val.dval;
    1128           0 :                                 assert(tp->type->eclass == EC_FLT);
    1129           0 :                                 lv = (BUN) ceil((dbl)lv * percent);
    1130             :                         }
    1131           4 :                         set_count_prop(v->sql->sa, rel, lv);
    1132             :                 }
    1133             :                 break;
    1134             :         }
    1135        5972 :         case op_table: {
    1136        5972 :                 sql_exp *op = rel->r;
    1137        5972 :                 if (rel->flag != TRIGGER_WRAPPER && op) {
    1138        5660 :                         sql_subfunc *f = op->f;
    1139        5660 :                         if (f->func->lang == FUNC_LANG_SQL) {
    1140          97 :                                 set_count_prop(v->sql->sa, rel, 1000 /* just some fallback value */);
    1141        5563 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "storage") == 0) {
    1142         829 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of catalog */);
    1143        4734 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "db_users") == 0) {
    1144           0 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of users */);
    1145        4734 :                         } else if (f->func->lang == FUNC_LANG_MAL && strncmp(f->func->base.name, "querylog", 8) == 0) {
    1146        1085 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of querylog */);
    1147        3649 :                         } else if (f->func->lang == FUNC_LANG_MAL &&
    1148        3539 :                                            (strcmp(f->func->base.name, "queue") == 0 ||
    1149        3275 :                                                 strcmp(f->func->base.name, "optimizers") == 0 ||
    1150        2963 :                                                 strcmp(f->func->base.name, "env") == 0 ||
    1151        2672 :                                                 strcmp(f->func->base.name, "keywords") == 0 ||
    1152        2672 :                                                 strcmp(f->func->base.name, "statistics") == 0 ||
    1153        2005 :                                                 strcmp(f->func->base.name, "rejects") == 0 ||
    1154        1751 :                                                 strcmp(f->func->base.name, "schemastorage") == 0 ||
    1155        1751 :                                                 strncmp(f->func->base.name, "storage", 7) == 0 ||
    1156        1751 :                                                 strcmp(f->func->base.name, "sessions") == 0) ) {
    1157        2078 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of queue */);
    1158             :                         }
    1159             :                         /* else {
    1160             :                                 printf("%%func needs stats : %s\n", f->func->base.name);
    1161             :                         } */
    1162             :                 }
    1163             :                 break;
    1164             :         }
    1165             :         /*These relations are less important for now
    1166             :           TODO later we can tune it */
    1167             :         case op_insert:
    1168             :         case op_update:
    1169             :         case op_delete:
    1170             :         case op_truncate:
    1171             :         case op_ddl:
    1172             :         default:
    1173             :                 break;
    1174             :         }
    1175             : 
    1176             :         return rel;
    1177             : }
    1178             : 
    1179             : static sql_rel *
    1180      539410 : rel_get_statistics(visitor *v, global_props *gp, sql_rel *rel)
    1181             : {
    1182             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
    1183      539410 :         uint8_t has_special_modify = (uint8_t) gp->has_special_modify;
    1184      539410 :         v->data = &has_special_modify;
    1185      539410 :         rel = rel_visitor_bottomup(v, rel, &rel_get_statistics_);
    1186      539765 :         v->data = gp;
    1187      539765 :         return rel;
    1188             : }
    1189             : 
    1190             : run_optimizer
    1191      704556 : bind_get_statistics(visitor *v, global_props *gp)
    1192             : {
    1193      704556 :         (void) v;
    1194      704556 :         return (gp->opt_level == 1 && !gp->cnt[op_insert]) ? rel_get_statistics : NULL;
    1195             : }
    1196             : 
    1197             : 
    1198             : static bool
    1199       94897 : point_select_on_unique_column(sql_rel *rel)
    1200             : {
    1201       94897 :         if (is_select(rel->op) && !list_empty(rel->exps)) {
    1202      131375 :                 for (node *n = rel->exps->h; n ; n = n->next) {
    1203       75652 :                         sql_exp *e = n->data, *el = e->l, *er = e->r, *found = NULL;
    1204             : 
    1205       75652 :                         if (is_compare(e->type) && e->flag == cmp_equal) {
    1206       33623 :                                 if (is_numeric_upcast(el))
    1207           0 :                                         el = el->l;
    1208       33623 :                                 if (is_numeric_upcast(er))
    1209           0 :                                         er = er->l;
    1210       33623 :                                 if (is_alias(el->type) && exp_is_atom(er) && (found = rel_find_exp(rel->l, el)) &&
    1211         749 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(found) || !has_nil(er)))
    1212             :                                         return true;
    1213       32874 :                                 if (is_alias(er->type) && exp_is_atom(el) && (found = rel_find_exp(rel->l, er)) &&
    1214           0 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(el) || !has_nil(found)))
    1215             :                                         return true;
    1216             :                         }
    1217             :                 }
    1218             :         }
    1219             :         return false;
    1220             : }
    1221             : 
    1222             : /*
    1223             :  * A point select on an unique column reduces the number of rows to 1. If the same select is under a
    1224             :  * join, the opposite side's select can be pushed above the join.
    1225             :  */
    1226             : static inline sql_rel *
    1227     1244722 : rel_push_select_up(visitor *v, sql_rel *rel)
    1228             : {
    1229     1244722 :         if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) || is_semi(rel->op)) && !is_single(rel)) {
    1230      253991 :                 sql_rel *l = rel->l, *r = rel->r;
    1231      253991 :                 bool can_pushup_left = is_select(l->op) && !rel_is_ref(l) && !is_single(l) && (is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op)),
    1232      253991 :                         can_pushup_right = is_select(r->op) && !rel_is_ref(r) && !is_single(r) && (is_innerjoin(rel->op) || is_right(rel->op));
    1233             : 
    1234      253991 :                 if (can_pushup_left || can_pushup_right) {
    1235       66898 :                         if (can_pushup_left)
    1236       45430 :                                 can_pushup_left = point_select_on_unique_column(r);
    1237       66898 :                         if (can_pushup_right)
    1238       49467 :                                 can_pushup_right = point_select_on_unique_column(l);
    1239             : 
    1240             :                         /* if both selects retrieve one row each, it's not worth it to push both up */
    1241       66898 :                         if (can_pushup_left && !can_pushup_right) {
    1242         683 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1243         683 :                                 nrel->l = l->l;
    1244         683 :                                 rel = rel_inplace_select(rel, nrel, l->exps);
    1245         683 :                                 assert(is_select(rel->op));
    1246         683 :                                 v->changes++;
    1247       66215 :                         } else if (!can_pushup_left && can_pushup_right) {
    1248          14 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1249          14 :                                 nrel->r = r->l;
    1250          14 :                                 rel = rel_inplace_select(rel, nrel, r->exps);
    1251          14 :                                 assert(is_select(rel->op));
    1252          14 :                                 v->changes++;
    1253             :                         }
    1254             :                 }
    1255             :         }
    1256     1244722 :         return rel;
    1257             : }
    1258             : 
    1259             : static int
    1260       93235 : sql_class_base_score(visitor *v, sql_column *c, sql_subtype *t, bool equality_based)
    1261             : {
    1262       93235 :         int de;
    1263             : 
    1264       93235 :         if (!t)
    1265             :                 return 0;
    1266       93235 :         switch (ATOMstorage(t->type->localtype)) {
    1267             :         case TYPE_bte:
    1268             :                 return 150 - 8;
    1269        1592 :         case TYPE_sht:
    1270        1592 :                 return 150 - 16;
    1271       37290 :         case TYPE_int:
    1272       37290 :                 return 150 - 32;
    1273         518 :         case TYPE_void:
    1274             :         case TYPE_lng:
    1275         518 :                 return 150 - 64;
    1276             :         case TYPE_uuid:
    1277             : #ifdef HAVE_HGE
    1278             :         case TYPE_hge:
    1279             : #endif
    1280             :                 return 150 - 128;
    1281           2 :         case TYPE_flt:
    1282           2 :                 return 75 - 24;
    1283             :         case TYPE_dbl:
    1284             :                 return 75 - 53;
    1285       30567 :         default:
    1286       30567 :                 if (equality_based && c && v->storage_based_opt && (de = mvc_is_duplicate_eliminated(v->sql, c)))
    1287        1554 :                         return 150 - de * 8;
    1288             :                 /* strings and blobs not duplicate eliminated don't get any points here */
    1289             :                 return 0;
    1290             :         }
    1291             : }
    1292             : 
    1293             : static int
    1294       34336 : score_se_base(visitor *v, sql_rel *rel, sql_exp *e)
    1295             : {
    1296       34336 :         int res = 0;
    1297       34336 :         sql_subtype *t = exp_subtype(e);
    1298       34336 :         sql_column *c = NULL;
    1299             : 
    1300       34336 :         if (e->type == e_convert) /* keep unsafes at the end (TODO improve) */
    1301             :                 return -1000;
    1302             :         /* can we find out if the underlying table is sorted */
    1303       33735 :         if ((c = exp_find_column(rel, e, -2)) && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1304       33735 :                 res += 600;
    1305             : 
    1306             :         /* prefer the shorter var types over the longer ones */
    1307       33735 :         res += sql_class_base_score(v, c, t, is_equality_or_inequality_exp(e->flag)); /* smaller the type, better */
    1308       33735 :         return res;
    1309             : }
    1310             : 
    1311             : static int
    1312       58415 : score_se(visitor *v, sql_rel *rel, sql_exp *e)
    1313             : {
    1314       58415 :         int score = 0;
    1315       58415 :         if (e->type == e_cmp && !is_complex_exp(e->flag)) {
    1316       34336 :                 sql_exp *l = e->l;
    1317             : 
    1318       34336 :                 while (l->type == e_cmp) { /* go through nested comparisons */
    1319           2 :                         sql_exp *ll;
    1320             : 
    1321           2 :                         if (l->flag == cmp_filter || l->flag == cmp_or)
    1322           0 :                                 ll = ((list*)l->l)->h->data;
    1323             :                         else
    1324           2 :                                 ll = l->l;
    1325           2 :                         if (ll->type != e_cmp)
    1326             :                                 break;
    1327             :                         l = ll;
    1328             :                 }
    1329       34336 :                 score += score_se_base(v, rel, l);
    1330             :         }
    1331       58415 :         score += exp_keyvalue(e);
    1332       58415 :         return score;
    1333             : }
    1334             : 
    1335             : static inline sql_rel *
    1336     1244722 : rel_select_order(visitor *v, sql_rel *rel)
    1337             : {
    1338     1244722 :         int *scores = NULL;
    1339     1244722 :         sql_exp **exps = NULL;
    1340             : 
    1341     1244722 :         if (is_select(rel->op) && list_length(rel->exps) > 1) {
    1342       27325 :                 node *n;
    1343       27325 :                 int i, nexps = list_length(rel->exps);
    1344       27325 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, nexps);
    1345       27325 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, nexps);
    1346             : 
    1347       85740 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next) {
    1348       58445 :                         exps[i] = n->data;
    1349       58445 :                         if (find_prop(exps[i]->p, PROP_HASHCOL))
    1350             :                                 return rel;
    1351       58415 :                         scores[i] = score_se(v, rel, n->data);
    1352             :                 }
    1353       27295 :                 GDKqsort(scores, exps, NULL, nexps, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1354             : 
    1355       85710 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next)
    1356       58415 :                         n->data = exps[i];
    1357             :         }
    1358             : 
    1359             :         return rel;
    1360             : }
    1361             : 
    1362             : /* Compute the efficiency of using this expression early in a group by list */
    1363             : static int
    1364       59500 : score_gbe(visitor *v, sql_rel *rel, sql_exp *e)
    1365             : {
    1366       59500 :         int res = 0;
    1367       59500 :         sql_subtype *t = exp_subtype(e);
    1368       59500 :         sql_column *c = exp_find_column(rel, e, -2);
    1369             : 
    1370       59500 :         if (e->card == CARD_ATOM) /* constants are trivial to group */
    1371          38 :                 res += 1000;
    1372             :         /* can we find out if the underlying table is sorted */
    1373       59500 :         if (is_unique(e) || find_prop(e->p, PROP_HASHCOL) || (c && v->storage_based_opt && mvc_is_unique(v->sql, c))) /* distinct columns */
    1374        3584 :                 res += 700;
    1375       38910 :         if (c && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1376        3710 :                 res += 500;
    1377       59500 :         if (find_prop(e->p, PROP_HASHIDX)) /* has hash index */
    1378           0 :                 res += 200;
    1379             : 
    1380             :         /* prefer the shorter var types over the longer ones */
    1381       59500 :         res += sql_class_base_score(v, c, t, true); /* smaller the type, better */
    1382       59500 :         return res;
    1383             : }
    1384             : 
    1385             : /* reorder group by expressions */
    1386             : static inline sql_rel *
    1387     1244722 : rel_groupby_order(visitor *v, sql_rel *rel)
    1388             : {
    1389     1244722 :         int *scores = NULL;
    1390     1244722 :         sql_exp **exps = NULL;
    1391             : 
    1392     1244722 :         if (is_groupby(rel->op) && list_length(rel->r) > 1) {
    1393       25587 :                 node *n;
    1394       25587 :                 list *gbe = rel->r;
    1395       25587 :                 int i, ngbe = list_length(gbe);
    1396       25587 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, ngbe);
    1397       25587 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, ngbe);
    1398             : 
    1399             :                 /* first sorting step, give priority for integers and sorted columns */
    1400       85087 :                 for (i = 0, n = gbe->h; n; i++, n = n->next) {
    1401       59500 :                         exps[i] = n->data;
    1402       59500 :                         scores[i] = score_gbe(v, rel, exps[i]);
    1403             :                 }
    1404       25587 :                 GDKqsort(scores, exps, NULL, ngbe, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1405             : 
    1406             :                 /* second sorting step, give priority to strings with lower number of digits */
    1407       50504 :                 for (i = ngbe - 1; i && !scores[i]; i--); /* find expressions with no score from the first round */
    1408       25587 :                 if (scores[i])
    1409       24589 :                         i++;
    1410       25587 :                 if (ngbe - i > 1) {
    1411        8604 :                         for (int j = i; j < ngbe; j++) {
    1412        6597 :                                 sql_subtype *t = exp_subtype(exps[j]);
    1413        6597 :                                 scores[j] = t ? t->digits : 0;
    1414             :                         }
    1415             :                         /* the less number of digits the better, order ascending */
    1416        2007 :                         GDKqsort(scores + i, exps + i, NULL, ngbe - i, sizeof(int), sizeof(void *), TYPE_int, false, true);
    1417             :                 }
    1418             : 
    1419       85087 :                 for (i = 0, n = gbe->h; n; i++, n = n->next)
    1420       59500 :                         n->data = exps[i];
    1421             :         }
    1422             : 
    1423     1244722 :         return rel;
    1424             : }
    1425             : 
    1426             : /* This optimization loop contains optimizations that can potentially use statistics */
    1427             : static sql_rel *
    1428     1244721 : rel_final_optimization_loop_(visitor *v, sql_rel *rel)
    1429             : {
    1430             :         /* Run rel_push_select_up only once at the end to avoid an infinite optimization loop */
    1431     1244721 :         rel = rel_push_select_up(v, rel);
    1432     1244722 :         rel = rel_select_order(v, rel);
    1433             : 
    1434             :         /* TODO? Maybe later add rel_simplify_count, rel_join2semijoin, rel_simplify_fk_joins,
    1435             :            rel_distinct_project2groupby, rel_simplify_predicates, rel_simplify_math,
    1436             :            rel_distinct_aggregate_on_unique_values */
    1437             : 
    1438     1244722 :         rel = rel_groupby_order(v, rel);
    1439     1244720 :         return rel;
    1440             : }
    1441             : 
    1442             : static sql_rel *
    1443       66043 : rel_final_optimization_loop(visitor *v, global_props *gp, sql_rel *rel)
    1444             : {
    1445       66043 :         (void) gp;
    1446       66043 :         return rel_visitor_bottomup(v, rel, &rel_final_optimization_loop_);
    1447             : }
    1448             : 
    1449             : run_optimizer
    1450      705107 : bind_final_optimization_loop(visitor *v, global_props *gp)
    1451             : {
    1452      705107 :         int flag = v->sql->sql_optimizer;
    1453             :         /* At the moment, this optimizer has dependency on 3 flags */
    1454      547277 :         return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_select]) &&
    1455      771152 :                 (flag & push_select_up) && (flag & optimize_select_and_joins_topdown) && (flag & optimize_projections) ? rel_final_optimization_loop : NULL;
    1456             : }

Generated by: LCOV version 1.14