LCOV - code coverage report
Current view: top level - sql/server - rel_statistics.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 831 883 94.1 %
Date: 2024-11-11 20:03:36 Functions: 26 26 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include "rel_optimizer_private.h"
      15             : #include "rel_statistics.h"
      16             : #include "rel_basetable.h"
      17             : #include "rel_rewriter.h"
      18             : 
      19             : static sql_exp *
      20     3462441 : comparison_find_column(sql_exp *input, sql_exp *e)
      21             : {
      22     3462441 :         switch (input->type) {
      23       98657 :         case e_convert: {
      24       98657 :                 list *types = (list *)input->r;
      25       98657 :                 sql_class from = ((sql_subtype*)types->h->data)->type->eclass, to = ((sql_subtype*)types->h->next->data)->type->eclass;
      26       98657 :                 if (from == to)
      27      190249 :                         return comparison_find_column(input->l, e) ? input : NULL;
      28             :                 return NULL;
      29             :         }
      30     3054183 :         case e_column:
      31     3054183 :                 return exp_match(e, input) ? input : NULL;
      32             :         default:
      33             :                 return NULL;
      34             :         }
      35             : }
      36             : 
      37             : /* multi lo <= col <= hi, maybe still be false even if lo or hi are NULL, possibly similar for filter on multiple
      38             :  * columns */
      39             : #define comp_single_column(c) (!c->f)
      40             : 
      41             : static sql_exp *
      42     8755228 : rel_propagate_column_ref_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
      43             : {
      44     8838849 :         assert(e->type == e_column);
      45     8838849 :         if (rel) {
      46     8838770 :                 switch(rel->op) {
      47     4087909 :                 case op_left:
      48             :                 case op_right:
      49             :                 case op_full:
      50             :                 case op_join:
      51             :                 case op_select:
      52             :                 case op_anti:
      53             :                 case op_semi: {
      54     4087909 :                         bool found_without_semantics = false, found_left = false, found_right = false, still_unique = false;
      55             : 
      56     4087909 :                         if ((is_innerjoin(rel->op) || is_select(rel->op)) && list_length(rel->exps) == 1 && exp_is_false(rel->exps->h->data))
      57             :                                 return NULL; /* nothing will pass, skip */
      58             : 
      59             :                         /* propagate from the bottom first */
      60     4072354 :                         if (rel_propagate_column_ref_statistics(sql, rel->l, e))
      61             :                                 found_left = true;
      62     2592158 :                         if (!found_left && is_join(rel->op) && rel_propagate_column_ref_statistics(sql, rel->r, e))
      63     4072354 :                                 found_right = true;
      64             : 
      65     4072354 :                         if (!found_left && !found_right)
      66             :                                 return NULL;
      67     1776344 :                         if (!list_empty(rel->exps) && rel->op != op_anti) { /* if there's an or, the MIN and MAX get difficult to propagate */
      68     3494160 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
      69     1809020 :                                         sql_exp *comp = n->data, *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL, *fe = comp->f, *fne = NULL;
      70             : 
      71     1809020 :                                         if (comp->type == e_cmp) {
      72     1808018 :                                                 if (is_theta_exp(comp->flag) && ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) || (fe && (fne = comparison_find_column(fe, e))))) {
      73      124979 :                                                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
      74      124979 :                                                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
      75             : 
      76             :                                                         /* not semantics found or if explicitly filtering not null values from the column */
      77      124979 :                                                         found_without_semantics |= (!is_semantics(comp) && comp_single_column(comp)) || (comp->flag == cmp_equal && lne && is_anti(comp) && exp_is_null(re));
      78      124979 :                                                         still_unique |= comp->flag == cmp_equal && is_unique(le) && is_unique(re); /* unique if only equi-joins on unique columns are there */
      79      124979 :                                                         if (is_full(rel->op) || (is_left(rel->op) && found_left) || (is_right(rel->op) && found_right)) /* on outer joins, min and max cannot be propagated on some cases */
      80       19789 :                                                                 continue;
      81             :                                                         /* if (end2 >= start1 && start2 <= end1) then the 2 intervals are intersected */
      82      105190 :                                                         if (fe && lval_min && lval_max) { /* range case, the middle expression must intersect the other two */
      83        4622 :                                                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
      84        4622 :                                                                 int int1 = rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0,
      85        4622 :                                                                         int2 = fval_min && fval_max && atom_cmp(fval_max, lval_min) >= 0 && atom_cmp(fval_min, lval_max) <= 0,
      86        4622 :                                                                         symmetric = is_symmetric(comp);
      87             : 
      88        4622 :                                                                 if (is_anti(comp) || (!symmetric && fval_min && rval_max && atom_cmp(fval_min, rval_max) < 0)) /* for asymmetric case the re range must be after the fe range */
      89        1817 :                                                                         continue;
      90        2805 :                                                                 if (lne && int1 && int2) {
      91         104 :                                                                         if (symmetric) {
      92           0 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
      93           0 :                                                                                 atom *nmin = statistics_atom_min(sql, rval_min, fval_min), *nmax = statistics_atom_max(sql, rval_max, fval_max);
      94             :                                                                                 /* max is min from le and (max from re and fe max) */
      95           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, nmax, p2->value.pval) : nmax);
      96             :                                                                                 /* min is max from le and (min from re and fe min) */
      97           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, nmin, p1->value.pval) : nmin);
      98             :                                                                         } else {
      99         104 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
     100             :                                                                                 /* max is min from le and fe max */
     101         104 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, fval_max, p2->value.pval) : fval_max);
     102             :                                                                                 /* min is max from le and re min */
     103         104 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, rval_min, p1->value.pval) : rval_min);
     104             :                                                                         }
     105        2701 :                                                                 } else if (rne) {
     106         590 :                                                                         if (symmetric && int1 && int2) { /* min is max from le and (min from re and fe min) */
     107           0 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     108           0 :                                                                                 atom *nmin = p ? statistics_atom_min(sql, p->value.pval, fval_min) : fval_min;
     109           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, nmin, lval_min));
     110         590 :                                                                         } else if (int1) { /* min is max from le and re min */
     111         100 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     112         100 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     113             :                                                                         }
     114        2111 :                                                                 } else if (fne) {
     115         549 :                                                                         if (symmetric && int1 && int2) { /* max is min from le and (max from re and fe max) */
     116           0 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     117           0 :                                                                                 atom *nmax = p ? statistics_atom_max(sql, p->value.pval, rval_max) : rval_max;
     118           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, nmax, lval_max) : nmax);
     119         549 :                                                                         } else if (int2) { /* max is min from le and fe max */
     120         266 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     121         266 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     122             :                                                                         }
     123             :                                                                 }
     124      100568 :                                                         } else if (lval_min && lval_max && rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0) {
     125             :                                                                 /* both min and max must be set and the intervals must overlap */
     126       42997 :                                                                 switch (comp->flag) {
     127       29001 :                                                                 case cmp_equal: /* for equality reduce */
     128       29001 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_max(sql, lval_max, rval_max) : statistics_atom_min(sql, lval_max, rval_max));
     129       29001 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_min(sql, lval_min, rval_min) : statistics_atom_max(sql, lval_min, rval_min));
     130       29001 :                                                                         break;
     131        4680 :                                                                 case cmp_notequal: /* for inequality expand */
     132        4680 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_min(sql, lval_max, rval_max) : statistics_atom_max(sql, lval_max, rval_max));
     133        4680 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_max(sql, lval_min, rval_min) : statistics_atom_min(sql, lval_min, rval_min));
     134        4680 :                                                                         break;
     135        5310 :                                                                 case cmp_gt:
     136             :                                                                 case cmp_gte:
     137        9682 :                                                                         if (!is_anti(comp) && lne) { /* min is max from both min */
     138        4372 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     139        4372 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min, p->value.pval) : rval_min);
     140         938 :                                                                         } else if (!is_anti(comp)) { /* max is min from both max */
     141         938 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     142         938 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     143             :                                                                         }
     144             :                                                                         break;
     145        4006 :                                                                 case cmp_lt:
     146             :                                                                 case cmp_lte:
     147        7205 :                                                                         if (!is_anti(comp) && lne) { /* max is min from both max */
     148        3199 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     149        3199 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max, p->value.pval) : rval_max);
     150         807 :                                                                         } else if (!is_anti(comp)) { /* min is max from both min */
     151         807 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     152         807 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     153             :                                                                         }
     154             :                                                                         break;
     155             :                                                                 default: /* Maybe later I can do cmp_in and cmp_notin */
     156             :                                                                         break;
     157             :                                                                 }
     158             :                                                         }
     159             :                                                 }
     160             :                                         }
     161             :                                 }
     162             :                         }
     163     1776344 :                         if (is_full(rel->op) || (is_left(rel->op) && found_right) || (is_right(rel->op) && found_left))
     164       38046 :                                 set_has_nil(e);
     165     1776344 :                         if (!is_outerjoin(rel->op) && found_without_semantics) /* at an outer join, null values pass */
     166       94131 :                                 set_has_no_nil(e);
     167     1776344 :                         if (is_join(rel->op) && is_unique(e) && !still_unique)
     168      119293 :                                 set_not_unique(e);
     169     1776344 :                         return e;
     170             :                 }
     171     4647879 :                 case op_table:
     172             :                 case op_basetable:
     173             :                 case op_union:
     174             :                 case op_except:
     175             :                 case op_inter:
     176             :                 case op_munion:
     177             :                 case op_project:
     178             :                 case op_groupby: {
     179     4647879 :                         sql_exp *found;
     180     4647879 :                         atom *fval;
     181     4647879 :                         prop *est;
     182     4647879 :                         if ((found = rel_find_exp(rel, e))) {
     183     2184562 :                                 if (rel->op != op_table) { /* At the moment don't propagate statistics for table relations */
     184     2140242 :                                         if ((fval = find_prop_and_get(found->p, PROP_MAX)))
     185     1136142 :                                                 set_minmax_property(sql, e, PROP_MAX, fval);
     186     2140238 :                                         if ((fval = find_prop_and_get(found->p, PROP_MIN)))
     187     1143025 :                                                 set_minmax_property(sql, e, PROP_MIN, fval);
     188     2140236 :                                         if (!has_nil(found))
     189     1381927 :                                                 set_has_no_nil(e);
     190     2140236 :                                         if (is_unique(found) || (need_distinct(rel) && list_length(rel->exps) == 1) ||
     191     1728969 :                                                 (is_groupby(rel->op) && (list_empty(rel->r) || (list_length(rel->r) == 1 && exps_find_exp(rel->r, e)))))
     192      422149 :                                                 set_unique(e);
     193             :                                         /* propagate unique estimation for known cases */
     194     2140236 :                                         if (is_groupby(rel->op) && list_empty(rel->r) && !find_prop(e->p, PROP_NUNIQUES)) { /* global aggregate case */
     195        7543 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     196        7543 :                                                 p->value.dval = 1;
     197     2132693 :                                         } else if (((is_basetable(rel->op) || is_except(rel->op) || is_inter(rel->op) || is_simple_project(rel->op) ||
     198       66651 :                                                                  (is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
     199     2074978 :                                                                 (est = find_prop(found->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES))) {
     200      191758 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     201      191757 :                                                 p->value.dval = est->value.dval;
     202             :                                         }
     203             :                                 }
     204     2184556 :                                 return e;
     205             :                         }
     206             :                         return NULL;
     207             :                 }
     208       83621 :                 case op_topn:
     209             :                 case op_sample:
     210       83621 :                         return rel_propagate_column_ref_statistics(sql, rel->l, e);
     211             :                 default:
     212             :                         break;
     213             :                 }
     214             :         }
     215             :         return NULL;
     216             : }
     217             : 
     218             : static atom *
     219     4541994 : atom_from_valptr( allocator *sa, sql_subtype *tpe, ValRecord *v)
     220             : {
     221     4541994 :         atom *a = SA_NEW(sa, atom);
     222             : 
     223     4541900 :         assert(!VALisnil(v));
     224     4541801 :         *a = (atom) {.tpe = *tpe,};
     225     4541801 :         SA_VALcopy(sa, &a->data, v);
     226     4541725 :         return a;
     227             : }
     228             : 
     229             : void
     230     4179165 : sql_column_get_statistics(mvc *sql, sql_column *c, sql_exp *e)
     231             : {
     232     4179165 :         bool nonil = false, unique = false;
     233     4179165 :         double unique_est = 0.0;
     234     4179165 :         ValRecord min, max;
     235     4179165 :         int ok = mvc_col_stats(sql, c, &nonil, &unique, &unique_est, &min, &max);
     236             : 
     237     4179180 :         if (has_nil(e) && nonil)
     238     2753845 :                 set_has_no_nil(e);
     239     4179180 :         if (!is_unique(e) && unique)
     240     1106664 :                 set_unique(e);
     241     4179180 :         if (unique_est != 0.0) {
     242     2934618 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     243     2934625 :                 p->value.dval = unique_est;
     244             :         }
     245     4179187 :         unsigned int digits = 0;
     246     4179187 :         sql_subtype *et = exp_subtype(e);
     247     4179155 :         if (et->type->eclass == EC_DEC || et->type->eclass == EC_NUM)
     248     2712581 :                 digits = et->digits;
     249     4179155 :         if ((ok & 2) == 2) {
     250     2268195 :                 if (!VALisnil(&max)) {
     251     2268196 :                         prop *p = e->p = prop_create(sql->sa, PROP_MAX, e->p);
     252     2268155 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &max);
     253     2268089 :                         if (digits) {
     254     1687283 :                                 unsigned int nd = atom_digits(p->value.pval);
     255     1687376 :                                 if (nd < digits)
     256             :                                         digits = nd;
     257     1687376 :                                 if (!digits)
     258             :                                         digits = 1;
     259             :                         }
     260             :                 }
     261     2268187 :                 VALclear(&max);
     262             :         }
     263     4179119 :         if ((ok & 1) == 1) {
     264     2274008 :                 if (!VALisnil(&min)) {
     265     2274005 :                         prop *p = e->p = prop_create(sql->sa, PROP_MIN, e->p);
     266     2274000 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &min);
     267     2273964 :                         if (digits) {
     268     1694243 :                                 unsigned int nd = atom_digits(p->value.pval);
     269     1694240 :                                 if (nd > digits) /* possibly set to low by max value */
     270             :                                         digits = nd;
     271             :                                 if (!digits)
     272             :                                         digits = 1;
     273             :                         }
     274             :                 }
     275     2273961 :                 VALclear(&min);
     276             :         }
     277     4179061 :         if (digits)
     278     2712490 :                 et->digits = digits;
     279     4179061 :         if (et->type->eclass == EC_DEC && et->digits <= et->scale)
     280         216 :                 et->digits = et->scale + 1;
     281     4179061 : }
     282             : 
     283             : static void
     284      938948 : rel_basetable_column_get_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
     285             : {
     286      938948 :         if (e->p)
     287             :                 return;
     288      303109 :         sql_column *c = NULL;
     289             : 
     290      303109 :         if ((c = rel_base_find_column(rel, e->nid))) {
     291      204909 :                 sql_column_get_statistics(sql, c, e);
     292             :         }
     293             : }
     294             : 
     295             : static bool
     296        8872 : rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, sql_exp *e, int i)
     297             : {
     298        8872 :         sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
     299        8872 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     300        8872 :                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     301        8872 :         prop *est;
     302             : 
     303             :         /* for the intersection, if both expressions don't overlap, it can be pruned */
     304        8872 :         if (is_inter(rel->op) && !has_nil(le) && !has_nil(re) &&
     305         511 :                 ((rval_max && lval_min && atom_cmp(rval_max, lval_min) < 0) || (rval_min && lval_max && atom_cmp(rval_min, lval_max) > 0)))
     306          26 :                 return true;
     307             : 
     308        8846 :         if (lval_max && rval_max) {
     309        2557 :                 if (is_union(rel->op))
     310           3 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     311        2554 :                 else if (is_inter(rel->op))
     312         399 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval_max, rval_max)); /* for intersect the new max will be the min of the two */
     313             :                 else /* except */
     314        2155 :                         set_minmax_property(sql, e, PROP_MAX, lval_max);
     315             :         }
     316        8846 :         if (lval_min && rval_min) {
     317        2557 :                 if (is_union(rel->op))
     318           3 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     319        2554 :                 else if (is_inter(rel->op))
     320         399 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval_min, rval_min)); /* for intersect the new min will be the max of the two */
     321             :                 else /* except */
     322        2155 :                         set_minmax_property(sql, e, PROP_MIN, lval_min);
     323             :         }
     324             : 
     325        8846 :         if (is_union(rel->op) || is_munion(rel->op)) {
     326        5986 :                 if (!has_nil(le) && !has_nil(re))
     327         879 :                         set_has_no_nil(e);
     328        5986 :                 if (need_distinct(rel) && list_length(rel->exps) == 1)
     329           0 :                         set_unique(e);
     330        2860 :         } else if (is_inter(rel->op)) {
     331         564 :                 if (!has_nil(le) || !has_nil(re))
     332         509 :                         set_has_no_nil(e);
     333         564 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     334         345 :                         set_unique(e);
     335             :         } else {
     336        2296 :                 assert(is_except(rel->op));
     337        2296 :                 if (!has_nil(le))
     338        2233 :                         set_has_no_nil(e);
     339        2296 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     340        2009 :                         set_unique(e);
     341             :         }
     342             :         /* propagate unique estimation for known cases */
     343        8846 :         if (!is_union(rel->op) && (est = find_prop(le->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES)) {
     344         205 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     345         205 :                 p->value.dval = est->value.dval;
     346             :         }
     347             :         return false;
     348             : }
     349             : 
     350             : 
     351             : static void
     352       60852 : rel_munion_get_statistics(mvc *sql, sql_rel *rel, list *rels, sql_exp *e, int i)
     353             : {
     354       60852 :         assert(is_munion(rel->op));
     355             : 
     356       60852 :         sql_rel *l = rels->h->data;
     357       60852 :         sql_exp *le = list_fetch(l->exps, i);
     358       60852 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX);
     359       60852 :         bool has_nonil = !has_nil(le);
     360             : 
     361      178024 :         for(node *n = rels->h->next; n; n = n->next) {
     362      117172 :                 sql_rel *r = n->data;
     363      117172 :                 sql_exp *re = list_fetch(r->exps, i);
     364      117172 :                 atom *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     365             : 
     366      117172 :                 if (lval_max && rval_max) {
     367       43136 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     368       43136 :                         lval_max = find_prop_and_get(e->p, PROP_MAX);
     369             :                 }
     370      117172 :                 if (lval_min && rval_min) {
     371       42591 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     372       42591 :                         lval_min = find_prop_and_get(e->p, PROP_MIN);
     373             :                 }
     374      117172 :                 has_nonil &= !has_nil(re);
     375             : 
     376             :         }
     377             : 
     378       60852 :         if (has_nonil)
     379       41583 :                 set_has_no_nil(e);
     380             : 
     381       60852 :         if (need_distinct(rel) && list_length(rel->exps) == 1)
     382        1594 :                 set_unique(e);
     383       60852 : }
     384             : 
     385             : 
     386             : static sql_exp *
     387     3491153 : rel_propagate_statistics(visitor *v, sql_rel *rel, sql_exp *e, int depth)
     388             : {
     389     3491153 :         mvc *sql = v->sql;
     390     3491153 :         atom *lval;
     391             : 
     392     3491153 :         (void) depth;
     393     3491153 :         switch(e->type) {
     394     2190395 :         case e_column:
     395     2190395 :                 switch (rel->op) { /* set relations don't call rel_propagate_statistics */
     396      279481 :                 case op_join:
     397             :                 case op_left:
     398             :                 case op_right:
     399             :                 case op_full:
     400             :                 case op_semi:
     401             :                 case op_anti: {
     402      279481 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e);
     403      279481 :                         if (!found)
     404      140185 :                                 (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     405             :                         break;
     406             :                 }
     407     1910914 :                 case op_select:
     408             :                 case op_project:
     409             :                 case op_groupby: {
     410     1910914 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e); /* labels may be found on the same projection, ugh */
     411     1910909 :                         if (!found && is_simple_project(rel->op))
     412      125491 :                                 (void) rel_propagate_column_ref_statistics(sql, rel, e);
     413             :                         break;
     414             :                 }
     415           0 :                 case op_insert:
     416             :                 case op_update:
     417             :                 case op_delete:
     418           0 :                         (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     419           0 :                         break;
     420             :                 default:
     421             :                         break;
     422             :                 }
     423             :                 break;
     424       99314 :         case e_convert: {
     425       99314 :                 sql_subtype *to = exp_totype(e), *from = exp_fromtype(e);
     426       99314 :                 sql_exp *l = e->l;
     427       99314 :                 sql_class fr = from->type->eclass, too = to->type->eclass;
     428       99314 :                 prop *est;
     429             : 
     430       99314 :                 if (fr == too) {
     431       90242 :                         if ((lval = find_prop_and_get(l->p, PROP_MAX))) {
     432       59460 :                                 atom *res = atom_copy(sql->sa, lval);
     433       59460 :                                 if ((res = atom_cast(sql->sa, res, to)))
     434       59437 :                                         set_minmax_property(sql, e, PROP_MAX, res);
     435             :                         }
     436       90242 :                         if ((lval = find_prop_and_get(l->p, PROP_MIN))) {
     437       60077 :                                 atom *res = atom_copy(sql->sa, lval);
     438       60077 :                                 if ((res = atom_cast(sql->sa, res, to)))
     439       60054 :                                         set_minmax_property(sql, e, PROP_MIN, res);
     440             :                         }
     441             :                 }
     442             :                 /* propagating nuniques through conversions is tricky. I am adding just the general cases */
     443       99314 :                 if ((est = find_prop(l->p, PROP_NUNIQUES)) &&
     444       61694 :                         (((fr == EC_TIME || fr == EC_TIME_TZ || fr == EC_DATE) && (too == EC_TIMESTAMP || too == EC_TIMESTAMP_TZ)) ||
     445       61669 :                          ((fr == EC_NUM || fr == EC_BIT) && too == EC_NUM) || (fr == EC_DEC && too == EC_DEC) || (fr != EC_EXTERNAL && too == EC_STRING))) {
     446       58242 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     447       58242 :                         p->value.dval = est->value.dval;
     448             :                 }
     449       99314 :                 if (!has_nil(l))
     450       56039 :                         set_has_no_nil(e);
     451             :                 break;
     452             :         }
     453      342131 :         case e_aggr:
     454             :         case e_func: {
     455      342131 :                 BUN lv;
     456      342131 :                 sql_subfunc *f = e->f;
     457             : 
     458      342131 :                 if (!f->func->s) {
     459      315658 :                         int key = hash_key(f->func->base.name); /* Using hash lookup */
     460      315658 :                         sql_hash_e *he = sql_functions_lookup->buckets[key&(sql_functions_lookup->size-1)];
     461      315658 :                         lookup_function look = NULL;
     462             : 
     463      689297 :                         for (; he && !look; he = he->chain) {
     464      373639 :                                 struct function_properties* fp = (struct function_properties*) he->value;
     465             : 
     466      373639 :                                 if (!strcmp(f->func->base.name, fp->name))
     467      107852 :                                         look = fp->func;
     468             :                         }
     469      315658 :                         if (look)
     470      107852 :                                 look(sql, e);
     471             :                 }
     472             :                 /* for global aggregates with no semantics, if the left relation has values, then the output won't be NULL */
     473      342131 :                 if (!is_semantics(e) && e->l && !have_nil(e->l) &&
     474       89264 :                         (e->type != e_aggr || (is_groupby(rel->op) && list_length(rel->r)) || ((lv = get_rel_count(rel->l)) != BUN_NONE && lv > 0)))
     475       88937 :                         set_has_no_nil(e);
     476             :                 /* set properties for global aggregates */
     477      342131 :                 if (e->type == e_aggr && is_groupby(rel->op) && list_empty(rel->r)) {
     478        7875 :                         if (!find_prop(e->p, PROP_NUNIQUES)) {
     479        7875 :                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     480        7875 :                                 p->value.dval = 1;
     481             :                         }
     482        7875 :                         set_unique(e);
     483             :                 }
     484             :                 break;
     485             :         }
     486      573626 :         case e_atom:
     487      573626 :                 if (e->l) {
     488      555925 :                         atom *a = (atom*) e->l;
     489      555925 :                         if (!a->isnull) {
     490      492419 :                                 set_minmax_property(sql, e, PROP_MAX, a);
     491      492419 :                                 set_minmax_property(sql, e, PROP_MIN, a);
     492             :                         }
     493      555925 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     494      555925 :                         p->value.dval = 1;
     495       17701 :                 } else if (e->f) {
     496        4176 :                         list *vals = (list *) e->f;
     497        4176 :                         sql_exp *first = vals->h ? vals->h->data : NULL;
     498        4176 :                         atom *max = NULL, *min = NULL; /* all child values must have a valid min/max */
     499        4176 :                         int has_nil = 0;
     500             : 
     501        4176 :                         if (first) {
     502        4176 :                                 max = ((lval = find_prop_and_get(first->p, PROP_MAX))) ? lval : NULL;
     503        4176 :                                 min = ((lval = find_prop_and_get(first->p, PROP_MIN))) ? lval : NULL;
     504        4176 :                                 has_nil |= has_nil(first);
     505             :                         }
     506             : 
     507       17949 :                         for (node *n = vals->h ? vals->h->next : NULL ; n ; n = n->next) {
     508        9597 :                                 sql_exp *ee = n->data;
     509             : 
     510        9597 :                                 if (min && max) {
     511        9104 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MAX))) {
     512        9058 :                                                 max = atom_cmp(lval, max) > 0 ? lval : max;
     513             :                                         } else {
     514             :                                                 max = NULL;
     515             :                                         }
     516        9104 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MIN))) {
     517        9058 :                                                 min = atom_cmp(min, lval) > 0 ? lval : min;
     518             :                                         } else {
     519             :                                                 min = NULL;
     520             :                                         }
     521             :                                 }
     522        9597 :                                 has_nil |= has_nil(ee);
     523             :                         }
     524             : 
     525        4176 :                         if (!has_nil)
     526        3806 :                                 set_has_no_nil(e);
     527        4176 :                         if (min && max) {
     528        3764 :                                 set_minmax_property(sql, e, PROP_MAX, max);
     529        3764 :                                 set_minmax_property(sql, e, PROP_MIN, min);
     530             :                         }
     531        4176 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     532        4176 :                         p->value.dval = (dbl) list_length(vals);
     533             :                 } else {
     534       13525 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     535       13525 :                         p->value.dval = 1;
     536             :                 }
     537             :                 break;
     538      285687 :         case e_cmp:
     539             :                 /* TODO? propagating min/max/unique of booleans is not very worth it */
     540      285687 :                 if (e->flag == cmp_or || e->flag == cmp_filter) {
     541       17436 :                         if (!have_nil(e->l) && !have_nil(e->r))
     542       12966 :                                 set_has_no_nil(e);
     543      268251 :                 } else if (e->flag == cmp_in || e->flag == cmp_notin) {
     544       21798 :                         sql_exp *le = e->l;
     545       21798 :                         if (!has_nil(le) && !have_nil(e->r))
     546       18734 :                                 set_has_no_nil(e);
     547             :                 } else {
     548      246453 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     549      246453 :                         if (!has_nil(le) && !has_nil(re) && (!e->f || !has_nil(fe)))
     550      176318 :                                 set_has_no_nil(e);
     551             :                 }
     552             :                 break;
     553             :         case e_psm:
     554             :                 break;
     555             :         }
     556             : 
     557             : #ifndef NDEBUG
     558             :         {
     559             :                 /* min and max cannot be NULL and min must be <= than max, if it happens the inner relation must be empty! */
     560     3491148 :                 atom *min = find_prop_and_get(e->p, PROP_MIN), *max = find_prop_and_get(e->p, PROP_MAX);
     561             : 
     562     3491148 :                 (void) min;
     563     3491148 :                 (void) max;
     564     3491148 :                 assert(!min || !min->isnull);
     565     3491148 :                 assert(!max || !max->isnull);
     566     3491148 :                 assert(!min || !max || (min && max)/* atom_cmp(min, max) <= 0*/);
     567             :         }
     568             : #endif
     569     3491148 :         return e;
     570             : }
     571             : 
     572             : static list * /* Remove predicates always false from min/max values */
     573      139300 : rel_prune_predicates(visitor *v, sql_rel *rel)
     574             : {
     575      139300 :         if (rel->l) {
     576      139300 :                 sql_rel *l = rel->l;
     577      139300 :                 if (is_single(l))
     578        3397 :                         return rel->exps;
     579             :         }
     580      135903 :         if (!list_empty(rel->attr))
     581        2963 :                 return rel->exps;
     582      282008 :         for (node *n = rel->exps->h ; n ; n = n->next) {
     583      149068 :                 sql_exp *e = n->data;
     584             : 
     585      149068 :                 if (e->type == e_cmp && is_theta_exp(e->flag)) {
     586      122668 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     587      122668 :                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     588      122668 :                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     589      122668 :                         bool always_false = false, always_true = false;
     590             : 
     591      122668 :                         if (fe && !is_symmetric(e)) {
     592        3058 :                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
     593        3058 :                                 comp_type lower = range2lcompare(e->flag), higher = range2rcompare(e->flag);
     594        3660 :                                 int not_int1 = rval_min && lval_max && /* the middle and left intervals don't overlap */
     595        1663 :                                         (!is_anti(e) && (lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0)),
     596        4072 :                                         not_int2 = lval_min && fval_max && /* the middle and right intervals don't overlap */
     597        2485 :                                         (!is_anti(e) && (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0)),
     598        3058 :                                         not_int3 = rval_min && fval_max && /* the left interval is after the right one */
     599        1287 :                                         (!is_anti(e) && (atom_cmp(rval_min, fval_max) > 0));
     600             : 
     601        3058 :                                 always_false |= not_int1 || not_int2 || not_int3;
     602             :                                 /* for anti the middle must be before the left or after the right or the right after the left, for the other the middle must be always between the left and right intervals */
     603        4113 :                                 always_true |= !has_nil(le) && !has_nil(re) && !has_nil(fe) &&
     604        3957 :                                         lval_min && lval_max && rval_min && rval_max && fval_min && fval_max &&
     605         575 :                                         (is_anti(e) ? ((lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) || (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
     606         488 :                                          ((lower == cmp_gte ? atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0)));
     607             :                         } else if (!fe) {
     608      119592 :                                 if (!is_semantics(e)) /* trivial not null cmp null case */
     609      227754 :                                         always_false |= !is_anti(e) && ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)));
     610      119592 :                                 switch (e->flag) {
     611      104211 :                                 case cmp_equal:
     612      104211 :                                         if (lval_min && lval_max && rval_min && rval_max && (!is_semantics(e) || !has_nil(le) || !has_nil(re)))
     613      133468 :                                                 always_false |= (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     614      104211 :                                         if (is_semantics(e)) { /* prune *= NULL cases */
     615        5686 :                                                 always_false |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     616       11372 :                                                 always_true |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     617             :                                         }
     618             :                                         break;
     619        7250 :                                 case cmp_notequal:
     620        7250 :                                         if (lval_min && lval_max && rval_min && rval_max)
     621       11354 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     622        7250 :                                         if (is_semantics(e)) {
     623          29 :                                                 always_true |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     624          58 :                                                 always_false |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     625             :                                         }
     626             :                                         break;
     627        5487 :                                 case cmp_gt:
     628        5487 :                                         if (lval_max && rval_min)
     629        1833 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     630        5487 :                                         if (lval_min && rval_max)
     631        3666 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     632             :                                         break;
     633         119 :                                 case cmp_gte:
     634         119 :                                         if (lval_max && rval_min)
     635          49 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     636         119 :                                         if (lval_min && rval_max)
     637          98 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     638             :                                         break;
     639        2441 :                                 case cmp_lt:
     640        2441 :                                         if (lval_min && rval_max)
     641        1383 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     642        2441 :                                         if (lval_max && rval_min)
     643        2814 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     644             :                                         break;
     645          84 :                                 case cmp_lte:
     646          84 :                                         if (lval_min && rval_max)
     647          11 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     648          84 :                                         if (lval_max && rval_min)
     649          22 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     650             :                                         break;
     651             :                                 default: /* Maybe later I can do cmp_in and cmp_notin but don't forget to remove is_theta_exp check up there */
     652             :                                         break;
     653             :                                 }
     654             :                         }
     655      122668 :                         assert(!always_false || !always_true);
     656      122668 :                         if (always_false || always_true) {
     657        3656 :                                 sql_exp *ne = exp_atom_bool(v->sql->sa, always_true /* if always_true set then true else false */);
     658        3656 :                                 if (exp_name(e))
     659           0 :                                         exp_prop_alias(v->sql->sa, ne, e);
     660        3656 :                                 n->data = ne;
     661        3656 :                                 v->changes++;
     662             :                         }
     663             :                 }
     664             :         }
     665      132940 :         return rel->exps;
     666             : }
     667             : 
     668             : static sql_rel *
     669          14 : set_setop_side(visitor *v, sql_rel *rel, sql_rel *side)
     670             : {
     671          14 :         if (side == rel->r)
     672           0 :                 rel->r = NULL;
     673             :         else
     674          14 :                 rel->l = NULL;
     675          14 :         if (!is_simple_project(side->op) || rel_is_ref(side) || !list_empty(side->r)) {
     676           0 :                 side = rel_project(v->sql->sa, side, rel_projections(v->sql, side, NULL, 0, 1));
     677           0 :                 set_count_prop(v->sql->sa, side, get_rel_count(side->l));
     678           0 :                 side->exps = exps_exp_visitor_bottomup(v, side, side->exps, 0, &rel_propagate_statistics, false);
     679             :         }
     680          35 :         for (node *n = rel->exps->h, *m = side->exps->h ; n && m ; n = n->next, m = m->next) {
     681          21 :                 sql_exp *e1 = n->data, *e2 = m->data;
     682          21 :                 exp_prop_alias(v->sql->sa, e2, e1);
     683             :         }
     684          14 :         list_hash_clear(side->exps);
     685             : 
     686          14 :         if (need_distinct(rel))
     687           0 :                 set_distinct(side);
     688          14 :         side->p = prop_copy(v->sql->sa, rel->p);
     689          14 :         rel_destroy(rel);
     690          14 :         return side;
     691             : }
     692             : 
     693             : static BUN
     694       21885 : trivial_project_exp_card(sql_exp *e)
     695             : {
     696       22188 :         if (e->type == e_convert)
     697         303 :                 return trivial_project_exp_card(e->l);
     698       21885 :         return e->type == e_atom && e->f ? (BUN) list_length(e->f) : 1;
     699             : }
     700             : 
     701             : static BUN
     702       24248 : rel_calc_nuniques(mvc *sql, sql_rel *l, list *exps)
     703             : {
     704       24248 :         BUN lv = get_rel_count(l);
     705             : 
     706       24248 :         if (lv == 0)
     707             :                 return 0;
     708       21539 :         if (!list_empty(exps)) {
     709       21539 :                 BUN nuniques = 0;
     710             :                 /* compute the highest number of unique values */
     711       51131 :                 for (node *n = exps->h ; n && nuniques != BUN_NONE ; n = n->next) {
     712       29592 :                         sql_exp *e = n->data;
     713       29592 :                         sql_rel *bt = NULL;
     714       29592 :                         prop *p = NULL;
     715       29592 :                         BUN euniques = BUN_NONE;
     716       29592 :                         atom *min, *max, *sub = NULL;
     717       29592 :                         sql_subtype *tp = exp_subtype(e);
     718       29592 :                         sql_class ec = tp ? tp->type->eclass : EC_STRING; /* if 'e' has no type (eg parameter), use a non-number type to fail condition */
     719             : 
     720       29592 :                         if ((p = find_prop(e->p, PROP_NUNIQUES))) {
     721       20956 :                                 euniques = (BUN) p->value.dval;
     722        8636 :                         } else if (e->type == e_column && e->nid && rel_find_exp_and_corresponding_rel(l, e, false, &bt, NULL) && bt && (p = find_prop(bt->p, PROP_COUNT))) {
     723        6593 :                                 euniques = (BUN) p->value.lval;
     724             :                         }
     725             :                         /* use min to max range to compute number of possible values in the domain for number types */
     726       29592 :                         if ((EC_TEMP(ec)||ec==EC_NUM||ec==EC_MONTH||ec==EC_POS) &&
     727       22363 :                                 (min = find_prop_and_get(e->p, PROP_MIN)) && (max = find_prop_and_get(e->p, PROP_MAX))) {
     728             :                                 /* the range includes min and max, so the atom_inc call is needed */
     729             :                                 /* if 'euniques' has number of distinct values, compute min between both */
     730       17528 :                                 if ((sub = atom_sub(sql->sa, max, min)) && (sub = atom_inc(sql->sa, sub)) && (sub = atom_cast(sql->sa, sub, sql_bind_localtype("oid"))))
     731           0 :                                         euniques = MIN(euniques, (BUN) sub->data.val.oval);
     732             :                         }
     733       29592 :                         if (euniques != BUN_NONE)
     734       27549 :                                 nuniques = MAX(nuniques, euniques); /* the highest cardinality sets the estimation */
     735             :                         else
     736             :                                 nuniques = BUN_NONE;
     737             :                 }
     738       21539 :                 if (nuniques != BUN_NONE)
     739             :                         return nuniques;
     740             :         }
     741             :         return lv;
     742             : }
     743             : 
     744             : static sql_rel *
     745     2077322 : rel_get_statistics_(visitor *v, sql_rel *rel)
     746             : {
     747             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
     748     2077322 :         uint8_t has_special_modify = *(uint8_t*) v->data;
     749     2077322 :         bool can_be_pruned = !has_special_modify && v->storage_based_opt;
     750             : 
     751             :         /* Don't look at the same relation twice */
     752     2077322 :         if (are_statistics_gathered(rel->used))
     753             :                 return rel;
     754     1336117 :         rel->used |= statistics_gathered;
     755             : 
     756     1336117 :         switch (rel->op) {
     757      313477 :         case op_basetable: {
     758      313477 :                 sql_table *t = (sql_table *) rel->l;
     759      313477 :                 sqlstore *store = v->sql->session->tr->store;
     760             : 
     761      313477 :                 if (!list_empty(rel->exps)) {
     762     1252376 :                         for (node *n = rel->exps->h ; n ; n = n->next)
     763      938948 :                                 rel_basetable_column_get_statistics(v->sql, rel, n->data);
     764             :                 }
     765             :                 /* Set table row count. TODO? look for remote tables. Don't look at storage for declared tables, because it won't be cleaned */
     766      313477 :                 if (isTable(t) && t->s && !isDeclaredTable(t)) /* count active rows only */
     767      259849 :                         set_count_prop(v->sql->sa, rel, (BUN)store->storage_api.count_col(v->sql->session->tr, ol_first_node(t->columns)->data, 10));
     768             :                 break;
     769             :         }
     770        2790 :         case op_union:
     771             :         case op_inter:
     772             :         case op_except: {
     773        2790 :                 bool empty_cross = false;
     774        2790 :                 int i = 0;
     775        2790 :                 sql_rel *l = rel->l, *pl = rel->l, *r = rel->r, *pr = rel->r;
     776             : 
     777        2790 :                 while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     778           0 :                         pl = pl->l;
     779        2790 :                 while (is_sample(pr->op) || is_topn(pr->op))
     780           0 :                         pr = pr->l;
     781             :                 /* if it's not a projection, then project and propagate statistics */
     782        2790 :                 if (!is_project(pl->op) && !is_base(pl->op)) {
     783           0 :                         pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     784           0 :                         set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     785           0 :                         pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     786             :                 }
     787        2790 :                 if (!is_project(pr->op) && !is_base(pr->op)) {
     788           0 :                         pr = rel_project(v->sql->sa, pr, rel_projections(v->sql, pr, NULL, 0, 1));
     789           0 :                         set_count_prop(v->sql->sa, pr, get_rel_count(pr->l));
     790           0 :                         pr->exps = exps_exp_visitor_bottomup(v, pr, pr->exps, 0, &rel_propagate_statistics, false);
     791             :                 }
     792             : 
     793       11662 :                 for (node *n = rel->exps->h ; n ; n = n->next) {
     794        8872 :                         empty_cross |= rel_setop_get_statistics(v->sql, rel, pl->exps, pr->exps, n->data, i);
     795        8872 :                         i++;
     796             :                 }
     797             : 
     798             :                 /* propagate row count */
     799        2790 :                 if (is_union(rel->op)) {
     800         277 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     801         277 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     802             : 
     803         277 :                         if (lv == 0 && rv == 0) { /* both sides empty */
     804           2 :                                 if (can_be_pruned)
     805             :                                         empty_cross = true;
     806             :                                 else
     807           2 :                                         set_count_prop(v->sql->sa, rel, 0);
     808         275 :                         } else if (can_be_pruned && lv == 0 && !rel_is_ref(rel)) { /* left side empty */
     809           0 :                                 rel = set_setop_side(v, rel, r);
     810           0 :                                 empty_cross = false; /* don't rewrite again */
     811         275 :                         } else if (can_be_pruned && rv == 0 && !rel_is_ref(rel)) { /* right side empty */
     812           0 :                                 rel = set_setop_side(v, rel, l);
     813           0 :                                 empty_cross = false; /* don't rewrite again */
     814         275 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
     815           7 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
     816             :                         }
     817        2513 :                 } else if (is_inter(rel->op) || is_except(rel->op)) {
     818        2513 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     819        2513 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     820             : 
     821        2513 :                         if (lv == 0) { /* left side empty */
     822          62 :                                 if (can_be_pruned)
     823             :                                         empty_cross = true;
     824             :                                 else
     825           5 :                                         set_count_prop(v->sql->sa, rel, 0);
     826        2451 :                         } else if (rv == 0) { /* right side empty */
     827          17 :                                 if (is_inter(rel->op)) {
     828           3 :                                         if (can_be_pruned)
     829             :                                                 empty_cross = true;
     830             :                                         else
     831           0 :                                                 set_count_prop(v->sql->sa, rel, 0);
     832          14 :                                 } else if (can_be_pruned && !rel_is_ref(rel)) {
     833          14 :                                         rel = set_setop_side(v, rel, l);
     834          14 :                                         empty_cross = false; /* don't rewrite again */
     835             :                                 } else {
     836           0 :                                         set_count_prop(v->sql->sa, rel, lv);
     837             :                                 }
     838             :                         } else {
     839        2434 :                                 set_count_prop(v->sql->sa, rel, lv);
     840             :                         }
     841             :                 }
     842        2790 :                 if (can_be_pruned && empty_cross) {
     843          80 :                         rel_destroy(rel->l);
     844          80 :                         rel->l = NULL;
     845          80 :                         rel_destroy(rel->r);
     846          80 :                         rel->r = NULL;
     847         244 :                         for (node *n = rel->exps->h ; n ; n = n->next) {
     848         164 :                                 sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     849         164 :                                 exp_prop_alias(v->sql->sa, a, e);
     850         164 :                                 n->data = a;
     851             :                         }
     852          80 :                         list_hash_clear(rel->exps);
     853          80 :                         sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     854          80 :                         set_count_prop(v->sql->sa, l, 1);
     855          80 :                         l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     856          80 :                         set_count_prop(v->sql->sa, l, 0);
     857          80 :                         rel->op = op_project;
     858          80 :                         rel->l = l;
     859          80 :                         rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     860          80 :                         set_count_prop(v->sql->sa, rel, 0);
     861          80 :                         set_nodistinct(rel); /* set relations may have distinct flag set */
     862          80 :                         v->changes++;
     863             :                 }
     864             :                 break;
     865             :         }
     866       12888 :         case op_munion: {
     867       12888 :                 list *l = rel->l, *nrels = sa_list(v->sql->sa);
     868       12888 :                 BUN cnt = 0;
     869       12888 :                 bool needs_pruning = false;
     870             : 
     871       49497 :                 for (node *n = l->h; n; n = n->next) {
     872       36609 :                         sql_rel *r = n->data, *pl = r;
     873             : 
     874       36609 :                         while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     875           0 :                                         pl = pl->l;
     876             :                         /* if it's not a projection, then project and propagate statistics */
     877       36609 :                         if (!is_project(pl->op) && !is_base(pl->op)) {
     878           0 :                                 pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     879           0 :                                 set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     880           0 :                                 pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     881             :                         }
     882       36609 :                         nrels = append(nrels, pl);
     883             :                         /* we need new munion statistics */
     884             :                         /* propagate row count */
     885       36609 :                         BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     886             :                         /* if PROP_COUNT does not exist we assume at least a row (see get_rel_count def) */
     887       36609 :                         if (rv == BUN_NONE) {
     888        1172 :                                 cnt++;
     889        1172 :                                 continue;
     890             :                         }
     891       35437 :                         if (!rv && can_be_pruned)
     892        6625 :                                 needs_pruning = true;
     893             :                         /* overflow check */
     894       35437 :                         if (rv > (BUN_MAX - cnt))
     895       36609 :                                 rv = BUN_MAX;
     896             :                         else
     897       35437 :                                 cnt += rv;
     898             :                 }
     899       12888 :                 int i = 0;
     900       73740 :                 for (node *n = rel->exps->h ; n ; n = n->next, i++)
     901       60852 :                         rel_munion_get_statistics(v->sql, rel, nrels, n->data, i);
     902             : 
     903       12888 :                 if (needs_pruning && !rel_is_ref(rel)) {
     904        4464 :                         v->changes++;
     905        4464 :                         list *nl = sa_list(l->sa);
     906             : 
     907       16465 :                         for (node *n = nrels->h; n; n = n->next) {
     908       12001 :                                 sql_rel *r = n->data;
     909       12001 :                                 BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     910             : 
     911       12001 :                                 if (!rv) { /* keep last for now */
     912        6155 :                                         rel_destroy(r);
     913        6155 :                                         continue;
     914             :                                 }
     915        5846 :                                 nl = append(nl, r);
     916             :                         }
     917        4464 :                         rel->l = nl;
     918        4464 :                         if (list_length(nl) == 1) {
     919        4138 :                                 sql_rel *l = rel->l = nl->h->data; /* ugh */
     920        4138 :                                 rel->r = NULL;
     921        4138 :                                 rel->op = op_project;
     922             : 
     923       20331 :                                 for (node *n = rel->exps->h, *m = l->exps->h ; n && m ; n = n->next, m = m->next) {
     924       16193 :                                         sql_exp *pe = n->data, *ie = m->data;
     925       16193 :                                         sql_exp *ne = exp_ref(v->sql, ie);
     926       16193 :                                         exp_prop_alias(v->sql->sa, ne, pe);
     927       16193 :                                         n->data = ne;
     928             :                                 }
     929        4138 :                                 list_hash_clear(rel->exps);
     930         326 :                         } else if (list_empty(nl)) {
     931             :                                 /* empty select (project [ nils ] ) */
     932         454 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     933         354 :                                         sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     934         354 :                                         exp_prop_alias(v->sql->sa, a, e);
     935         354 :                                         n->data = a;
     936             :                                 }
     937         100 :                                 list_hash_clear(rel->exps);
     938         100 :                                 sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     939         100 :                                 set_count_prop(v->sql->sa, l, 1);
     940         100 :                                 l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     941         100 :                                 set_count_prop(v->sql->sa, l, 0);
     942         100 :                                 rel->op = op_project;
     943         100 :                                 rel->r = NULL;
     944         100 :                                 rel->l = l;
     945         100 :                                 rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     946         100 :                                 set_count_prop(v->sql->sa, rel, 0);
     947         100 :                                 set_nodistinct(rel); /* set relations may have distinct flag set */
     948             :                         }
     949             :                 } else {
     950        8424 :                         set_count_prop(v->sql->sa, rel, cnt);
     951             :                 }
     952             :                 break;
     953             :         }
     954      533609 :         case op_join:
     955             :         case op_left:
     956             :         case op_right:
     957             :         case op_full:
     958             :         case op_semi:
     959             :         case op_anti:
     960             :         case op_select:
     961             :         case op_project:
     962             :         case op_groupby: {
     963      533609 :                 if (is_groupby(rel->op) && !list_empty(rel->r))
     964       15531 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     965      533609 :                 rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, &rel_propagate_statistics, false);
     966      533608 :                 if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r))
     967       39284 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     968             :                 /* The following optimizations can only be applied after propagating the statistics to rel->exps */
     969      533608 :                 if (can_be_pruned && (is_join(rel->op) || is_select(rel->op)) && !list_empty(rel->exps)) {
     970      139300 :                         int changes = v->changes;
     971      139300 :                         rel->exps = rel_prune_predicates(v, rel);
     972      139300 :                         if (v->changes > changes)
     973        3623 :                                 rel = rewrite_simplify(v, 0, v->value_based_opt, rel);
     974             :                 }
     975             : 
     976             :                 /* propagate row count */
     977      533608 :                 sql_rel *l = rel->l, *r = rel->r;
     978      533608 :                 switch (rel->op) {
     979      134701 :                 case op_join:
     980             :                 case op_left:
     981             :                 case op_right:
     982             :                 case op_full: {
     983      134701 :                         BUN lv = get_rel_count(l), rv = get_rel_count(r), uniques_estimate = BUN_MAX, join_idx_estimate = BUN_MAX;
     984             : 
     985      134701 :                         if (!list_empty(rel->exps) && !is_single(rel)) {
     986      246427 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     987      125796 :                                         sql_exp *e = n->data, *el = e->l, *er = e->r;
     988             : 
     989      125796 :                                         if (find_prop(e->p, PROP_JOINIDX)) {
     990         667 :                                                 join_idx_estimate = lv>rv?lv:rv;
     991      125129 :                                         } else if (e->type == e_cmp && e->flag == cmp_equal) {
     992             :                                                 /* if one of the sides is unique, the cardinality will be that exact number, but look for nulls */
     993      121245 :                                                 if (!is_semantics(e) || !has_nil(el) || !has_nil(er)) {
     994      120849 :                                                         BUN lu = 0, ru = 0;
     995      120849 :                                                         prop *p = NULL;
     996      120849 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES)))
     997       89685 :                                                                 lu = (BUN) p->value.dval;
     998      120849 :                                                         if ((p = find_prop(er->p, PROP_NUNIQUES)))
     999      104063 :                                                                 ru = (BUN) p->value.dval;
    1000      120849 :                                                         if (is_unique(el) || lu > lv) {
    1001       73808 :                                                                 BUN ncount = (is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv;
    1002       73808 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1003       47041 :                                                         } else if (is_unique(er) || ru > rv) {
    1004       29402 :                                                                 BUN ncount = (is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv;
    1005       29402 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1006             :                                                         }
    1007             :                                                 }
    1008             :                                         }
    1009             :                                 }
    1010             :                         }
    1011      134701 :                         if (is_single(rel)) {
    1012        4737 :                                 set_count_prop(v->sql->sa, rel, lv);
    1013      129964 :                         } else if (join_idx_estimate != BUN_MAX) {
    1014         665 :                                 set_count_prop(v->sql->sa, rel, join_idx_estimate);
    1015      129299 :                         } else if (uniques_estimate != BUN_MAX) {
    1016       96746 :                                 set_count_prop(v->sql->sa, rel, uniques_estimate);
    1017       32553 :                         } else if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1018             :                                 /* corner cases for outer joins */
    1019         126 :                                 if (is_left(rel->op)) {
    1020         114 :                                         set_count_prop(v->sql->sa, rel, lv);
    1021          12 :                                 } else if (is_right(rel->op)) {
    1022           3 :                                         set_count_prop(v->sql->sa, rel, rv);
    1023           9 :                                 } else if (is_full(rel->op) && lv != BUN_NONE && rv != BUN_NONE) {
    1024           9 :                                         set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
    1025           0 :                                 } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1026           0 :                                         set_count_prop(v->sql->sa, rel, 0);
    1027             :                                 }
    1028       32427 :                         } else if (lv == 0) {
    1029        1166 :                                 set_count_prop(v->sql->sa, rel, (is_right(rel->op) || is_full(rel->op)) ? rv : 0);
    1030       31830 :                         } else if (rv == 0) {
    1031        1568 :                                 set_count_prop(v->sql->sa, rel, (is_left(rel->op) || is_full(rel->op)) ? lv : 0);
    1032       30756 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1033       18288 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX / lv)) ? BUN_MAX : (lv * rv)); /* overflow check */
    1034             :                         }
    1035             :                         break;
    1036             :                 }
    1037        2639 :                 case op_anti:
    1038        2639 :                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1039        2639 :                         break;
    1040      108641 :                 case op_semi:
    1041             :                 case op_select:
    1042             :                         /* TODO calculate cardinalities using selectivities */
    1043      108641 :                         if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1044        2745 :                                 set_count_prop(v->sql->sa, rel, 0);
    1045             :                         } else {
    1046      208552 :                                 if (!list_empty(rel->exps) && !is_single(rel)) {
    1047      102656 :                                         BUN cnt = get_rel_count(l), u = 1;
    1048      171084 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1049      112474 :                                                 sql_exp *e = n->data, *el = e->l, *er = e->r;
    1050             : 
    1051             :                                                 /* simple expressions first */
    1052      112474 :                                                 if (e->type == e_cmp && e->flag == cmp_equal && exp_is_atom(er)) {
    1053             :                                                         /* use selectivity */
    1054       57247 :                                                         prop *p;
    1055       57247 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES))) {
    1056       44046 :                                                                 u = (BUN) p->value.dval;
    1057       44046 :                                                                 break;
    1058             :                                                         }
    1059             :                                                 }
    1060             :                                         }
    1061             :                                         /* u is an *estimate*, so don't set count_prop to 0 unless cnt is 0 */
    1062      102656 :                                         set_count_prop(v->sql->sa, rel, cnt == 0 ? 0 : u == 0 || u > cnt ? 1 : cnt/u);
    1063             :                                 } else {
    1064        3240 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1065             :                                 }
    1066             :                         }
    1067             :                         break;
    1068      264257 :                 case op_project:
    1069      264257 :                         if (l) {
    1070      254140 :                                 if (need_distinct(rel)) {
    1071           0 :                                         set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->exps));
    1072             :                                 } else {
    1073      254140 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1074             :                                 }
    1075             :                         } else {
    1076       10117 :                                 BUN card = 1;
    1077             : 
    1078       10117 :                                 if (!list_empty(rel->exps)) {
    1079       30673 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1080       20556 :                                                 sql_exp *e = n->data;
    1081       20556 :                                                 card = MAX(card, trivial_project_exp_card(e));
    1082             :                                         }
    1083             :                                 }
    1084       10117 :                                 set_count_prop(v->sql->sa, rel, card);
    1085             :                         }
    1086             :                         break;
    1087       22956 :                 case op_groupby:
    1088       22956 :                         if (list_empty(rel->r)) {
    1089        7424 :                                 set_count_prop(v->sql->sa, rel, 1);
    1090             :                         } else {
    1091       15532 :                                 set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->r));
    1092             :                         }
    1093             :                         break;
    1094             :                 default:
    1095             :                         break;
    1096             :                 }
    1097             :                 break;
    1098             :         }
    1099       16896 :         case op_topn: {
    1100       16896 :                 BUN lv = get_rel_count(rel->l);
    1101             : 
    1102       16896 :                 if (lv != BUN_NONE) {
    1103       16879 :                         sql_exp *le = rel->exps->h->data, *oe = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
    1104          96 :                         if (oe && oe->l && exp_is_not_null(oe)) { /* no parameters */
    1105          96 :                                 BUN offset = (BUN) ((atom*)oe->l)->data.val.lval;
    1106          96 :                                 lv = offset >= lv ? 0 : lv - offset;
    1107             :                         }
    1108       16879 :                         if (le->l && exp_is_not_null(le)) {
    1109       16841 :                                 BUN limit = (BUN) ((atom*)le->l)->data.val.lval;
    1110       16841 :                                 lv = MIN(lv, limit);
    1111             :                         }
    1112       16879 :                         set_count_prop(v->sql->sa, rel, lv);
    1113             :                 }
    1114             :                 break;
    1115             :         }
    1116          22 :         case op_sample: {
    1117          22 :                 BUN lv = get_rel_count(rel->l);
    1118             : 
    1119          22 :                 if (lv != BUN_NONE) {
    1120           4 :                         sql_exp *se = rel->exps->h->data;
    1121           4 :                         sql_subtype *tp = exp_subtype(se);
    1122             : 
    1123           4 :                         if (se->l && tp->type->eclass == EC_NUM) { /* sample is a number of rows */
    1124           4 :                                 BUN sample = (BUN) ((atom*)se->l)->data.val.lval;
    1125           4 :                                 lv = MIN(lv, sample);
    1126           0 :                         } else if (se->l) { /* sample is a percentage of rows */
    1127           0 :                                 dbl percent = ((atom*)se->l)->data.val.dval;
    1128           0 :                                 assert(tp->type->eclass == EC_FLT);
    1129           0 :                                 lv = (BUN) ceil((dbl)lv * percent);
    1130             :                         }
    1131           4 :                         set_count_prop(v->sql->sa, rel, lv);
    1132             :                 }
    1133             :                 break;
    1134             :         }
    1135        5973 :         case op_table: {
    1136        5973 :                 sql_exp *op = rel->r;
    1137        5973 :                 if (rel->flag != TRIGGER_WRAPPER && op) {
    1138        5661 :                         sql_subfunc *f = op->f;
    1139        5661 :                         if (f->func->lang == FUNC_LANG_SQL) {
    1140          97 :                                 set_count_prop(v->sql->sa, rel, 1000 /* just some fallback value */);
    1141        5564 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "storage") == 0) {
    1142         829 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of catalog */);
    1143        4735 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "db_users") == 0) {
    1144           0 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of users */);
    1145        4735 :                         } else if (f->func->lang == FUNC_LANG_MAL && strncmp(f->func->base.name, "querylog", 8) == 0) {
    1146        1085 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of querylog */);
    1147        3650 :                         } else if (f->func->lang == FUNC_LANG_MAL &&
    1148        3540 :                                            (strcmp(f->func->base.name, "queue") == 0 ||
    1149        3276 :                                                 strcmp(f->func->base.name, "optimizers") == 0 ||
    1150        2964 :                                                 strcmp(f->func->base.name, "env") == 0 ||
    1151        2672 :                                                 strcmp(f->func->base.name, "keywords") == 0 ||
    1152        2672 :                                                 strcmp(f->func->base.name, "statistics") == 0 ||
    1153        2005 :                                                 strcmp(f->func->base.name, "rejects") == 0 ||
    1154        1751 :                                                 strcmp(f->func->base.name, "schemastorage") == 0 ||
    1155        1751 :                                                 strncmp(f->func->base.name, "storage", 7) == 0 ||
    1156        1751 :                                                 strcmp(f->func->base.name, "sessions") == 0) ) {
    1157        2079 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of queue */);
    1158             :                         }
    1159             :                         /* else {
    1160             :                                 printf("%%func needs stats : %s\n", f->func->base.name);
    1161             :                         } */
    1162             :                 }
    1163             :                 break;
    1164             :         }
    1165             :         /*These relations are less important for now
    1166             :           TODO later we can tune it */
    1167             :         case op_insert:
    1168             :         case op_update:
    1169             :         case op_delete:
    1170             :         case op_truncate:
    1171             :         case op_ddl:
    1172             :         default:
    1173             :                 break;
    1174             :         }
    1175             : 
    1176             :         return rel;
    1177             : }
    1178             : 
    1179             : static sql_rel *
    1180      542396 : rel_get_statistics(visitor *v, global_props *gp, sql_rel *rel)
    1181             : {
    1182             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
    1183      542396 :         uint8_t has_special_modify = (uint8_t) gp->has_special_modify;
    1184      542396 :         v->data = &has_special_modify;
    1185      542396 :         rel = rel_visitor_bottomup(v, rel, &rel_get_statistics_);
    1186      542396 :         v->data = gp;
    1187      542396 :         return rel;
    1188             : }
    1189             : 
    1190             : run_optimizer
    1191      718925 : bind_get_statistics(visitor *v, global_props *gp)
    1192             : {
    1193      718925 :         (void) v;
    1194      718925 :         return (gp->opt_level == 1 && !gp->cnt[op_insert]) ? rel_get_statistics : NULL;
    1195             : }
    1196             : 
    1197             : 
    1198             : static bool
    1199       94932 : point_select_on_unique_column(sql_rel *rel)
    1200             : {
    1201       94932 :         if (is_select(rel->op) && !list_empty(rel->exps)) {
    1202      131381 :                 for (node *n = rel->exps->h; n ; n = n->next) {
    1203       75654 :                         sql_exp *e = n->data, *el = e->l, *er = e->r, *found = NULL;
    1204             : 
    1205       75654 :                         if (is_compare(e->type) && e->flag == cmp_equal) {
    1206       33625 :                                 if (is_numeric_upcast(el))
    1207           0 :                                         el = el->l;
    1208       33625 :                                 if (is_numeric_upcast(er))
    1209           0 :                                         er = er->l;
    1210       33625 :                                 if (is_alias(el->type) && exp_is_atom(er) && (found = rel_find_exp(rel->l, el)) &&
    1211         749 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(found) || !has_nil(er)))
    1212             :                                         return true;
    1213       32876 :                                 if (is_alias(er->type) && exp_is_atom(el) && (found = rel_find_exp(rel->l, er)) &&
    1214           0 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(el) || !has_nil(found)))
    1215             :                                         return true;
    1216             :                         }
    1217             :                 }
    1218             :         }
    1219             :         return false;
    1220             : }
    1221             : 
    1222             : /*
    1223             :  * A point select on an unique column reduces the number of rows to 1. If the same select is under a
    1224             :  * join, the opposite side's select can be pushed above the join.
    1225             :  */
    1226             : static inline sql_rel *
    1227     1246752 : rel_push_select_up(visitor *v, sql_rel *rel)
    1228             : {
    1229     1246752 :         if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) || is_semi(rel->op)) && !is_single(rel)) {
    1230      254263 :                 sql_rel *l = rel->l, *r = rel->r;
    1231      254263 :                 bool can_pushup_left = is_select(l->op) && !rel_is_ref(l) && !is_single(l) && (is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op)),
    1232      254263 :                         can_pushup_right = is_select(r->op) && !rel_is_ref(r) && !is_single(r) && (is_innerjoin(rel->op) || is_right(rel->op));
    1233             : 
    1234      254263 :                 if (can_pushup_left || can_pushup_right) {
    1235       66931 :                         if (can_pushup_left)
    1236       45442 :                                 can_pushup_left = point_select_on_unique_column(r);
    1237       66931 :                         if (can_pushup_right)
    1238       49490 :                                 can_pushup_right = point_select_on_unique_column(l);
    1239             : 
    1240             :                         /* if both selects retrieve one row each, it's not worth it to push both up */
    1241       66931 :                         if (can_pushup_left && !can_pushup_right) {
    1242         683 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1243         683 :                                 nrel->l = l->l;
    1244         683 :                                 rel = rel_inplace_select(rel, nrel, l->exps);
    1245         683 :                                 assert(is_select(rel->op));
    1246         683 :                                 v->changes++;
    1247       66248 :                         } else if (!can_pushup_left && can_pushup_right) {
    1248          14 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1249          14 :                                 nrel->r = r->l;
    1250          14 :                                 rel = rel_inplace_select(rel, nrel, r->exps);
    1251          14 :                                 assert(is_select(rel->op));
    1252          14 :                                 v->changes++;
    1253             :                         }
    1254             :                 }
    1255             :         }
    1256     1246752 :         return rel;
    1257             : }
    1258             : 
    1259             : static int
    1260       93332 : sql_class_base_score(visitor *v, sql_column *c, sql_subtype *t, bool equality_based)
    1261             : {
    1262       93332 :         int de;
    1263             : 
    1264       93332 :         if (!t)
    1265             :                 return 0;
    1266       93332 :         switch (ATOMstorage(t->type->localtype)) {
    1267             :         case TYPE_bte:
    1268             :                 return 150 - 8;
    1269        1599 :         case TYPE_sht:
    1270        1599 :                 return 150 - 16;
    1271       37333 :         case TYPE_int:
    1272       37333 :                 return 150 - 32;
    1273         518 :         case TYPE_void:
    1274             :         case TYPE_lng:
    1275         518 :                 return 150 - 64;
    1276             :         case TYPE_uuid:
    1277             : #ifdef HAVE_HGE
    1278             :         case TYPE_hge:
    1279             : #endif
    1280             :                 return 150 - 128;
    1281           2 :         case TYPE_flt:
    1282           2 :                 return 75 - 24;
    1283             :         case TYPE_dbl:
    1284             :                 return 75 - 53;
    1285       30582 :         default:
    1286       30582 :                 if (equality_based && c && v->storage_based_opt && (de = mvc_is_duplicate_eliminated(v->sql, c)))
    1287        1554 :                         return 150 - de * 8;
    1288             :                 /* strings and blobs not duplicate eliminated don't get any points here */
    1289             :                 return 0;
    1290             :         }
    1291             : }
    1292             : 
    1293             : static int
    1294       34433 : score_se_base(visitor *v, sql_rel *rel, sql_exp *e)
    1295             : {
    1296       34433 :         int res = 0;
    1297       34433 :         sql_subtype *t = exp_subtype(e);
    1298       34433 :         sql_column *c = NULL;
    1299             : 
    1300       34433 :         if (e->type == e_convert) /* keep unsafes at the end (TODO improve) */
    1301             :                 return -1000;
    1302             :         /* can we find out if the underlying table is sorted */
    1303       33832 :         if ((c = exp_find_column(rel, e, -2)) && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1304       33832 :                 res += 600;
    1305             : 
    1306             :         /* prefer the shorter var types over the longer ones */
    1307       33832 :         res += sql_class_base_score(v, c, t, is_equality_or_inequality_exp(e->flag)); /* smaller the type, better */
    1308       33832 :         return res;
    1309             : }
    1310             : 
    1311             : static int
    1312       58443 : score_se(visitor *v, sql_rel *rel, sql_exp *e)
    1313             : {
    1314       58443 :         int score = 0;
    1315       58443 :         if (e->type == e_cmp && !is_complex_exp(e->flag)) {
    1316       34433 :                 sql_exp *l = e->l;
    1317             : 
    1318       34433 :                 while (l->type == e_cmp) { /* go through nested comparisons */
    1319           2 :                         sql_exp *ll;
    1320             : 
    1321           2 :                         if (l->flag == cmp_filter || l->flag == cmp_or)
    1322           0 :                                 ll = ((list*)l->l)->h->data;
    1323             :                         else
    1324           2 :                                 ll = l->l;
    1325           2 :                         if (ll->type != e_cmp)
    1326             :                                 break;
    1327             :                         l = ll;
    1328             :                 }
    1329       34433 :                 score += score_se_base(v, rel, l);
    1330             :         }
    1331       58443 :         score += exp_keyvalue(e);
    1332       58443 :         return score;
    1333             : }
    1334             : 
    1335             : static inline sql_rel *
    1336     1246752 : rel_select_order(visitor *v, sql_rel *rel)
    1337             : {
    1338     1246752 :         int *scores = NULL;
    1339     1246752 :         sql_exp **exps = NULL;
    1340             : 
    1341     1246752 :         if (is_select(rel->op) && list_length(rel->exps) > 1) {
    1342       27337 :                 node *n;
    1343       27337 :                 int i, nexps = list_length(rel->exps);
    1344       27337 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, nexps);
    1345       27337 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, nexps);
    1346             : 
    1347       85780 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next) {
    1348       58473 :                         exps[i] = n->data;
    1349       58473 :                         if (find_prop(exps[i]->p, PROP_HASHCOL))
    1350             :                                 return rel;
    1351       58443 :                         scores[i] = score_se(v, rel, n->data);
    1352             :                 }
    1353       27307 :                 GDKqsort(scores, exps, NULL, nexps, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1354             : 
    1355       85750 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next)
    1356       58443 :                         n->data = exps[i];
    1357             :         }
    1358             : 
    1359             :         return rel;
    1360             : }
    1361             : 
    1362             : /* Compute the efficiency of using this expression early in a group by list */
    1363             : static int
    1364       59500 : score_gbe(visitor *v, sql_rel *rel, sql_exp *e)
    1365             : {
    1366       59500 :         int res = 0;
    1367       59500 :         sql_subtype *t = exp_subtype(e);
    1368       59500 :         sql_column *c = exp_find_column(rel, e, -2);
    1369             : 
    1370       59500 :         if (e->card == CARD_ATOM) /* constants are trivial to group */
    1371          38 :                 res += 1000;
    1372             :         /* can we find out if the underlying table is sorted */
    1373       59500 :         if (is_unique(e) || find_prop(e->p, PROP_HASHCOL) || (c && v->storage_based_opt && mvc_is_unique(v->sql, c))) /* distinct columns */
    1374        3582 :                 res += 700;
    1375       38911 :         if (c && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1376        3712 :                 res += 500;
    1377       59500 :         if (find_prop(e->p, PROP_HASHIDX)) /* has hash index */
    1378           0 :                 res += 200;
    1379             : 
    1380             :         /* prefer the shorter var types over the longer ones */
    1381       59500 :         res += sql_class_base_score(v, c, t, true); /* smaller the type, better */
    1382       59500 :         return res;
    1383             : }
    1384             : 
    1385             : /* reorder group by expressions */
    1386             : static inline sql_rel *
    1387     1246751 : rel_groupby_order(visitor *v, sql_rel *rel)
    1388             : {
    1389     1246751 :         int *scores = NULL;
    1390     1246751 :         sql_exp **exps = NULL;
    1391             : 
    1392     1246751 :         if (is_groupby(rel->op) && list_length(rel->r) > 1) {
    1393       25587 :                 node *n;
    1394       25587 :                 list *gbe = rel->r;
    1395       25587 :                 int i, ngbe = list_length(gbe);
    1396       25587 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, ngbe);
    1397       25587 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, ngbe);
    1398             : 
    1399             :                 /* first sorting step, give priority for integers and sorted columns */
    1400       85087 :                 for (i = 0, n = gbe->h; n; i++, n = n->next) {
    1401       59500 :                         exps[i] = n->data;
    1402       59500 :                         scores[i] = score_gbe(v, rel, exps[i]);
    1403             :                 }
    1404       25587 :                 GDKqsort(scores, exps, NULL, ngbe, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1405             : 
    1406             :                 /* second sorting step, give priority to strings with lower number of digits */
    1407       50503 :                 for (i = ngbe - 1; i && !scores[i]; i--); /* find expressions with no score from the first round */
    1408       25587 :                 if (scores[i])
    1409       24589 :                         i++;
    1410       25587 :                 if (ngbe - i > 1) {
    1411        8604 :                         for (int j = i; j < ngbe; j++) {
    1412        6597 :                                 sql_subtype *t = exp_subtype(exps[j]);
    1413        6597 :                                 scores[j] = t ? t->digits : 0;
    1414             :                         }
    1415             :                         /* the less number of digits the better, order ascending */
    1416        2007 :                         GDKqsort(scores + i, exps + i, NULL, ngbe - i, sizeof(int), sizeof(void *), TYPE_int, false, true);
    1417             :                 }
    1418             : 
    1419       85087 :                 for (i = 0, n = gbe->h; n; i++, n = n->next)
    1420       59500 :                         n->data = exps[i];
    1421             :         }
    1422             : 
    1423     1246751 :         return rel;
    1424             : }
    1425             : 
    1426             : /* This optimization loop contains optimizations that can potentially use statistics */
    1427             : static sql_rel *
    1428     1246752 : rel_final_optimization_loop_(visitor *v, sql_rel *rel)
    1429             : {
    1430             :         /* Run rel_push_select_up only once at the end to avoid an infinite optimization loop */
    1431     1246752 :         rel = rel_push_select_up(v, rel);
    1432     1246752 :         rel = rel_select_order(v, rel);
    1433             : 
    1434             :         /* TODO? Maybe later add rel_simplify_count, rel_join2semijoin, rel_simplify_fk_joins,
    1435             :            rel_distinct_project2groupby, rel_simplify_predicates, rel_simplify_math,
    1436             :            rel_distinct_aggregate_on_unique_values */
    1437             : 
    1438     1246751 :         rel = rel_groupby_order(v, rel);
    1439     1246751 :         return rel;
    1440             : }
    1441             : 
    1442             : static sql_rel *
    1443       66181 : rel_final_optimization_loop(visitor *v, global_props *gp, sql_rel *rel)
    1444             : {
    1445       66181 :         (void) gp;
    1446       66181 :         return rel_visitor_bottomup(v, rel, &rel_final_optimization_loop_);
    1447             : }
    1448             : 
    1449             : run_optimizer
    1450      718921 : bind_final_optimization_loop(visitor *v, global_props *gp)
    1451             : {
    1452      718921 :         int flag = v->sql->sql_optimizer;
    1453             :         /* At the moment, this optimizer has dependency on 3 flags */
    1454      549989 :         return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_select]) &&
    1455      785104 :                 (flag & push_select_up) && (flag & optimize_select_and_joins_topdown) && (flag & optimize_projections) ? rel_final_optimization_loop : NULL;
    1456             : }

Generated by: LCOV version 1.14