LCOV - code coverage report
Current view: top level - sql/server - rel_statistics.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 831 883 94.1 %
Date: 2024-11-14 20:04:02 Functions: 26 26 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include "rel_optimizer_private.h"
      15             : #include "rel_statistics.h"
      16             : #include "rel_basetable.h"
      17             : #include "rel_rewriter.h"
      18             : 
      19             : static sql_exp *
      20     3463029 : comparison_find_column(sql_exp *input, sql_exp *e)
      21             : {
      22     3463029 :         switch (input->type) {
      23       98425 :         case e_convert: {
      24       98425 :                 list *types = (list *)input->r;
      25       98425 :                 sql_class from = ((sql_subtype*)types->h->data)->type->eclass, to = ((sql_subtype*)types->h->next->data)->type->eclass;
      26       98425 :                 if (from == to)
      27      189782 :                         return comparison_find_column(input->l, e) ? input : NULL;
      28             :                 return NULL;
      29             :         }
      30     3052896 :         case e_column:
      31     3052896 :                 return exp_match(e, input) ? input : NULL;
      32             :         default:
      33             :                 return NULL;
      34             :         }
      35             : }
      36             : 
      37             : /* multi lo <= col <= hi, maybe still be false even if lo or hi are NULL, possibly similar for filter on multiple
      38             :  * columns */
      39             : #define comp_single_column(c) (!c->f)
      40             : 
      41             : static sql_exp *
      42     8744632 : rel_propagate_column_ref_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
      43             : {
      44     8827927 :         assert(e->type == e_column);
      45     8827927 :         if (rel) {
      46     8827848 :                 switch(rel->op) {
      47     4088024 :                 case op_left:
      48             :                 case op_right:
      49             :                 case op_full:
      50             :                 case op_join:
      51             :                 case op_select:
      52             :                 case op_anti:
      53             :                 case op_semi: {
      54     4088024 :                         bool found_without_semantics = false, found_left = false, found_right = false, still_unique = false;
      55             : 
      56     4088024 :                         if ((is_innerjoin(rel->op) || is_select(rel->op)) && list_length(rel->exps) == 1 && exp_is_false(rel->exps->h->data))
      57             :                                 return NULL; /* nothing will pass, skip */
      58             : 
      59             :                         /* propagate from the bottom first */
      60     4072471 :                         if (rel_propagate_column_ref_statistics(sql, rel->l, e))
      61             :                                 found_left = true;
      62     2590758 :                         if (!found_left && is_join(rel->op) && rel_propagate_column_ref_statistics(sql, rel->r, e))
      63     4072471 :                                 found_right = true;
      64             : 
      65     4072471 :                         if (!found_left && !found_right)
      66             :                                 return NULL;
      67     1777034 :                         if (!list_empty(rel->exps) && rel->op != op_anti) { /* if there's an or, the MIN and MAX get difficult to propagate */
      68     3495270 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
      69     1809534 :                                         sql_exp *comp = n->data, *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL, *fe = comp->f, *fne = NULL;
      70             : 
      71     1809534 :                                         if (comp->type == e_cmp) {
      72     1808532 :                                                 if (is_theta_exp(comp->flag) && ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) || (fe && (fne = comparison_find_column(fe, e))))) {
      73      125124 :                                                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
      74      125124 :                                                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
      75             : 
      76             :                                                         /* not semantics found or if explicitly filtering not null values from the column */
      77      125124 :                                                         found_without_semantics |= (!is_semantics(comp) && comp_single_column(comp)) || (comp->flag == cmp_equal && lne && is_anti(comp) && exp_is_null(re));
      78      125124 :                                                         still_unique |= comp->flag == cmp_equal && is_unique(le) && is_unique(re); /* unique if only equi-joins on unique columns are there */
      79      125124 :                                                         if (is_full(rel->op) || (is_left(rel->op) && found_left) || (is_right(rel->op) && found_right)) /* on outer joins, min and max cannot be propagated on some cases */
      80       19791 :                                                                 continue;
      81             :                                                         /* if (end2 >= start1 && start2 <= end1) then the 2 intervals are intersected */
      82      105333 :                                                         if (fe && lval_min && lval_max) { /* range case, the middle expression must intersect the other two */
      83        4622 :                                                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
      84        4622 :                                                                 int int1 = rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0,
      85        4622 :                                                                         int2 = fval_min && fval_max && atom_cmp(fval_max, lval_min) >= 0 && atom_cmp(fval_min, lval_max) <= 0,
      86        4622 :                                                                         symmetric = is_symmetric(comp);
      87             : 
      88        4622 :                                                                 if (is_anti(comp) || (!symmetric && fval_min && rval_max && atom_cmp(fval_min, rval_max) < 0)) /* for asymmetric case the re range must be after the fe range */
      89        1817 :                                                                         continue;
      90        2805 :                                                                 if (lne && int1 && int2) {
      91         104 :                                                                         if (symmetric) {
      92           0 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
      93           0 :                                                                                 atom *nmin = statistics_atom_min(sql, rval_min, fval_min), *nmax = statistics_atom_max(sql, rval_max, fval_max);
      94             :                                                                                 /* max is min from le and (max from re and fe max) */
      95           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, nmax, p2->value.pval) : nmax);
      96             :                                                                                 /* min is max from le and (min from re and fe min) */
      97           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, nmin, p1->value.pval) : nmin);
      98             :                                                                         } else {
      99         104 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
     100             :                                                                                 /* max is min from le and fe max */
     101         104 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, fval_max, p2->value.pval) : fval_max);
     102             :                                                                                 /* min is max from le and re min */
     103         104 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, rval_min, p1->value.pval) : rval_min);
     104             :                                                                         }
     105        2701 :                                                                 } else if (rne) {
     106         590 :                                                                         if (symmetric && int1 && int2) { /* min is max from le and (min from re and fe min) */
     107           0 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     108           0 :                                                                                 atom *nmin = p ? statistics_atom_min(sql, p->value.pval, fval_min) : fval_min;
     109           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, nmin, lval_min));
     110         590 :                                                                         } else if (int1) { /* min is max from le and re min */
     111         100 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     112         100 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     113             :                                                                         }
     114        2111 :                                                                 } else if (fne) {
     115         549 :                                                                         if (symmetric && int1 && int2) { /* max is min from le and (max from re and fe max) */
     116           0 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     117           0 :                                                                                 atom *nmax = p ? statistics_atom_max(sql, p->value.pval, rval_max) : rval_max;
     118           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, nmax, lval_max) : nmax);
     119         549 :                                                                         } else if (int2) { /* max is min from le and fe max */
     120         266 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     121         266 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     122             :                                                                         }
     123             :                                                                 }
     124      100711 :                                                         } else if (lval_min && lval_max && rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0) {
     125             :                                                                 /* both min and max must be set and the intervals must overlap */
     126       43314 :                                                                 switch (comp->flag) {
     127       28978 :                                                                 case cmp_equal: /* for equality reduce */
     128       28978 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_max(sql, lval_max, rval_max) : statistics_atom_min(sql, lval_max, rval_max));
     129       28978 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_min(sql, lval_min, rval_min) : statistics_atom_max(sql, lval_min, rval_min));
     130       28978 :                                                                         break;
     131        4679 :                                                                 case cmp_notequal: /* for inequality expand */
     132        4679 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_min(sql, lval_max, rval_max) : statistics_atom_max(sql, lval_max, rval_max));
     133        4679 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_max(sql, lval_min, rval_min) : statistics_atom_min(sql, lval_min, rval_min));
     134        4679 :                                                                         break;
     135        5651 :                                                                 case cmp_gt:
     136             :                                                                 case cmp_gte:
     137       10364 :                                                                         if (!is_anti(comp) && lne) { /* min is max from both min */
     138        4713 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     139        4713 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min, p->value.pval) : rval_min);
     140         938 :                                                                         } else if (!is_anti(comp)) { /* max is min from both max */
     141         938 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     142         938 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     143             :                                                                         }
     144             :                                                                         break;
     145        4006 :                                                                 case cmp_lt:
     146             :                                                                 case cmp_lte:
     147        7205 :                                                                         if (!is_anti(comp) && lne) { /* max is min from both max */
     148        3199 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     149        3199 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max, p->value.pval) : rval_max);
     150         807 :                                                                         } else if (!is_anti(comp)) { /* min is max from both min */
     151         807 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     152         807 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     153             :                                                                         }
     154             :                                                                         break;
     155             :                                                                 default: /* Maybe later I can do cmp_in and cmp_notin */
     156             :                                                                         break;
     157             :                                                                 }
     158             :                                                         }
     159             :                                                 }
     160             :                                         }
     161             :                                 }
     162             :                         }
     163     1777034 :                         if (is_full(rel->op) || (is_left(rel->op) && found_right) || (is_right(rel->op) && found_left))
     164       38014 :                                 set_has_nil(e);
     165     1777034 :                         if (!is_outerjoin(rel->op) && found_without_semantics) /* at an outer join, null values pass */
     166       94277 :                                 set_has_no_nil(e);
     167     1777034 :                         if (is_join(rel->op) && is_unique(e) && !still_unique)
     168      118919 :                                 set_not_unique(e);
     169     1777034 :                         return e;
     170             :                 }
     171     4637182 :                 case op_table:
     172             :                 case op_basetable:
     173             :                 case op_union:
     174             :                 case op_except:
     175             :                 case op_inter:
     176             :                 case op_munion:
     177             :                 case op_project:
     178             :                 case op_groupby: {
     179     4637182 :                         sql_exp *found;
     180     4637182 :                         atom *fval;
     181     4637182 :                         prop *est;
     182     4637182 :                         if ((found = rel_find_exp(rel, e))) {
     183     2175402 :                                 if (rel->op != op_table) { /* At the moment don't propagate statistics for table relations */
     184     2131078 :                                         if ((fval = find_prop_and_get(found->p, PROP_MAX)))
     185     1133316 :                                                 set_minmax_property(sql, e, PROP_MAX, fval);
     186     2131075 :                                         if ((fval = find_prop_and_get(found->p, PROP_MIN)))
     187     1140623 :                                                 set_minmax_property(sql, e, PROP_MIN, fval);
     188     2131072 :                                         if (!has_nil(found))
     189     1377483 :                                                 set_has_no_nil(e);
     190     2131072 :                                         if (is_unique(found) || (need_distinct(rel) && list_length(rel->exps) == 1) ||
     191     1720643 :                                                 (is_groupby(rel->op) && (list_empty(rel->r) || (list_length(rel->r) == 1 && exps_find_exp(rel->r, e)))))
     192      421303 :                                                 set_unique(e);
     193             :                                         /* propagate unique estimation for known cases */
     194     2131071 :                                         if (is_groupby(rel->op) && list_empty(rel->r) && !find_prop(e->p, PROP_NUNIQUES)) { /* global aggregate case */
     195        7538 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     196        7538 :                                                 p->value.dval = 1;
     197     2123533 :                                         } else if (((is_basetable(rel->op) || is_except(rel->op) || is_inter(rel->op) || is_simple_project(rel->op) ||
     198       66647 :                                                                  (is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
     199     2066028 :                                                                 (est = find_prop(found->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES))) {
     200      187990 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     201      187990 :                                                 p->value.dval = est->value.dval;
     202             :                                         }
     203             :                                 }
     204     2175397 :                                 return e;
     205             :                         }
     206             :                         return NULL;
     207             :                 }
     208       83295 :                 case op_topn:
     209             :                 case op_sample:
     210       83295 :                         return rel_propagate_column_ref_statistics(sql, rel->l, e);
     211             :                 default:
     212             :                         break;
     213             :                 }
     214             :         }
     215             :         return NULL;
     216             : }
     217             : 
     218             : static atom *
     219     4452544 : atom_from_valptr( allocator *sa, sql_subtype *tpe, ValRecord *v)
     220             : {
     221     4452544 :         atom *a = SA_NEW(sa, atom);
     222             : 
     223     4452494 :         assert(!VALisnil(v));
     224     4452386 :         *a = (atom) {.tpe = *tpe,};
     225     4452386 :         SA_VALcopy(sa, &a->data, v);
     226     4452311 :         return a;
     227             : }
     228             : 
     229             : void
     230     4060308 : sql_column_get_statistics(mvc *sql, sql_column *c, sql_exp *e)
     231             : {
     232     4060308 :         bool nonil = false, unique = false;
     233     4060308 :         double unique_est = 0.0;
     234     4060308 :         ValRecord min, max;
     235     4060308 :         int ok = mvc_col_stats(sql, c, &nonil, &unique, &unique_est, &min, &max);
     236             : 
     237     4060443 :         if (has_nil(e) && nonil)
     238     2653276 :                 set_has_no_nil(e);
     239     4060443 :         if (!is_unique(e) && unique)
     240     1100513 :                 set_unique(e);
     241     4060443 :         if (unique_est != 0.0) {
     242     2823991 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     243     2823984 :                 p->value.dval = unique_est;
     244             :         }
     245     4060436 :         unsigned int digits = 0;
     246     4060436 :         sql_subtype *et = exp_subtype(e);
     247     4060444 :         if (et->type->eclass == EC_DEC || et->type->eclass == EC_NUM)
     248     2641293 :                 digits = et->digits;
     249     4060444 :         if ((ok & 2) == 2) {
     250     2223211 :                 if (!VALisnil(&max)) {
     251     2223201 :                         prop *p = e->p = prop_create(sql->sa, PROP_MAX, e->p);
     252     2223178 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &max);
     253     2223125 :                         if (digits) {
     254     1656015 :                                 unsigned int nd = atom_digits(p->value.pval);
     255     1656050 :                                 if (nd < digits)
     256             :                                         digits = nd;
     257     1656050 :                                 if (!digits)
     258             :                                         digits = 1;
     259             :                         }
     260             :                 }
     261     2223161 :                 VALclear(&max);
     262             :         }
     263     4060384 :         if ((ok & 1) == 1) {
     264     2229561 :                 if (!VALisnil(&min)) {
     265     2229552 :                         prop *p = e->p = prop_create(sql->sa, PROP_MIN, e->p);
     266     2229547 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &min);
     267     2229511 :                         if (digits) {
     268     1663484 :                                 unsigned int nd = atom_digits(p->value.pval);
     269     1663479 :                                 if (nd > digits) /* possibly set to low by max value */
     270             :                                         digits = nd;
     271             :                                 if (!digits)
     272             :                                         digits = 1;
     273             :                         }
     274             :                 }
     275     2229502 :                 VALclear(&min);
     276             :         }
     277     4060318 :         if (digits)
     278     2641170 :                 et->digits = digits;
     279     4060318 :         if (et->type->eclass == EC_DEC && et->digits <= et->scale)
     280         216 :                 et->digits = et->scale + 1;
     281     4060318 : }
     282             : 
     283             : static void
     284      935973 : rel_basetable_column_get_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
     285             : {
     286      935973 :         if (e->p)
     287             :                 return;
     288      302287 :         sql_column *c = NULL;
     289             : 
     290      302287 :         if ((c = rel_base_find_column(rel, e->nid))) {
     291      204214 :                 sql_column_get_statistics(sql, c, e);
     292             :         }
     293             : }
     294             : 
     295             : static bool
     296        8872 : rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, sql_exp *e, int i)
     297             : {
     298        8872 :         sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
     299        8872 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     300        8872 :                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     301        8872 :         prop *est;
     302             : 
     303             :         /* for the intersection, if both expressions don't overlap, it can be pruned */
     304        8872 :         if (is_inter(rel->op) && !has_nil(le) && !has_nil(re) &&
     305         511 :                 ((rval_max && lval_min && atom_cmp(rval_max, lval_min) < 0) || (rval_min && lval_max && atom_cmp(rval_min, lval_max) > 0)))
     306          26 :                 return true;
     307             : 
     308        8846 :         if (lval_max && rval_max) {
     309        2557 :                 if (is_union(rel->op))
     310           3 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     311        2554 :                 else if (is_inter(rel->op))
     312         399 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval_max, rval_max)); /* for intersect the new max will be the min of the two */
     313             :                 else /* except */
     314        2155 :                         set_minmax_property(sql, e, PROP_MAX, lval_max);
     315             :         }
     316        8846 :         if (lval_min && rval_min) {
     317        2557 :                 if (is_union(rel->op))
     318           3 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     319        2554 :                 else if (is_inter(rel->op))
     320         399 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval_min, rval_min)); /* for intersect the new min will be the max of the two */
     321             :                 else /* except */
     322        2155 :                         set_minmax_property(sql, e, PROP_MIN, lval_min);
     323             :         }
     324             : 
     325        8846 :         if (is_union(rel->op) || is_munion(rel->op)) {
     326        5986 :                 if (!has_nil(le) && !has_nil(re))
     327         879 :                         set_has_no_nil(e);
     328        5986 :                 if (need_distinct(rel) && list_length(rel->exps) == 1)
     329           0 :                         set_unique(e);
     330        2860 :         } else if (is_inter(rel->op)) {
     331         564 :                 if (!has_nil(le) || !has_nil(re))
     332         509 :                         set_has_no_nil(e);
     333         564 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     334         345 :                         set_unique(e);
     335             :         } else {
     336        2296 :                 assert(is_except(rel->op));
     337        2296 :                 if (!has_nil(le))
     338        2233 :                         set_has_no_nil(e);
     339        2296 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     340        2009 :                         set_unique(e);
     341             :         }
     342             :         /* propagate unique estimation for known cases */
     343        8846 :         if (!is_union(rel->op) && (est = find_prop(le->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES)) {
     344         205 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     345         205 :                 p->value.dval = est->value.dval;
     346             :         }
     347             :         return false;
     348             : }
     349             : 
     350             : 
     351             : static void
     352       60688 : rel_munion_get_statistics(mvc *sql, sql_rel *rel, list *rels, sql_exp *e, int i)
     353             : {
     354       60688 :         assert(is_munion(rel->op));
     355             : 
     356       60688 :         sql_rel *l = rels->h->data;
     357       60688 :         sql_exp *le = list_fetch(l->exps, i);
     358       60688 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX);
     359       60688 :         bool has_nonil = !has_nil(le);
     360             : 
     361      175904 :         for(node *n = rels->h->next; n; n = n->next) {
     362      115216 :                 sql_rel *r = n->data;
     363      115216 :                 sql_exp *re = list_fetch(r->exps, i);
     364      115216 :                 atom *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     365             : 
     366      115216 :                 if (lval_max && rval_max) {
     367       42563 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     368       42563 :                         lval_max = find_prop_and_get(e->p, PROP_MAX);
     369             :                 }
     370      115216 :                 if (lval_min && rval_min) {
     371       42064 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     372       42064 :                         lval_min = find_prop_and_get(e->p, PROP_MIN);
     373             :                 }
     374      115216 :                 has_nonil &= !has_nil(re);
     375             : 
     376             :         }
     377             : 
     378       60688 :         if (has_nonil)
     379       41542 :                 set_has_no_nil(e);
     380             : 
     381       60688 :         if (need_distinct(rel) && list_length(rel->exps) == 1)
     382        1594 :                 set_unique(e);
     383       60688 : }
     384             : 
     385             : 
     386             : static sql_exp *
     387     3478817 : rel_propagate_statistics(visitor *v, sql_rel *rel, sql_exp *e, int depth)
     388             : {
     389     3478817 :         mvc *sql = v->sql;
     390     3478817 :         atom *lval;
     391             : 
     392     3478817 :         (void) depth;
     393     3478817 :         switch(e->type) {
     394     2181222 :         case e_column:
     395     2181222 :                 switch (rel->op) { /* set relations don't call rel_propagate_statistics */
     396      278817 :                 case op_join:
     397             :                 case op_left:
     398             :                 case op_right:
     399             :                 case op_full:
     400             :                 case op_semi:
     401             :                 case op_anti: {
     402      278817 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e);
     403      278817 :                         if (!found)
     404      139853 :                                 (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     405             :                         break;
     406             :                 }
     407     1902405 :                 case op_select:
     408             :                 case op_project:
     409             :                 case op_groupby: {
     410     1902405 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e); /* labels may be found on the same projection, ugh */
     411     1902399 :                         if (!found && is_simple_project(rel->op))
     412      125477 :                                 (void) rel_propagate_column_ref_statistics(sql, rel, e);
     413             :                         break;
     414             :                 }
     415           0 :                 case op_insert:
     416             :                 case op_update:
     417             :                 case op_delete:
     418           0 :                         (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     419           0 :                         break;
     420             :                 default:
     421             :                         break;
     422             :                 }
     423             :                 break;
     424       98466 :         case e_convert: {
     425       98466 :                 sql_subtype *to = exp_totype(e), *from = exp_fromtype(e);
     426       98466 :                 sql_exp *l = e->l;
     427       98466 :                 sql_class fr = from->type->eclass, too = to->type->eclass;
     428       98466 :                 prop *est;
     429             : 
     430       98466 :                 if (fr == too) {
     431       89388 :                         if ((lval = find_prop_and_get(l->p, PROP_MAX))) {
     432       58830 :                                 atom *res = atom_copy(sql->sa, lval);
     433       58830 :                                 if ((res = atom_cast(sql->sa, res, to)))
     434       58807 :                                         set_minmax_property(sql, e, PROP_MAX, res);
     435             :                         }
     436       89388 :                         if ((lval = find_prop_and_get(l->p, PROP_MIN))) {
     437       59448 :                                 atom *res = atom_copy(sql->sa, lval);
     438       59448 :                                 if ((res = atom_cast(sql->sa, res, to)))
     439       59425 :                                         set_minmax_property(sql, e, PROP_MIN, res);
     440             :                         }
     441             :                 }
     442             :                 /* propagating nuniques through conversions is tricky. I am adding just the general cases */
     443       98466 :                 if ((est = find_prop(l->p, PROP_NUNIQUES)) &&
     444       61096 :                         (((fr == EC_TIME || fr == EC_TIME_TZ || fr == EC_DATE) && (too == EC_TIMESTAMP || too == EC_TIMESTAMP_TZ)) ||
     445       61071 :                          ((fr == EC_NUM || fr == EC_BIT) && too == EC_NUM) || (fr == EC_DEC && too == EC_DEC) || (fr != EC_EXTERNAL && too == EC_STRING))) {
     446       57642 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     447       57642 :                         p->value.dval = est->value.dval;
     448             :                 }
     449       98466 :                 if (!has_nil(l))
     450       55522 :                         set_has_no_nil(e);
     451             :                 break;
     452             :         }
     453      341643 :         case e_aggr:
     454             :         case e_func: {
     455      341643 :                 BUN lv;
     456      341643 :                 sql_subfunc *f = e->f;
     457             : 
     458      341643 :                 if (!f->func->s) {
     459      315170 :                         int key = hash_key(f->func->base.name); /* Using hash lookup */
     460      315170 :                         sql_hash_e *he = sql_functions_lookup->buckets[key&(sql_functions_lookup->size-1)];
     461      315170 :                         lookup_function look = NULL;
     462             : 
     463      688413 :                         for (; he && !look; he = he->chain) {
     464      373243 :                                 struct function_properties* fp = (struct function_properties*) he->value;
     465             : 
     466      373243 :                                 if (!strcmp(f->func->base.name, fp->name))
     467      107717 :                                         look = fp->func;
     468             :                         }
     469      315170 :                         if (look)
     470      107717 :                                 look(sql, e);
     471             :                 }
     472             :                 /* for global aggregates with no semantics, if the left relation has values, then the output won't be NULL */
     473      341643 :                 if (!is_semantics(e) && e->l && !have_nil(e->l) &&
     474       89137 :                         (e->type != e_aggr || (is_groupby(rel->op) && list_length(rel->r)) || ((lv = get_rel_count(rel->l)) != BUN_NONE && lv > 0)))
     475       88814 :                         set_has_no_nil(e);
     476             :                 /* set properties for global aggregates */
     477      341643 :                 if (e->type == e_aggr && is_groupby(rel->op) && list_empty(rel->r)) {
     478        7871 :                         if (!find_prop(e->p, PROP_NUNIQUES)) {
     479        7871 :                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     480        7871 :                                 p->value.dval = 1;
     481             :                         }
     482        7871 :                         set_unique(e);
     483             :                 }
     484             :                 break;
     485             :         }
     486      570316 :         case e_atom:
     487      570316 :                 if (e->l) {
     488      553597 :                         atom *a = (atom*) e->l;
     489      553597 :                         if (!a->isnull) {
     490      490281 :                                 set_minmax_property(sql, e, PROP_MAX, a);
     491      490281 :                                 set_minmax_property(sql, e, PROP_MIN, a);
     492             :                         }
     493      553597 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     494      553597 :                         p->value.dval = 1;
     495       16719 :                 } else if (e->f) {
     496        3194 :                         list *vals = (list *) e->f;
     497        3194 :                         sql_exp *first = vals->h ? vals->h->data : NULL;
     498        3194 :                         atom *max = NULL, *min = NULL; /* all child values must have a valid min/max */
     499        3194 :                         int has_nil = 0;
     500             : 
     501        3194 :                         if (first) {
     502        3194 :                                 max = ((lval = find_prop_and_get(first->p, PROP_MAX))) ? lval : NULL;
     503        3194 :                                 min = ((lval = find_prop_and_get(first->p, PROP_MIN))) ? lval : NULL;
     504        3194 :                                 has_nil |= has_nil(first);
     505             :                         }
     506             : 
     507       13934 :                         for (node *n = vals->h ? vals->h->next : NULL ; n ; n = n->next) {
     508        7546 :                                 sql_exp *ee = n->data;
     509             : 
     510        7546 :                                 if (min && max) {
     511        7093 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MAX))) {
     512        7047 :                                                 max = atom_cmp(lval, max) > 0 ? lval : max;
     513             :                                         } else {
     514             :                                                 max = NULL;
     515             :                                         }
     516        7093 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MIN))) {
     517        7047 :                                                 min = atom_cmp(min, lval) > 0 ? lval : min;
     518             :                                         } else {
     519             :                                                 min = NULL;
     520             :                                         }
     521             :                                 }
     522        7546 :                                 has_nil |= has_nil(ee);
     523             :                         }
     524             : 
     525        3194 :                         if (!has_nil)
     526        2828 :                                 set_has_no_nil(e);
     527        3194 :                         if (min && max) {
     528        2805 :                                 set_minmax_property(sql, e, PROP_MAX, max);
     529        2805 :                                 set_minmax_property(sql, e, PROP_MIN, min);
     530             :                         }
     531        3194 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     532        3194 :                         p->value.dval = (dbl) list_length(vals);
     533             :                 } else {
     534       13525 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     535       13525 :                         p->value.dval = 1;
     536             :                 }
     537             :                 break;
     538      287170 :         case e_cmp:
     539             :                 /* TODO? propagating min/max/unique of booleans is not very worth it */
     540      287170 :                 if (e->flag == cmp_or || e->flag == cmp_filter) {
     541       18059 :                         if (!have_nil(e->l) && !have_nil(e->r))
     542       13589 :                                 set_has_no_nil(e);
     543      269111 :                 } else if (e->flag == cmp_in || e->flag == cmp_notin) {
     544       21242 :                         sql_exp *le = e->l;
     545       21242 :                         if (!has_nil(le) && !have_nil(e->r))
     546       18196 :                                 set_has_no_nil(e);
     547             :                 } else {
     548      247869 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     549      247869 :                         if (!has_nil(le) && !has_nil(re) && (!e->f || !has_nil(fe)))
     550      177932 :                                 set_has_no_nil(e);
     551             :                 }
     552             :                 break;
     553             :         case e_psm:
     554             :                 break;
     555             :         }
     556             : 
     557             : #ifndef NDEBUG
     558             :         {
     559             :                 /* min and max cannot be NULL and min must be <= than max, if it happens the inner relation must be empty! */
     560     3478811 :                 atom *min = find_prop_and_get(e->p, PROP_MIN), *max = find_prop_and_get(e->p, PROP_MAX);
     561             : 
     562     3478809 :                 (void) min;
     563     3478809 :                 (void) max;
     564     3478809 :                 assert(!min || !min->isnull);
     565     3478809 :                 assert(!max || !max->isnull);
     566     3478809 :                 assert(!min || !max || (min && max)/* atom_cmp(min, max) <= 0*/);
     567             :         }
     568             : #endif
     569     3478809 :         return e;
     570             : }
     571             : 
     572             : static list * /* Remove predicates always false from min/max values */
     573      139283 : rel_prune_predicates(visitor *v, sql_rel *rel)
     574             : {
     575      139283 :         if (rel->l) {
     576      139283 :                 sql_rel *l = rel->l;
     577      139283 :                 if (is_single(l))
     578        3391 :                         return rel->exps;
     579             :         }
     580      135892 :         if (!list_empty(rel->attr))
     581        2961 :                 return rel->exps;
     582      281968 :         for (node *n = rel->exps->h ; n ; n = n->next) {
     583      149037 :                 sql_exp *e = n->data;
     584             : 
     585      149037 :                 if (e->type == e_cmp && is_theta_exp(e->flag)) {
     586      122640 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     587      122640 :                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     588      122640 :                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     589      122640 :                         bool always_false = false, always_true = false;
     590             : 
     591      122640 :                         if (fe && !is_symmetric(e)) {
     592        3058 :                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
     593        3058 :                                 comp_type lower = range2lcompare(e->flag), higher = range2rcompare(e->flag);
     594        3660 :                                 int not_int1 = rval_min && lval_max && /* the middle and left intervals don't overlap */
     595        1663 :                                         (!is_anti(e) && (lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0)),
     596        4072 :                                         not_int2 = lval_min && fval_max && /* the middle and right intervals don't overlap */
     597        2485 :                                         (!is_anti(e) && (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0)),
     598        3058 :                                         not_int3 = rval_min && fval_max && /* the left interval is after the right one */
     599        1288 :                                         (!is_anti(e) && (atom_cmp(rval_min, fval_max) > 0));
     600             : 
     601        3058 :                                 always_false |= not_int1 || not_int2 || not_int3;
     602             :                                 /* for anti the middle must be before the left or after the right or the right after the left, for the other the middle must be always between the left and right intervals */
     603        4113 :                                 always_true |= !has_nil(le) && !has_nil(re) && !has_nil(fe) &&
     604        3957 :                                         lval_min && lval_max && rval_min && rval_max && fval_min && fval_max &&
     605         575 :                                         (is_anti(e) ? ((lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) || (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
     606         488 :                                          ((lower == cmp_gte ? atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0)));
     607             :                         } else if (!fe) {
     608      119564 :                                 if (!is_semantics(e)) /* trivial not null cmp null case */
     609      227704 :                                         always_false |= !is_anti(e) && ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)));
     610      119564 :                                 switch (e->flag) {
     611      104093 :                                 case cmp_equal:
     612      104093 :                                         if (lval_min && lval_max && rval_min && rval_max && (!is_semantics(e) || !has_nil(le) || !has_nil(re)))
     613      133448 :                                                 always_false |= (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     614      104093 :                                         if (is_semantics(e)) { /* prune *= NULL cases */
     615        5686 :                                                 always_false |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     616       11372 :                                                 always_true |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     617             :                                         }
     618             :                                         break;
     619        7208 :                                 case cmp_notequal:
     620        7208 :                                         if (lval_min && lval_max && rval_min && rval_max)
     621       11348 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     622        7208 :                                         if (is_semantics(e)) {
     623          26 :                                                 always_true |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     624          52 :                                                 always_false |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     625             :                                         }
     626             :                                         break;
     627        5632 :                                 case cmp_gt:
     628        5632 :                                         if (lval_max && rval_min)
     629        1947 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     630        5632 :                                         if (lval_min && rval_max)
     631        3894 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     632             :                                         break;
     633         119 :                                 case cmp_gte:
     634         119 :                                         if (lval_max && rval_min)
     635          49 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     636         119 :                                         if (lval_min && rval_max)
     637          98 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     638             :                                         break;
     639        2428 :                                 case cmp_lt:
     640        2428 :                                         if (lval_min && rval_max)
     641        1376 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     642        2428 :                                         if (lval_max && rval_min)
     643        2800 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     644             :                                         break;
     645          84 :                                 case cmp_lte:
     646          84 :                                         if (lval_min && rval_max)
     647          11 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     648          84 :                                         if (lval_max && rval_min)
     649          22 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     650             :                                         break;
     651             :                                 default: /* Maybe later I can do cmp_in and cmp_notin but don't forget to remove is_theta_exp check up there */
     652             :                                         break;
     653             :                                 }
     654             :                         }
     655      122640 :                         assert(!always_false || !always_true);
     656      122640 :                         if (always_false || always_true) {
     657        3651 :                                 sql_exp *ne = exp_atom_bool(v->sql->sa, always_true /* if always_true set then true else false */);
     658        3651 :                                 if (exp_name(e))
     659           0 :                                         exp_prop_alias(v->sql->sa, ne, e);
     660        3651 :                                 n->data = ne;
     661        3651 :                                 v->changes++;
     662             :                         }
     663             :                 }
     664             :         }
     665      132931 :         return rel->exps;
     666             : }
     667             : 
     668             : static sql_rel *
     669          14 : set_setop_side(visitor *v, sql_rel *rel, sql_rel *side)
     670             : {
     671          14 :         if (side == rel->r)
     672           0 :                 rel->r = NULL;
     673             :         else
     674          14 :                 rel->l = NULL;
     675          14 :         if (!is_simple_project(side->op) || rel_is_ref(side) || !list_empty(side->r)) {
     676           0 :                 side = rel_project(v->sql->sa, side, rel_projections(v->sql, side, NULL, 0, 1));
     677           0 :                 set_count_prop(v->sql->sa, side, get_rel_count(side->l));
     678           0 :                 side->exps = exps_exp_visitor_bottomup(v, side, side->exps, 0, &rel_propagate_statistics, false);
     679             :         }
     680          35 :         for (node *n = rel->exps->h, *m = side->exps->h ; n && m ; n = n->next, m = m->next) {
     681          21 :                 sql_exp *e1 = n->data, *e2 = m->data;
     682          21 :                 exp_prop_alias(v->sql->sa, e2, e1);
     683             :         }
     684          14 :         list_hash_clear(side->exps);
     685             : 
     686          14 :         if (need_distinct(rel))
     687           0 :                 set_distinct(side);
     688          14 :         side->p = prop_copy(v->sql->sa, rel->p);
     689          14 :         rel_destroy(rel);
     690          14 :         return side;
     691             : }
     692             : 
     693             : static BUN
     694       21876 : trivial_project_exp_card(sql_exp *e)
     695             : {
     696       22170 :         if (e->type == e_convert)
     697         294 :                 return trivial_project_exp_card(e->l);
     698       21876 :         return e->type == e_atom && e->f ? (BUN) list_length(e->f) : 1;
     699             : }
     700             : 
     701             : static BUN
     702       24248 : rel_calc_nuniques(mvc *sql, sql_rel *l, list *exps)
     703             : {
     704       24248 :         BUN lv = get_rel_count(l);
     705             : 
     706       24248 :         if (lv == 0)
     707             :                 return 0;
     708       21539 :         if (!list_empty(exps)) {
     709       21539 :                 BUN nuniques = 0;
     710             :                 /* compute the highest number of unique values */
     711       51130 :                 for (node *n = exps->h ; n && nuniques != BUN_NONE ; n = n->next) {
     712       29592 :                         sql_exp *e = n->data;
     713       29592 :                         sql_rel *bt = NULL;
     714       29592 :                         prop *p = NULL;
     715       29592 :                         BUN euniques = BUN_NONE;
     716       29592 :                         atom *min, *max, *sub = NULL;
     717       29592 :                         sql_subtype *tp = exp_subtype(e);
     718       29592 :                         sql_class ec = tp ? tp->type->eclass : EC_STRING; /* if 'e' has no type (eg parameter), use a non-number type to fail condition */
     719             : 
     720       29592 :                         if ((p = find_prop(e->p, PROP_NUNIQUES))) {
     721       20956 :                                 euniques = (BUN) p->value.dval;
     722        8636 :                         } else if (e->type == e_column && e->nid && rel_find_exp_and_corresponding_rel(l, e, false, &bt, NULL) && bt && (p = find_prop(bt->p, PROP_COUNT))) {
     723        6593 :                                 euniques = (BUN) p->value.lval;
     724             :                         }
     725             :                         /* use min to max range to compute number of possible values in the domain for number types */
     726       29592 :                         if ((EC_TEMP(ec)||ec==EC_NUM||ec==EC_MONTH||ec==EC_POS) &&
     727       22363 :                                 (min = find_prop_and_get(e->p, PROP_MIN)) && (max = find_prop_and_get(e->p, PROP_MAX))) {
     728             :                                 /* the range includes min and max, so the atom_inc call is needed */
     729             :                                 /* if 'euniques' has number of distinct values, compute min between both */
     730       17543 :                                 if ((sub = atom_sub(sql->sa, max, min)) && (sub = atom_inc(sql->sa, sub)) && (sub = atom_cast(sql->sa, sub, sql_bind_localtype("oid"))))
     731           0 :                                         euniques = MIN(euniques, (BUN) sub->data.val.oval);
     732             :                         }
     733       29591 :                         if (euniques != BUN_NONE)
     734       27548 :                                 nuniques = MAX(nuniques, euniques); /* the highest cardinality sets the estimation */
     735             :                         else
     736             :                                 nuniques = BUN_NONE;
     737             :                 }
     738       21538 :                 if (nuniques != BUN_NONE)
     739             :                         return nuniques;
     740             :         }
     741             :         return lv;
     742             : }
     743             : 
     744             : static sql_rel *
     745     2072523 : rel_get_statistics_(visitor *v, sql_rel *rel)
     746             : {
     747             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
     748     2072523 :         uint8_t has_special_modify = *(uint8_t*) v->data;
     749     2072523 :         bool can_be_pruned = !has_special_modify && v->storage_based_opt;
     750             : 
     751             :         /* Don't look at the same relation twice */
     752     2072523 :         if (are_statistics_gathered(rel->used))
     753             :                 return rel;
     754     1331304 :         rel->used |= statistics_gathered;
     755             : 
     756     1331304 :         switch (rel->op) {
     757      312616 :         case op_basetable: {
     758      312616 :                 sql_table *t = (sql_table *) rel->l;
     759      312616 :                 sqlstore *store = v->sql->session->tr->store;
     760             : 
     761      312616 :                 if (!list_empty(rel->exps)) {
     762     1248541 :                         for (node *n = rel->exps->h ; n ; n = n->next)
     763      935974 :                                 rel_basetable_column_get_statistics(v->sql, rel, n->data);
     764             :                 }
     765             :                 /* Set table row count. TODO? look for remote tables. Don't look at storage for declared tables, because it won't be cleaned */
     766      312616 :                 if (isTable(t) && t->s && !isDeclaredTable(t)) /* count active rows only */
     767      259073 :                         set_count_prop(v->sql->sa, rel, (BUN)store->storage_api.count_col(v->sql->session->tr, ol_first_node(t->columns)->data, 10));
     768             :                 break;
     769             :         }
     770        2790 :         case op_union:
     771             :         case op_inter:
     772             :         case op_except: {
     773        2790 :                 bool empty_cross = false;
     774        2790 :                 int i = 0;
     775        2790 :                 sql_rel *l = rel->l, *pl = rel->l, *r = rel->r, *pr = rel->r;
     776             : 
     777        2790 :                 while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     778           0 :                         pl = pl->l;
     779        2790 :                 while (is_sample(pr->op) || is_topn(pr->op))
     780           0 :                         pr = pr->l;
     781             :                 /* if it's not a projection, then project and propagate statistics */
     782        2790 :                 if (!is_project(pl->op) && !is_base(pl->op)) {
     783           0 :                         pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     784           0 :                         set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     785           0 :                         pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     786             :                 }
     787        2790 :                 if (!is_project(pr->op) && !is_base(pr->op)) {
     788           0 :                         pr = rel_project(v->sql->sa, pr, rel_projections(v->sql, pr, NULL, 0, 1));
     789           0 :                         set_count_prop(v->sql->sa, pr, get_rel_count(pr->l));
     790           0 :                         pr->exps = exps_exp_visitor_bottomup(v, pr, pr->exps, 0, &rel_propagate_statistics, false);
     791             :                 }
     792             : 
     793       11662 :                 for (node *n = rel->exps->h ; n ; n = n->next) {
     794        8872 :                         empty_cross |= rel_setop_get_statistics(v->sql, rel, pl->exps, pr->exps, n->data, i);
     795        8872 :                         i++;
     796             :                 }
     797             : 
     798             :                 /* propagate row count */
     799        2790 :                 if (is_union(rel->op)) {
     800         277 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     801         277 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     802             : 
     803         277 :                         if (lv == 0 && rv == 0) { /* both sides empty */
     804           2 :                                 if (can_be_pruned)
     805             :                                         empty_cross = true;
     806             :                                 else
     807           2 :                                         set_count_prop(v->sql->sa, rel, 0);
     808         275 :                         } else if (can_be_pruned && lv == 0 && !rel_is_ref(rel)) { /* left side empty */
     809           0 :                                 rel = set_setop_side(v, rel, r);
     810           0 :                                 empty_cross = false; /* don't rewrite again */
     811         275 :                         } else if (can_be_pruned && rv == 0 && !rel_is_ref(rel)) { /* right side empty */
     812           0 :                                 rel = set_setop_side(v, rel, l);
     813           0 :                                 empty_cross = false; /* don't rewrite again */
     814         275 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
     815           7 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
     816             :                         }
     817        2513 :                 } else if (is_inter(rel->op) || is_except(rel->op)) {
     818        2513 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     819        2513 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     820             : 
     821        2513 :                         if (lv == 0) { /* left side empty */
     822          62 :                                 if (can_be_pruned)
     823             :                                         empty_cross = true;
     824             :                                 else
     825           5 :                                         set_count_prop(v->sql->sa, rel, 0);
     826        2451 :                         } else if (rv == 0) { /* right side empty */
     827          17 :                                 if (is_inter(rel->op)) {
     828           3 :                                         if (can_be_pruned)
     829             :                                                 empty_cross = true;
     830             :                                         else
     831           0 :                                                 set_count_prop(v->sql->sa, rel, 0);
     832          14 :                                 } else if (can_be_pruned && !rel_is_ref(rel)) {
     833          14 :                                         rel = set_setop_side(v, rel, l);
     834          14 :                                         empty_cross = false; /* don't rewrite again */
     835             :                                 } else {
     836           0 :                                         set_count_prop(v->sql->sa, rel, lv);
     837             :                                 }
     838             :                         } else {
     839        2434 :                                 set_count_prop(v->sql->sa, rel, lv);
     840             :                         }
     841             :                 }
     842        2790 :                 if (can_be_pruned && empty_cross) {
     843          80 :                         rel_destroy(rel->l);
     844          80 :                         rel->l = NULL;
     845          80 :                         rel_destroy(rel->r);
     846          80 :                         rel->r = NULL;
     847         244 :                         for (node *n = rel->exps->h ; n ; n = n->next) {
     848         164 :                                 sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     849         164 :                                 exp_prop_alias(v->sql->sa, a, e);
     850         164 :                                 n->data = a;
     851             :                         }
     852          80 :                         list_hash_clear(rel->exps);
     853          80 :                         sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     854          80 :                         set_count_prop(v->sql->sa, l, 1);
     855          80 :                         l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     856          80 :                         set_count_prop(v->sql->sa, l, 0);
     857          80 :                         rel->op = op_project;
     858          80 :                         rel->l = l;
     859          80 :                         rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     860          80 :                         set_count_prop(v->sql->sa, rel, 0);
     861          80 :                         set_nodistinct(rel); /* set relations may have distinct flag set */
     862          80 :                         v->changes++;
     863             :                 }
     864             :                 break;
     865             :         }
     866       12864 :         case op_munion: {
     867       12864 :                 list *l = rel->l, *nrels = sa_list(v->sql->sa);
     868       12864 :                 BUN cnt = 0;
     869       12864 :                 bool needs_pruning = false;
     870             : 
     871       49201 :                 for (node *n = l->h; n; n = n->next) {
     872       36337 :                         sql_rel *r = n->data, *pl = r;
     873             : 
     874       36337 :                         while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     875           0 :                                         pl = pl->l;
     876             :                         /* if it's not a projection, then project and propagate statistics */
     877       36337 :                         if (!is_project(pl->op) && !is_base(pl->op)) {
     878           0 :                                 pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     879           0 :                                 set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     880           0 :                                 pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     881             :                         }
     882       36337 :                         nrels = append(nrels, pl);
     883             :                         /* we need new munion statistics */
     884             :                         /* propagate row count */
     885       36337 :                         BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     886             :                         /* if PROP_COUNT does not exist we assume at least a row (see get_rel_count def) */
     887       36337 :                         if (rv == BUN_NONE) {
     888        1156 :                                 cnt++;
     889        1156 :                                 continue;
     890             :                         }
     891       35181 :                         if (!rv && can_be_pruned)
     892        6617 :                                 needs_pruning = true;
     893             :                         /* overflow check */
     894       35181 :                         if (rv > (BUN_MAX - cnt))
     895       36337 :                                 rv = BUN_MAX;
     896             :                         else
     897       35181 :                                 cnt += rv;
     898             :                 }
     899       12864 :                 int i = 0;
     900       73552 :                 for (node *n = rel->exps->h ; n ; n = n->next, i++)
     901       60688 :                         rel_munion_get_statistics(v->sql, rel, nrels, n->data, i);
     902             : 
     903       12864 :                 if (needs_pruning && !rel_is_ref(rel)) {
     904        4456 :                         v->changes++;
     905        4456 :                         list *nl = sa_list(l->sa);
     906             : 
     907       16441 :                         for (node *n = nrels->h; n; n = n->next) {
     908       11985 :                                 sql_rel *r = n->data;
     909       11985 :                                 BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     910             : 
     911       11985 :                                 if (!rv) { /* keep last for now */
     912        6147 :                                         rel_destroy(r);
     913        6147 :                                         continue;
     914             :                                 }
     915        5838 :                                 nl = append(nl, r);
     916             :                         }
     917        4456 :                         rel->l = nl;
     918        4456 :                         if (list_length(nl) == 1) {
     919        4130 :                                 sql_rel *l = rel->l = nl->h->data; /* ugh */
     920        4130 :                                 rel->r = NULL;
     921        4130 :                                 rel->op = op_project;
     922             : 
     923       20287 :                                 for (node *n = rel->exps->h, *m = l->exps->h ; n && m ; n = n->next, m = m->next) {
     924       16157 :                                         sql_exp *pe = n->data, *ie = m->data;
     925       16157 :                                         sql_exp *ne = exp_ref(v->sql, ie);
     926       16157 :                                         exp_prop_alias(v->sql->sa, ne, pe);
     927       16157 :                                         n->data = ne;
     928             :                                 }
     929        4130 :                                 list_hash_clear(rel->exps);
     930         326 :                         } else if (list_empty(nl)) {
     931             :                                 /* empty select (project [ nils ] ) */
     932         454 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     933         354 :                                         sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     934         354 :                                         exp_prop_alias(v->sql->sa, a, e);
     935         354 :                                         n->data = a;
     936             :                                 }
     937         100 :                                 list_hash_clear(rel->exps);
     938         100 :                                 sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     939         100 :                                 set_count_prop(v->sql->sa, l, 1);
     940         100 :                                 l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     941         100 :                                 set_count_prop(v->sql->sa, l, 0);
     942         100 :                                 rel->op = op_project;
     943         100 :                                 rel->r = NULL;
     944         100 :                                 rel->l = l;
     945         100 :                                 rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     946         100 :                                 set_count_prop(v->sql->sa, rel, 0);
     947         100 :                                 set_nodistinct(rel); /* set relations may have distinct flag set */
     948             :                         }
     949             :                 } else {
     950        8408 :                         set_count_prop(v->sql->sa, rel, cnt);
     951             :                 }
     952             :                 break;
     953             :         }
     954      532113 :         case op_join:
     955             :         case op_left:
     956             :         case op_right:
     957             :         case op_full:
     958             :         case op_semi:
     959             :         case op_anti:
     960             :         case op_select:
     961             :         case op_project:
     962             :         case op_groupby: {
     963      532113 :                 if (is_groupby(rel->op) && !list_empty(rel->r))
     964       15531 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     965      532113 :                 rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, &rel_propagate_statistics, false);
     966      532113 :                 if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r))
     967       39252 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     968             :                 /* The following optimizations can only be applied after propagating the statistics to rel->exps */
     969      532113 :                 if (can_be_pruned && (is_join(rel->op) || is_select(rel->op)) && !list_empty(rel->exps)) {
     970      139283 :                         int changes = v->changes;
     971      139283 :                         rel->exps = rel_prune_predicates(v, rel);
     972      139283 :                         if (v->changes > changes)
     973        3618 :                                 rel = rewrite_simplify(v, 0, v->value_based_opt, rel);
     974             :                 }
     975             : 
     976             :                 /* propagate row count */
     977      532113 :                 sql_rel *l = rel->l, *r = rel->r;
     978      532113 :                 switch (rel->op) {
     979      134385 :                 case op_join:
     980             :                 case op_left:
     981             :                 case op_right:
     982             :                 case op_full: {
     983      134385 :                         BUN lv = get_rel_count(l), rv = get_rel_count(r), uniques_estimate = BUN_MAX, join_idx_estimate = BUN_MAX;
     984             : 
     985      134385 :                         if (!list_empty(rel->exps) && !is_single(rel)) {
     986      245807 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     987      125486 :                                         sql_exp *e = n->data, *el = e->l, *er = e->r;
     988             : 
     989      125486 :                                         if (find_prop(e->p, PROP_JOINIDX)) {
     990         667 :                                                 join_idx_estimate = lv>rv?lv:rv;
     991      124819 :                                         } else if (e->type == e_cmp && e->flag == cmp_equal) {
     992             :                                                 /* if one of the sides is unique, the cardinality will be that exact number, but look for nulls */
     993      120935 :                                                 if (!is_semantics(e) || !has_nil(el) || !has_nil(er)) {
     994      120539 :                                                         BUN lu = 0, ru = 0;
     995      120539 :                                                         prop *p = NULL;
     996      120539 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES)))
     997       89461 :                                                                 lu = (BUN) p->value.dval;
     998      120539 :                                                         if ((p = find_prop(er->p, PROP_NUNIQUES)))
     999      103849 :                                                                 ru = (BUN) p->value.dval;
    1000      120539 :                                                         if (is_unique(el) || lu > lv) {
    1001       73609 :                                                                 BUN ncount = (is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv;
    1002       73609 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1003       46930 :                                                         } else if (is_unique(er) || ru > rv) {
    1004       29328 :                                                                 BUN ncount = (is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv;
    1005       29328 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1006             :                                                         }
    1007             :                                                 }
    1008             :                                         }
    1009             :                                 }
    1010             :                         }
    1011      134385 :                         if (is_single(rel)) {
    1012        4731 :                                 set_count_prop(v->sql->sa, rel, lv);
    1013      129654 :                         } else if (join_idx_estimate != BUN_MAX) {
    1014         665 :                                 set_count_prop(v->sql->sa, rel, join_idx_estimate);
    1015      128989 :                         } else if (uniques_estimate != BUN_MAX) {
    1016       96491 :                                 set_count_prop(v->sql->sa, rel, uniques_estimate);
    1017       32498 :                         } else if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1018             :                                 /* corner cases for outer joins */
    1019         126 :                                 if (is_left(rel->op)) {
    1020         114 :                                         set_count_prop(v->sql->sa, rel, lv);
    1021          12 :                                 } else if (is_right(rel->op)) {
    1022           3 :                                         set_count_prop(v->sql->sa, rel, rv);
    1023           9 :                                 } else if (is_full(rel->op) && lv != BUN_NONE && rv != BUN_NONE) {
    1024           9 :                                         set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
    1025           0 :                                 } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1026           0 :                                         set_count_prop(v->sql->sa, rel, 0);
    1027             :                                 }
    1028       32372 :                         } else if (lv == 0) {
    1029        1166 :                                 set_count_prop(v->sql->sa, rel, (is_right(rel->op) || is_full(rel->op)) ? rv : 0);
    1030       31775 :                         } else if (rv == 0) {
    1031        1568 :                                 set_count_prop(v->sql->sa, rel, (is_left(rel->op) || is_full(rel->op)) ? lv : 0);
    1032       30701 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1033       18251 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX / lv)) ? BUN_MAX : (lv * rv)); /* overflow check */
    1034             :                         }
    1035             :                         break;
    1036             :                 }
    1037        2635 :                 case op_anti:
    1038        2635 :                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1039        2635 :                         break;
    1040      108879 :                 case op_semi:
    1041             :                 case op_select:
    1042             :                         /* TODO calculate cardinalities using selectivities */
    1043      108879 :                         if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1044        2743 :                                 set_count_prop(v->sql->sa, rel, 0);
    1045             :                         } else {
    1046      209024 :                                 if (!list_empty(rel->exps) && !is_single(rel)) {
    1047      102888 :                                         BUN cnt = get_rel_count(l), u = 1;
    1048      171648 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1049      112729 :                                                 sql_exp *e = n->data, *el = e->l, *er = e->r;
    1050             : 
    1051             :                                                 /* simple expressions first */
    1052      112729 :                                                 if (e->type == e_cmp && e->flag == cmp_equal && exp_is_atom(er)) {
    1053             :                                                         /* use selectivity */
    1054       57159 :                                                         prop *p;
    1055       57159 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES))) {
    1056       43969 :                                                                 u = (BUN) p->value.dval;
    1057       43969 :                                                                 break;
    1058             :                                                         }
    1059             :                                                 }
    1060             :                                         }
    1061             :                                         /* u is an *estimate*, so don't set count_prop to 0 unless cnt is 0 */
    1062      102888 :                                         set_count_prop(v->sql->sa, rel, cnt == 0 ? 0 : u == 0 || u > cnt ? 1 : cnt/u);
    1063             :                                 } else {
    1064        3248 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1065             :                                 }
    1066             :                         }
    1067             :                         break;
    1068      262856 :                 case op_project:
    1069      262856 :                         if (l) {
    1070      252742 :                                 if (need_distinct(rel)) {
    1071           0 :                                         set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->exps));
    1072             :                                 } else {
    1073      252742 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1074             :                                 }
    1075             :                         } else {
    1076       10114 :                                 BUN card = 1;
    1077             : 
    1078       10114 :                                 if (!list_empty(rel->exps)) {
    1079       30661 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1080       20547 :                                                 sql_exp *e = n->data;
    1081       20547 :                                                 card = MAX(card, trivial_project_exp_card(e));
    1082             :                                         }
    1083             :                                 }
    1084       10114 :                                 set_count_prop(v->sql->sa, rel, card);
    1085             :                         }
    1086             :                         break;
    1087       22953 :                 case op_groupby:
    1088       22953 :                         if (list_empty(rel->r)) {
    1089        7421 :                                 set_count_prop(v->sql->sa, rel, 1);
    1090             :                         } else {
    1091       15532 :                                 set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->r));
    1092             :                         }
    1093             :                         break;
    1094             :                 default:
    1095             :                         break;
    1096             :                 }
    1097             :                 break;
    1098             :         }
    1099       16831 :         case op_topn: {
    1100       16831 :                 BUN lv = get_rel_count(rel->l);
    1101             : 
    1102       16831 :                 if (lv != BUN_NONE) {
    1103       16814 :                         sql_exp *le = rel->exps->h->data, *oe = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
    1104          96 :                         if (oe && oe->l && exp_is_not_null(oe)) { /* no parameters */
    1105          96 :                                 BUN offset = (BUN) ((atom*)oe->l)->data.val.lval;
    1106          96 :                                 lv = offset >= lv ? 0 : lv - offset;
    1107             :                         }
    1108       16814 :                         if (le->l && exp_is_not_null(le)) {
    1109       16776 :                                 BUN limit = (BUN) ((atom*)le->l)->data.val.lval;
    1110       16776 :                                 lv = MIN(lv, limit);
    1111             :                         }
    1112       16814 :                         set_count_prop(v->sql->sa, rel, lv);
    1113             :                 }
    1114             :                 break;
    1115             :         }
    1116          22 :         case op_sample: {
    1117          22 :                 BUN lv = get_rel_count(rel->l);
    1118             : 
    1119          22 :                 if (lv != BUN_NONE) {
    1120           4 :                         sql_exp *se = rel->exps->h->data;
    1121           4 :                         sql_subtype *tp = exp_subtype(se);
    1122             : 
    1123           4 :                         if (se->l && tp->type->eclass == EC_NUM) { /* sample is a number of rows */
    1124           4 :                                 BUN sample = (BUN) ((atom*)se->l)->data.val.lval;
    1125           4 :                                 lv = MIN(lv, sample);
    1126           0 :                         } else if (se->l) { /* sample is a percentage of rows */
    1127           0 :                                 dbl percent = ((atom*)se->l)->data.val.dval;
    1128           0 :                                 assert(tp->type->eclass == EC_FLT);
    1129           0 :                                 lv = (BUN) ceil((dbl)lv * percent);
    1130             :                         }
    1131           4 :                         set_count_prop(v->sql->sa, rel, lv);
    1132             :                 }
    1133             :                 break;
    1134             :         }
    1135        5975 :         case op_table: {
    1136        5975 :                 sql_exp *op = rel->r;
    1137        5975 :                 if (rel->flag != TRIGGER_WRAPPER && op) {
    1138        5663 :                         sql_subfunc *f = op->f;
    1139        5663 :                         if (f->func->lang == FUNC_LANG_SQL) {
    1140          97 :                                 set_count_prop(v->sql->sa, rel, 1000 /* just some fallback value */);
    1141        5566 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "storage") == 0) {
    1142         829 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of catalog */);
    1143        4737 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "db_users") == 0) {
    1144           0 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of users */);
    1145        4737 :                         } else if (f->func->lang == FUNC_LANG_MAL && strncmp(f->func->base.name, "querylog", 8) == 0) {
    1146        1085 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of querylog */);
    1147        3652 :                         } else if (f->func->lang == FUNC_LANG_MAL &&
    1148        3542 :                                            (strcmp(f->func->base.name, "queue") == 0 ||
    1149        3278 :                                                 strcmp(f->func->base.name, "optimizers") == 0 ||
    1150        2966 :                                                 strcmp(f->func->base.name, "env") == 0 ||
    1151        2675 :                                                 strcmp(f->func->base.name, "keywords") == 0 ||
    1152        2675 :                                                 strcmp(f->func->base.name, "statistics") == 0 ||
    1153        2008 :                                                 strcmp(f->func->base.name, "rejects") == 0 ||
    1154        1754 :                                                 strcmp(f->func->base.name, "schemastorage") == 0 ||
    1155        1754 :                                                 strncmp(f->func->base.name, "storage", 7) == 0 ||
    1156        1754 :                                                 strcmp(f->func->base.name, "sessions") == 0) ) {
    1157        2078 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of queue */);
    1158             :                         }
    1159             :                         /* else {
    1160             :                                 printf("%%func needs stats : %s\n", f->func->base.name);
    1161             :                         } */
    1162             :                 }
    1163             :                 break;
    1164             :         }
    1165             :         /*These relations are less important for now
    1166             :           TODO later we can tune it */
    1167             :         case op_insert:
    1168             :         case op_update:
    1169             :         case op_delete:
    1170             :         case op_truncate:
    1171             :         case op_ddl:
    1172             :         default:
    1173             :                 break;
    1174             :         }
    1175             : 
    1176             :         return rel;
    1177             : }
    1178             : 
    1179             : static sql_rel *
    1180      539864 : rel_get_statistics(visitor *v, global_props *gp, sql_rel *rel)
    1181             : {
    1182             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
    1183      539864 :         uint8_t has_special_modify = (uint8_t) gp->has_special_modify;
    1184      539864 :         v->data = &has_special_modify;
    1185      539864 :         rel = rel_visitor_bottomup(v, rel, &rel_get_statistics_);
    1186      539864 :         v->data = gp;
    1187      539864 :         return rel;
    1188             : }
    1189             : 
    1190             : run_optimizer
    1191      705687 : bind_get_statistics(visitor *v, global_props *gp)
    1192             : {
    1193      705687 :         (void) v;
    1194      705687 :         return (gp->opt_level == 1 && !gp->cnt[op_insert]) ? rel_get_statistics : NULL;
    1195             : }
    1196             : 
    1197             : 
    1198             : static bool
    1199       94898 : point_select_on_unique_column(sql_rel *rel)
    1200             : {
    1201       94898 :         if (is_select(rel->op) && !list_empty(rel->exps)) {
    1202      131375 :                 for (node *n = rel->exps->h; n ; n = n->next) {
    1203       75652 :                         sql_exp *e = n->data, *el = e->l, *er = e->r, *found = NULL;
    1204             : 
    1205       75652 :                         if (is_compare(e->type) && e->flag == cmp_equal) {
    1206       33623 :                                 if (is_numeric_upcast(el))
    1207           0 :                                         el = el->l;
    1208       33623 :                                 if (is_numeric_upcast(er))
    1209           0 :                                         er = er->l;
    1210       33623 :                                 if (is_alias(el->type) && exp_is_atom(er) && (found = rel_find_exp(rel->l, el)) &&
    1211         749 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(found) || !has_nil(er)))
    1212             :                                         return true;
    1213       32874 :                                 if (is_alias(er->type) && exp_is_atom(el) && (found = rel_find_exp(rel->l, er)) &&
    1214           0 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(el) || !has_nil(found)))
    1215             :                                         return true;
    1216             :                         }
    1217             :                 }
    1218             :         }
    1219             :         return false;
    1220             : }
    1221             : 
    1222             : /*
    1223             :  * A point select on an unique column reduces the number of rows to 1. If the same select is under a
    1224             :  * join, the opposite side's select can be pushed above the join.
    1225             :  */
    1226             : static inline sql_rel *
    1227     1244764 : rel_push_select_up(visitor *v, sql_rel *rel)
    1228             : {
    1229     1244764 :         if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) || is_semi(rel->op)) && !is_single(rel)) {
    1230      253995 :                 sql_rel *l = rel->l, *r = rel->r;
    1231      253995 :                 bool can_pushup_left = is_select(l->op) && !rel_is_ref(l) && !is_single(l) && (is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op)),
    1232      253995 :                         can_pushup_right = is_select(r->op) && !rel_is_ref(r) && !is_single(r) && (is_innerjoin(rel->op) || is_right(rel->op));
    1233             : 
    1234      253995 :                 if (can_pushup_left || can_pushup_right) {
    1235       66899 :                         if (can_pushup_left)
    1236       45431 :                                 can_pushup_left = point_select_on_unique_column(r);
    1237       66899 :                         if (can_pushup_right)
    1238       49467 :                                 can_pushup_right = point_select_on_unique_column(l);
    1239             : 
    1240             :                         /* if both selects retrieve one row each, it's not worth it to push both up */
    1241       66899 :                         if (can_pushup_left && !can_pushup_right) {
    1242         683 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1243         683 :                                 nrel->l = l->l;
    1244         683 :                                 rel = rel_inplace_select(rel, nrel, l->exps);
    1245         683 :                                 assert(is_select(rel->op));
    1246         683 :                                 v->changes++;
    1247       66216 :                         } else if (!can_pushup_left && can_pushup_right) {
    1248          14 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1249          14 :                                 nrel->r = r->l;
    1250          14 :                                 rel = rel_inplace_select(rel, nrel, r->exps);
    1251          14 :                                 assert(is_select(rel->op));
    1252          14 :                                 v->changes++;
    1253             :                         }
    1254             :                 }
    1255             :         }
    1256     1244764 :         return rel;
    1257             : }
    1258             : 
    1259             : static int
    1260       93237 : sql_class_base_score(visitor *v, sql_column *c, sql_subtype *t, bool equality_based)
    1261             : {
    1262       93237 :         int de;
    1263             : 
    1264       93237 :         if (!t)
    1265             :                 return 0;
    1266       93237 :         switch (ATOMstorage(t->type->localtype)) {
    1267             :         case TYPE_bte:
    1268             :                 return 150 - 8;
    1269        1593 :         case TYPE_sht:
    1270        1593 :                 return 150 - 16;
    1271       37290 :         case TYPE_int:
    1272       37290 :                 return 150 - 32;
    1273         518 :         case TYPE_void:
    1274             :         case TYPE_lng:
    1275         518 :                 return 150 - 64;
    1276             :         case TYPE_uuid:
    1277             : #ifdef HAVE_HGE
    1278             :         case TYPE_hge:
    1279             : #endif
    1280             :                 return 150 - 128;
    1281           2 :         case TYPE_flt:
    1282           2 :                 return 75 - 24;
    1283             :         case TYPE_dbl:
    1284             :                 return 75 - 53;
    1285       30567 :         default:
    1286       30567 :                 if (equality_based && c && v->storage_based_opt && (de = mvc_is_duplicate_eliminated(v->sql, c)))
    1287        1554 :                         return 150 - de * 8;
    1288             :                 /* strings and blobs not duplicate eliminated don't get any points here */
    1289             :                 return 0;
    1290             :         }
    1291             : }
    1292             : 
    1293             : static int
    1294       34338 : score_se_base(visitor *v, sql_rel *rel, sql_exp *e)
    1295             : {
    1296       34338 :         int res = 0;
    1297       34338 :         sql_subtype *t = exp_subtype(e);
    1298       34338 :         sql_column *c = NULL;
    1299             : 
    1300       34338 :         if (e->type == e_convert) /* keep unsafes at the end (TODO improve) */
    1301             :                 return -1000;
    1302             :         /* can we find out if the underlying table is sorted */
    1303       33737 :         if ((c = exp_find_column(rel, e, -2)) && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1304       33737 :                 res += 600;
    1305             : 
    1306             :         /* prefer the shorter var types over the longer ones */
    1307       33737 :         res += sql_class_base_score(v, c, t, is_equality_or_inequality_exp(e->flag)); /* smaller the type, better */
    1308       33737 :         return res;
    1309             : }
    1310             : 
    1311             : static int
    1312       58417 : score_se(visitor *v, sql_rel *rel, sql_exp *e)
    1313             : {
    1314       58417 :         int score = 0;
    1315       58417 :         if (e->type == e_cmp && !is_complex_exp(e->flag)) {
    1316       34338 :                 sql_exp *l = e->l;
    1317             : 
    1318       34338 :                 while (l->type == e_cmp) { /* go through nested comparisons */
    1319           2 :                         sql_exp *ll;
    1320             : 
    1321           2 :                         if (l->flag == cmp_filter || l->flag == cmp_or)
    1322           0 :                                 ll = ((list*)l->l)->h->data;
    1323             :                         else
    1324           2 :                                 ll = l->l;
    1325           2 :                         if (ll->type != e_cmp)
    1326             :                                 break;
    1327             :                         l = ll;
    1328             :                 }
    1329       34338 :                 score += score_se_base(v, rel, l);
    1330             :         }
    1331       58417 :         score += exp_keyvalue(e);
    1332       58417 :         return score;
    1333             : }
    1334             : 
    1335             : static inline sql_rel *
    1336     1244764 : rel_select_order(visitor *v, sql_rel *rel)
    1337             : {
    1338     1244764 :         int *scores = NULL;
    1339     1244764 :         sql_exp **exps = NULL;
    1340             : 
    1341     1244764 :         if (is_select(rel->op) && list_length(rel->exps) > 1) {
    1342       27326 :                 node *n;
    1343       27326 :                 int i, nexps = list_length(rel->exps);
    1344       27326 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, nexps);
    1345       27326 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, nexps);
    1346             : 
    1347       85743 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next) {
    1348       58447 :                         exps[i] = n->data;
    1349       58447 :                         if (find_prop(exps[i]->p, PROP_HASHCOL))
    1350             :                                 return rel;
    1351       58417 :                         scores[i] = score_se(v, rel, n->data);
    1352             :                 }
    1353       27296 :                 GDKqsort(scores, exps, NULL, nexps, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1354             : 
    1355       85713 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next)
    1356       58417 :                         n->data = exps[i];
    1357             :         }
    1358             : 
    1359             :         return rel;
    1360             : }
    1361             : 
    1362             : /* Compute the efficiency of using this expression early in a group by list */
    1363             : static int
    1364       59500 : score_gbe(visitor *v, sql_rel *rel, sql_exp *e)
    1365             : {
    1366       59500 :         int res = 0;
    1367       59500 :         sql_subtype *t = exp_subtype(e);
    1368       59500 :         sql_column *c = exp_find_column(rel, e, -2);
    1369             : 
    1370       59500 :         if (e->card == CARD_ATOM) /* constants are trivial to group */
    1371          38 :                 res += 1000;
    1372             :         /* can we find out if the underlying table is sorted */
    1373       59500 :         if (is_unique(e) || find_prop(e->p, PROP_HASHCOL) || (c && v->storage_based_opt && mvc_is_unique(v->sql, c))) /* distinct columns */
    1374        3584 :                 res += 700;
    1375       38910 :         if (c && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1376        3710 :                 res += 500;
    1377       59500 :         if (find_prop(e->p, PROP_HASHIDX)) /* has hash index */
    1378           0 :                 res += 200;
    1379             : 
    1380             :         /* prefer the shorter var types over the longer ones */
    1381       59500 :         res += sql_class_base_score(v, c, t, true); /* smaller the type, better */
    1382       59500 :         return res;
    1383             : }
    1384             : 
    1385             : /* reorder group by expressions */
    1386             : static inline sql_rel *
    1387     1244764 : rel_groupby_order(visitor *v, sql_rel *rel)
    1388             : {
    1389     1244764 :         int *scores = NULL;
    1390     1244764 :         sql_exp **exps = NULL;
    1391             : 
    1392     1244764 :         if (is_groupby(rel->op) && list_length(rel->r) > 1) {
    1393       25587 :                 node *n;
    1394       25587 :                 list *gbe = rel->r;
    1395       25587 :                 int i, ngbe = list_length(gbe);
    1396       25587 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, ngbe);
    1397       25587 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, ngbe);
    1398             : 
    1399             :                 /* first sorting step, give priority for integers and sorted columns */
    1400       85087 :                 for (i = 0, n = gbe->h; n; i++, n = n->next) {
    1401       59500 :                         exps[i] = n->data;
    1402       59500 :                         scores[i] = score_gbe(v, rel, exps[i]);
    1403             :                 }
    1404       25587 :                 GDKqsort(scores, exps, NULL, ngbe, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1405             : 
    1406             :                 /* second sorting step, give priority to strings with lower number of digits */
    1407       50504 :                 for (i = ngbe - 1; i && !scores[i]; i--); /* find expressions with no score from the first round */
    1408       25587 :                 if (scores[i])
    1409       24589 :                         i++;
    1410       25587 :                 if (ngbe - i > 1) {
    1411        8604 :                         for (int j = i; j < ngbe; j++) {
    1412        6597 :                                 sql_subtype *t = exp_subtype(exps[j]);
    1413        6597 :                                 scores[j] = t ? t->digits : 0;
    1414             :                         }
    1415             :                         /* the less number of digits the better, order ascending */
    1416        2007 :                         GDKqsort(scores + i, exps + i, NULL, ngbe - i, sizeof(int), sizeof(void *), TYPE_int, false, true);
    1417             :                 }
    1418             : 
    1419       85087 :                 for (i = 0, n = gbe->h; n; i++, n = n->next)
    1420       59500 :                         n->data = exps[i];
    1421             :         }
    1422             : 
    1423     1244764 :         return rel;
    1424             : }
    1425             : 
    1426             : /* This optimization loop contains optimizations that can potentially use statistics */
    1427             : static sql_rel *
    1428     1244764 : rel_final_optimization_loop_(visitor *v, sql_rel *rel)
    1429             : {
    1430             :         /* Run rel_push_select_up only once at the end to avoid an infinite optimization loop */
    1431     1244764 :         rel = rel_push_select_up(v, rel);
    1432     1244764 :         rel = rel_select_order(v, rel);
    1433             : 
    1434             :         /* TODO? Maybe later add rel_simplify_count, rel_join2semijoin, rel_simplify_fk_joins,
    1435             :            rel_distinct_project2groupby, rel_simplify_predicates, rel_simplify_math,
    1436             :            rel_distinct_aggregate_on_unique_values */
    1437             : 
    1438     1244764 :         rel = rel_groupby_order(v, rel);
    1439     1244763 :         return rel;
    1440             : }
    1441             : 
    1442             : static sql_rel *
    1443       66051 : rel_final_optimization_loop(visitor *v, global_props *gp, sql_rel *rel)
    1444             : {
    1445       66051 :         (void) gp;
    1446       66051 :         return rel_visitor_bottomup(v, rel, &rel_final_optimization_loop_);
    1447             : }
    1448             : 
    1449             : run_optimizer
    1450      705682 : bind_final_optimization_loop(visitor *v, global_props *gp)
    1451             : {
    1452      705682 :         int flag = v->sql->sql_optimizer;
    1453             :         /* At the moment, this optimizer has dependency on 3 flags */
    1454      547453 :         return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_select]) &&
    1455      771735 :                 (flag & push_select_up) && (flag & optimize_select_and_joins_topdown) && (flag & optimize_projections) ? rel_final_optimization_loop : NULL;
    1456             : }

Generated by: LCOV version 1.14