LCOV - code coverage report
Current view: top level - sql/server - rel_statistics.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 831 883 94.1 %
Date: 2024-12-20 20:06:10 Functions: 26 26 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include "rel_optimizer_private.h"
      15             : #include "rel_statistics.h"
      16             : #include "rel_basetable.h"
      17             : #include "rel_rewriter.h"
      18             : 
      19             : static sql_exp *
      20     3466573 : comparison_find_column(sql_exp *input, sql_exp *e)
      21             : {
      22     3466573 :         switch (input->type) {
      23       98466 :         case e_convert: {
      24       98466 :                 list *types = (list *)input->r;
      25       98466 :                 sql_class from = ((sql_subtype*)types->h->data)->type->eclass, to = ((sql_subtype*)types->h->next->data)->type->eclass;
      26       98466 :                 if (from == to)
      27      189864 :                         return comparison_find_column(input->l, e) ? input : NULL;
      28             :                 return NULL;
      29             :         }
      30     3055002 :         case e_column:
      31     3055002 :                 return exp_match(e, input) ? input : NULL;
      32             :         default:
      33             :                 return NULL;
      34             :         }
      35             : }
      36             : 
      37             : /* multi lo <= col <= hi, maybe still be false even if lo or hi are NULL, possibly similar for filter on multiple
      38             :  * columns */
      39             : #define comp_single_column(c) (!c->f)
      40             : 
      41             : static sql_exp *
      42     8749961 : rel_propagate_column_ref_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
      43             : {
      44     8833121 :         assert(e->type == e_column);
      45     8833121 :         if (rel) {
      46     8833042 :                 switch(rel->op) {
      47     4090202 :                 case op_left:
      48             :                 case op_right:
      49             :                 case op_full:
      50             :                 case op_join:
      51             :                 case op_select:
      52             :                 case op_anti:
      53             :                 case op_semi: {
      54     4090202 :                         bool found_without_semantics = false, found_left = false, found_right = false, still_unique = false;
      55             : 
      56     4090202 :                         if ((is_innerjoin(rel->op) || is_select(rel->op)) && list_length(rel->exps) == 1 && exp_is_false(rel->exps->h->data))
      57             :                                 return NULL; /* nothing will pass, skip */
      58             : 
      59             :                         /* propagate from the bottom first */
      60     4074649 :                         if (rel_propagate_column_ref_statistics(sql, rel->l, e))
      61             :                                 found_left = true;
      62     2591285 :                         if (!found_left && is_join(rel->op) && rel_propagate_column_ref_statistics(sql, rel->r, e))
      63     4074649 :                                 found_right = true;
      64             : 
      65     4074649 :                         if (!found_left && !found_right)
      66             :                                 return NULL;
      67     1778799 :                         if (!list_empty(rel->exps) && rel->op != op_anti) { /* if there's an or, the MIN and MAX get difficult to propagate */
      68     3498992 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
      69     1811527 :                                         sql_exp *comp = n->data, *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL, *fe = comp->f, *fne = NULL;
      70             : 
      71     1811527 :                                         if (comp->type == e_cmp) {
      72     1810525 :                                                 if (is_theta_exp(comp->flag) && ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) || (fe && (fne = comparison_find_column(fe, e))))) {
      73      125177 :                                                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
      74      125177 :                                                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
      75             : 
      76             :                                                         /* not semantics found or if explicitly filtering not null values from the column */
      77      125177 :                                                         found_without_semantics |= (!is_semantics(comp) && comp_single_column(comp)) || (comp->flag == cmp_equal && lne && is_anti(comp) && exp_is_null(re));
      78      125177 :                                                         still_unique |= comp->flag == cmp_equal && is_unique(le) && is_unique(re); /* unique if only equi-joins on unique columns are there */
      79      125177 :                                                         if (is_full(rel->op) || (is_left(rel->op) && found_left) || (is_right(rel->op) && found_right)) /* on outer joins, min and max cannot be propagated on some cases */
      80       19791 :                                                                 continue;
      81             :                                                         /* if (end2 >= start1 && start2 <= end1) then the 2 intervals are intersected */
      82      105386 :                                                         if (fe && lval_min && lval_max) { /* range case, the middle expression must intersect the other two */
      83        4622 :                                                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
      84        4622 :                                                                 int int1 = rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0,
      85        4622 :                                                                         int2 = fval_min && fval_max && atom_cmp(fval_max, lval_min) >= 0 && atom_cmp(fval_min, lval_max) <= 0,
      86        4622 :                                                                         symmetric = is_symmetric(comp);
      87             : 
      88        4622 :                                                                 if (is_anti(comp) || (!symmetric && fval_min && rval_max && atom_cmp(fval_min, rval_max) < 0)) /* for asymmetric case the re range must be after the fe range */
      89        1817 :                                                                         continue;
      90        2805 :                                                                 if (lne && int1 && int2) {
      91         104 :                                                                         if (symmetric) {
      92           0 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
      93           0 :                                                                                 atom *nmin = statistics_atom_min(sql, rval_min, fval_min), *nmax = statistics_atom_max(sql, rval_max, fval_max);
      94             :                                                                                 /* max is min from le and (max from re and fe max) */
      95           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, nmax, p2->value.pval) : nmax);
      96             :                                                                                 /* min is max from le and (min from re and fe min) */
      97           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, nmin, p1->value.pval) : nmin);
      98             :                                                                         } else {
      99         104 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
     100             :                                                                                 /* max is min from le and fe max */
     101         104 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, fval_max, p2->value.pval) : fval_max);
     102             :                                                                                 /* min is max from le and re min */
     103         104 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, rval_min, p1->value.pval) : rval_min);
     104             :                                                                         }
     105        2701 :                                                                 } else if (rne) {
     106         590 :                                                                         if (symmetric && int1 && int2) { /* min is max from le and (min from re and fe min) */
     107           0 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     108           0 :                                                                                 atom *nmin = p ? statistics_atom_min(sql, p->value.pval, fval_min) : fval_min;
     109           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, nmin, lval_min));
     110         590 :                                                                         } else if (int1) { /* min is max from le and re min */
     111         100 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     112         100 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     113             :                                                                         }
     114        2111 :                                                                 } else if (fne) {
     115         549 :                                                                         if (symmetric && int1 && int2) { /* max is min from le and (max from re and fe max) */
     116           0 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     117           0 :                                                                                 atom *nmax = p ? statistics_atom_max(sql, p->value.pval, rval_max) : rval_max;
     118           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, nmax, lval_max) : nmax);
     119         549 :                                                                         } else if (int2) { /* max is min from le and fe max */
     120         266 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     121         266 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     122             :                                                                         }
     123             :                                                                 }
     124      100764 :                                                         } else if (lval_min && lval_max && rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0) {
     125             :                                                                 /* both min and max must be set and the intervals must overlap */
     126       43338 :                                                                 switch (comp->flag) {
     127       29002 :                                                                 case cmp_equal: /* for equality reduce */
     128       29002 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_max(sql, lval_max, rval_max) : statistics_atom_min(sql, lval_max, rval_max));
     129       29002 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_min(sql, lval_min, rval_min) : statistics_atom_max(sql, lval_min, rval_min));
     130       29002 :                                                                         break;
     131        4679 :                                                                 case cmp_notequal: /* for inequality expand */
     132        4679 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_min(sql, lval_max, rval_max) : statistics_atom_max(sql, lval_max, rval_max));
     133        4679 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_max(sql, lval_min, rval_min) : statistics_atom_min(sql, lval_min, rval_min));
     134        4679 :                                                                         break;
     135        5651 :                                                                 case cmp_gt:
     136             :                                                                 case cmp_gte:
     137       10364 :                                                                         if (!is_anti(comp) && lne) { /* min is max from both min */
     138        4713 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     139        4713 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min, p->value.pval) : rval_min);
     140         938 :                                                                         } else if (!is_anti(comp)) { /* max is min from both max */
     141         938 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     142         938 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     143             :                                                                         }
     144             :                                                                         break;
     145        4006 :                                                                 case cmp_lt:
     146             :                                                                 case cmp_lte:
     147        7205 :                                                                         if (!is_anti(comp) && lne) { /* max is min from both max */
     148        3199 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     149        3199 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max, p->value.pval) : rval_max);
     150         807 :                                                                         } else if (!is_anti(comp)) { /* min is max from both min */
     151         807 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     152         807 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     153             :                                                                         }
     154             :                                                                         break;
     155             :                                                                 default: /* Maybe later I can do cmp_in and cmp_notin */
     156             :                                                                         break;
     157             :                                                                 }
     158             :                                                         }
     159             :                                                 }
     160             :                                         }
     161             :                                 }
     162             :                         }
     163     1778799 :                         if (is_full(rel->op) || (is_left(rel->op) && found_right) || (is_right(rel->op) && found_left))
     164       38016 :                                 set_has_nil(e);
     165     1778799 :                         if (!is_outerjoin(rel->op) && found_without_semantics) /* at an outer join, null values pass */
     166       94338 :                                 set_has_no_nil(e);
     167     1778799 :                         if (is_join(rel->op) && is_unique(e) && !still_unique)
     168      118981 :                                 set_not_unique(e);
     169     1778799 :                         return e;
     170             :                 }
     171     4640327 :                 case op_table:
     172             :                 case op_basetable:
     173             :                 case op_union:
     174             :                 case op_except:
     175             :                 case op_inter:
     176             :                 case op_munion:
     177             :                 case op_project:
     178             :                 case op_groupby: {
     179     4640327 :                         sql_exp *found;
     180     4640327 :                         atom *fval;
     181     4640327 :                         prop *est;
     182     4640327 :                         if ((found = rel_find_exp(rel, e))) {
     183     2178128 :                                 if (rel->op != op_table) { /* At the moment don't propagate statistics for table relations */
     184     2133793 :                                         if ((fval = find_prop_and_get(found->p, PROP_MAX)))
     185     1135339 :                                                 set_minmax_property(sql, e, PROP_MAX, fval);
     186     2133792 :                                         if ((fval = find_prop_and_get(found->p, PROP_MIN)))
     187     1142696 :                                                 set_minmax_property(sql, e, PROP_MIN, fval);
     188     2133789 :                                         if (!has_nil(found))
     189     1380055 :                                                 set_has_no_nil(e);
     190     2133789 :                                         if (is_unique(found) || (need_distinct(rel) && list_length(rel->exps) == 1) ||
     191     1722671 :                                                 (is_groupby(rel->op) && (list_empty(rel->r) || (list_length(rel->r) == 1 && exps_find_exp(rel->r, e)))))
     192      421996 :                                                 set_unique(e);
     193             :                                         /* propagate unique estimation for known cases */
     194     2133789 :                                         if (is_groupby(rel->op) && list_empty(rel->r) && !find_prop(e->p, PROP_NUNIQUES)) { /* global aggregate case */
     195        7538 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     196        7538 :                                                 p->value.dval = 1;
     197     2126251 :                                         } else if (((is_basetable(rel->op) || is_except(rel->op) || is_inter(rel->op) || is_simple_project(rel->op) ||
     198       66654 :                                                                  (is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
     199     2068740 :                                                                 (est = find_prop(found->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES))) {
     200      188069 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     201      188069 :                                                 p->value.dval = est->value.dval;
     202             :                                         }
     203             :                                 }
     204     2178121 :                                 return e;
     205             :                         }
     206             :                         return NULL;
     207             :                 }
     208       83160 :                 case op_topn:
     209             :                 case op_sample:
     210       83160 :                         return rel_propagate_column_ref_statistics(sql, rel->l, e);
     211             :                 default:
     212             :                         break;
     213             :                 }
     214             :         }
     215             :         return NULL;
     216             : }
     217             : 
     218             : static atom *
     219     4459857 : atom_from_valptr( allocator *sa, sql_subtype *tpe, ValRecord *v)
     220             : {
     221     4459857 :         atom *a = SA_NEW(sa, atom);
     222             : 
     223     4459790 :         assert(!VALisnil(v));
     224     4459713 :         *a = (atom) {.tpe = *tpe,};
     225     4459713 :         SA_VALcopy(sa, &a->data, v);
     226     4459664 :         return a;
     227             : }
     228             : 
     229             : void
     230     4065030 : sql_column_get_statistics(mvc *sql, sql_column *c, sql_exp *e)
     231             : {
     232     4065030 :         bool nonil = false, unique = false;
     233     4065030 :         double unique_est = 0.0;
     234     4065030 :         ValRecord min, max;
     235     4065030 :         int ok = mvc_col_stats(sql, c, &nonil, &unique, &unique_est, &min, &max);
     236             : 
     237     4065165 :         if (has_nil(e) && nonil)
     238     2657750 :                 set_has_no_nil(e);
     239     4065165 :         if (!is_unique(e) && unique)
     240     1101458 :                 set_unique(e);
     241     4065165 :         if (unique_est != 0.0) {
     242     2828248 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     243     2828240 :                 p->value.dval = unique_est;
     244             :         }
     245     4065157 :         unsigned int digits = 0;
     246     4065157 :         sql_subtype *et = exp_subtype(e);
     247     4065164 :         if (et->type->eclass == EC_DEC || et->type->eclass == EC_NUM)
     248     2644222 :                 digits = et->digits;
     249     4065164 :         if ((ok & 2) == 2) {
     250     2226825 :                 if (!VALisnil(&max)) {
     251     2226819 :                         prop *p = e->p = prop_create(sql->sa, PROP_MAX, e->p);
     252     2226790 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &max);
     253     2226754 :                         if (digits) {
     254     1658274 :                                 unsigned int nd = atom_digits(p->value.pval);
     255     1658304 :                                 if (nd < digits)
     256             :                                         digits = nd;
     257     1658304 :                                 if (!digits)
     258             :                                         digits = 1;
     259             :                         }
     260             :                 }
     261     2226782 :                 VALclear(&max);
     262             :         }
     263     4065101 :         if ((ok & 1) == 1) {
     264     2233231 :                 if (!VALisnil(&min)) {
     265     2233210 :                         prop *p = e->p = prop_create(sql->sa, PROP_MIN, e->p);
     266     2233204 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &min);
     267     2233172 :                         if (digits) {
     268     1665780 :                                 unsigned int nd = atom_digits(p->value.pval);
     269     1665775 :                                 if (nd > digits) /* possibly set to low by max value */
     270             :                                         digits = nd;
     271             :                                 if (!digits)
     272             :                                         digits = 1;
     273             :                         }
     274             :                 }
     275     2233166 :                 VALclear(&min);
     276             :         }
     277     4065029 :         if (digits)
     278     2644092 :                 et->digits = digits;
     279     4065029 :         if (et->type->eclass == EC_DEC && et->digits <= et->scale)
     280         216 :                 et->digits = et->scale + 1;
     281     4065029 : }
     282             : 
     283             : static void
     284      937751 : rel_basetable_column_get_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
     285             : {
     286      937751 :         if (e->p)
     287             :                 return;
     288      302484 :         sql_column *c = NULL;
     289             : 
     290      302484 :         if ((c = rel_base_find_column(rel, e->nid))) {
     291      204384 :                 sql_column_get_statistics(sql, c, e);
     292             :         }
     293             : }
     294             : 
     295             : static bool
     296        8872 : rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, sql_exp *e, int i)
     297             : {
     298        8872 :         sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
     299        8872 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     300        8872 :                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     301        8872 :         prop *est;
     302             : 
     303             :         /* for the intersection, if both expressions don't overlap, it can be pruned */
     304        8872 :         if (is_inter(rel->op) && !has_nil(le) && !has_nil(re) &&
     305         511 :                 ((rval_max && lval_min && atom_cmp(rval_max, lval_min) < 0) || (rval_min && lval_max && atom_cmp(rval_min, lval_max) > 0)))
     306          26 :                 return true;
     307             : 
     308        8846 :         if (lval_max && rval_max) {
     309        2557 :                 if (is_union(rel->op))
     310           3 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     311        2554 :                 else if (is_inter(rel->op))
     312         399 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval_max, rval_max)); /* for intersect the new max will be the min of the two */
     313             :                 else /* except */
     314        2155 :                         set_minmax_property(sql, e, PROP_MAX, lval_max);
     315             :         }
     316        8846 :         if (lval_min && rval_min) {
     317        2557 :                 if (is_union(rel->op))
     318           3 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     319        2554 :                 else if (is_inter(rel->op))
     320         399 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval_min, rval_min)); /* for intersect the new min will be the max of the two */
     321             :                 else /* except */
     322        2155 :                         set_minmax_property(sql, e, PROP_MIN, lval_min);
     323             :         }
     324             : 
     325        8846 :         if (is_union(rel->op) || is_munion(rel->op)) {
     326        5986 :                 if (!has_nil(le) && !has_nil(re))
     327         879 :                         set_has_no_nil(e);
     328        5986 :                 if (need_distinct(rel) && list_length(rel->exps) == 1)
     329           0 :                         set_unique(e);
     330        2860 :         } else if (is_inter(rel->op)) {
     331         564 :                 if (!has_nil(le) || !has_nil(re))
     332         509 :                         set_has_no_nil(e);
     333         564 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     334         345 :                         set_unique(e);
     335             :         } else {
     336        2296 :                 assert(is_except(rel->op));
     337        2296 :                 if (!has_nil(le))
     338        2233 :                         set_has_no_nil(e);
     339        2296 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     340        2009 :                         set_unique(e);
     341             :         }
     342             :         /* propagate unique estimation for known cases */
     343        8846 :         if (!is_union(rel->op) && (est = find_prop(le->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES)) {
     344         205 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     345         205 :                 p->value.dval = est->value.dval;
     346             :         }
     347             :         return false;
     348             : }
     349             : 
     350             : 
     351             : static void
     352       60761 : rel_munion_get_statistics(mvc *sql, sql_rel *rel, list *rels, sql_exp *e, int i)
     353             : {
     354       60761 :         assert(is_munion(rel->op));
     355             : 
     356       60761 :         sql_rel *l = rels->h->data;
     357       60761 :         sql_exp *le = list_fetch(l->exps, i);
     358       60761 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX);
     359       60761 :         bool has_nonil = !has_nil(le);
     360             : 
     361      176050 :         for(node *n = rels->h->next; n; n = n->next) {
     362      115289 :                 sql_rel *r = n->data;
     363      115289 :                 sql_exp *re = list_fetch(r->exps, i);
     364      115289 :                 atom *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     365             : 
     366      115289 :                 if (lval_max && rval_max) {
     367       42578 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     368       42578 :                         lval_max = find_prop_and_get(e->p, PROP_MAX);
     369             :                 }
     370      115289 :                 if (lval_min && rval_min) {
     371       42079 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     372       42079 :                         lval_min = find_prop_and_get(e->p, PROP_MIN);
     373             :                 }
     374      115289 :                 has_nonil &= !has_nil(re);
     375             : 
     376             :         }
     377             : 
     378       60761 :         if (has_nonil)
     379       41592 :                 set_has_no_nil(e);
     380             : 
     381       60761 :         if (need_distinct(rel) && list_length(rel->exps) == 1)
     382        1594 :                 set_unique(e);
     383       60761 : }
     384             : 
     385             : 
     386             : static sql_exp *
     387     3483314 : rel_propagate_statistics(visitor *v, sql_rel *rel, sql_exp *e, int depth)
     388             : {
     389     3483314 :         mvc *sql = v->sql;
     390     3483314 :         atom *lval;
     391             : 
     392     3483314 :         (void) depth;
     393     3483314 :         switch(e->type) {
     394     2183957 :         case e_column:
     395     2183957 :                 switch (rel->op) { /* set relations don't call rel_propagate_statistics */
     396      279077 :                 case op_join:
     397             :                 case op_left:
     398             :                 case op_right:
     399             :                 case op_full:
     400             :                 case op_semi:
     401             :                 case op_anti: {
     402      279077 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e);
     403      279077 :                         if (!found)
     404      139981 :                                 (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     405             :                         break;
     406             :                 }
     407     1904880 :                 case op_select:
     408             :                 case op_project:
     409             :                 case op_groupby: {
     410     1904880 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e); /* labels may be found on the same projection, ugh */
     411     1904874 :                         if (!found && is_simple_project(rel->op))
     412      125463 :                                 (void) rel_propagate_column_ref_statistics(sql, rel, e);
     413             :                         break;
     414             :                 }
     415           0 :                 case op_insert:
     416             :                 case op_update:
     417             :                 case op_delete:
     418           0 :                         (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     419           0 :                         break;
     420             :                 default:
     421             :                         break;
     422             :                 }
     423             :                 break;
     424       98370 :         case e_convert: {
     425       98370 :                 sql_subtype *to = exp_totype(e), *from = exp_fromtype(e);
     426       98370 :                 sql_exp *l = e->l;
     427       98370 :                 sql_class fr = from->type->eclass, too = to->type->eclass;
     428       98370 :                 prop *est;
     429             : 
     430       98370 :                 if (fr == too) {
     431       89291 :                         if ((lval = find_prop_and_get(l->p, PROP_MAX))) {
     432       58695 :                                 atom *res = atom_copy(sql->sa, lval);
     433       58695 :                                 if ((res = atom_cast(sql->sa, res, to)))
     434       58672 :                                         set_minmax_property(sql, e, PROP_MAX, res);
     435             :                         }
     436       89291 :                         if ((lval = find_prop_and_get(l->p, PROP_MIN))) {
     437       59318 :                                 atom *res = atom_copy(sql->sa, lval);
     438       59318 :                                 if ((res = atom_cast(sql->sa, res, to)))
     439       59295 :                                         set_minmax_property(sql, e, PROP_MIN, res);
     440             :                         }
     441             :                 }
     442             :                 /* propagating nuniques through conversions is tricky. I am adding just the general cases */
     443       98370 :                 if ((est = find_prop(l->p, PROP_NUNIQUES)) &&
     444       60962 :                         (((fr == EC_TIME || fr == EC_TIME_TZ || fr == EC_DATE) && (too == EC_TIMESTAMP || too == EC_TIMESTAMP_TZ)) ||
     445       60937 :                          ((fr == EC_NUM || fr == EC_BIT) && too == EC_NUM) || (fr == EC_DEC && too == EC_DEC) || (fr != EC_EXTERNAL && too == EC_STRING))) {
     446       57510 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     447       57510 :                         p->value.dval = est->value.dval;
     448             :                 }
     449       98370 :                 if (!has_nil(l))
     450       55416 :                         set_has_no_nil(e);
     451             :                 break;
     452             :         }
     453      342059 :         case e_aggr:
     454             :         case e_func: {
     455      342059 :                 BUN lv;
     456      342059 :                 sql_subfunc *f = e->f;
     457             : 
     458      342059 :                 if (!f->func->s) {
     459      315556 :                         int key = hash_key(f->func->base.name); /* Using hash lookup */
     460      315556 :                         sql_hash_e *he = sql_functions_lookup->buckets[key&(sql_functions_lookup->size-1)];
     461      315556 :                         lookup_function look = NULL;
     462             : 
     463      689229 :                         for (; he && !look; he = he->chain) {
     464      373673 :                                 struct function_properties* fp = (struct function_properties*) he->value;
     465             : 
     466      373673 :                                 if (!strcmp(f->func->base.name, fp->name))
     467      107764 :                                         look = fp->func;
     468             :                         }
     469      315556 :                         if (look)
     470      107764 :                                 look(sql, e);
     471             :                 }
     472             :                 /* for global aggregates with no semantics, if the left relation has values, then the output won't be NULL */
     473      342059 :                 if (!is_semantics(e) && e->l && !have_nil(e->l) &&
     474       89234 :                         (e->type != e_aggr || (is_groupby(rel->op) && list_length(rel->r)) || ((lv = get_rel_count(rel->l)) != BUN_NONE && lv > 0)))
     475       88911 :                         set_has_no_nil(e);
     476             :                 /* set properties for global aggregates */
     477      342059 :                 if (e->type == e_aggr && is_groupby(rel->op) && list_empty(rel->r)) {
     478        7871 :                         if (!find_prop(e->p, PROP_NUNIQUES)) {
     479        7871 :                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     480        7871 :                                 p->value.dval = 1;
     481             :                         }
     482        7871 :                         set_unique(e);
     483             :                 }
     484             :                 break;
     485             :         }
     486      571199 :         case e_atom:
     487      571199 :                 if (e->l) {
     488      554468 :                         atom *a = (atom*) e->l;
     489      554468 :                         if (!a->isnull) {
     490      491149 :                                 set_minmax_property(sql, e, PROP_MAX, a);
     491      491149 :                                 set_minmax_property(sql, e, PROP_MIN, a);
     492             :                         }
     493      554468 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     494      554468 :                         p->value.dval = 1;
     495       16731 :                 } else if (e->f) {
     496        3206 :                         list *vals = (list *) e->f;
     497        3206 :                         sql_exp *first = vals->h ? vals->h->data : NULL;
     498        3206 :                         atom *max = NULL, *min = NULL; /* all child values must have a valid min/max */
     499        3206 :                         int has_nil = 0;
     500             : 
     501        3206 :                         if (first) {
     502        3206 :                                 max = ((lval = find_prop_and_get(first->p, PROP_MAX))) ? lval : NULL;
     503        3206 :                                 min = ((lval = find_prop_and_get(first->p, PROP_MIN))) ? lval : NULL;
     504        3206 :                                 has_nil |= has_nil(first);
     505             :                         }
     506             : 
     507       13958 :                         for (node *n = vals->h ? vals->h->next : NULL ; n ; n = n->next) {
     508        7546 :                                 sql_exp *ee = n->data;
     509             : 
     510        7546 :                                 if (min && max) {
     511        7093 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MAX))) {
     512        7047 :                                                 max = atom_cmp(lval, max) > 0 ? lval : max;
     513             :                                         } else {
     514             :                                                 max = NULL;
     515             :                                         }
     516        7093 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MIN))) {
     517        7047 :                                                 min = atom_cmp(min, lval) > 0 ? lval : min;
     518             :                                         } else {
     519             :                                                 min = NULL;
     520             :                                         }
     521             :                                 }
     522        7546 :                                 has_nil |= has_nil(ee);
     523             :                         }
     524             : 
     525        3206 :                         if (!has_nil)
     526        2840 :                                 set_has_no_nil(e);
     527        3206 :                         if (min && max) {
     528        2817 :                                 set_minmax_property(sql, e, PROP_MAX, max);
     529        2817 :                                 set_minmax_property(sql, e, PROP_MIN, min);
     530             :                         }
     531        3206 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     532        3206 :                         p->value.dval = (dbl) list_length(vals);
     533             :                 } else {
     534       13525 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     535       13525 :                         p->value.dval = 1;
     536             :                 }
     537             :                 break;
     538      287729 :         case e_cmp:
     539             :                 /* TODO? propagating min/max/unique of booleans is not very worth it */
     540      287729 :                 if (e->flag == cmp_or || e->flag == cmp_filter) {
     541       18103 :                         if (!have_nil(e->l) && !have_nil(e->r))
     542       13621 :                                 set_has_no_nil(e);
     543      269626 :                 } else if (e->flag == cmp_in || e->flag == cmp_notin) {
     544       21270 :                         sql_exp *le = e->l;
     545       21270 :                         if (!has_nil(le) && !have_nil(e->r))
     546       18224 :                                 set_has_no_nil(e);
     547             :                 } else {
     548      248356 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     549      248356 :                         if (!has_nil(le) && !has_nil(re) && (!e->f || !has_nil(fe)))
     550      178405 :                                 set_has_no_nil(e);
     551             :                 }
     552             :                 break;
     553             :         case e_psm:
     554             :                 break;
     555             :         }
     556             : 
     557             : #ifndef NDEBUG
     558             :         {
     559             :                 /* min and max cannot be NULL and min must be <= than max, if it happens the inner relation must be empty! */
     560     3483308 :                 atom *min = find_prop_and_get(e->p, PROP_MIN), *max = find_prop_and_get(e->p, PROP_MAX);
     561             : 
     562     3483307 :                 (void) min;
     563     3483307 :                 (void) max;
     564     3483307 :                 assert(!min || !min->isnull);
     565     3483307 :                 assert(!max || !max->isnull);
     566     3483307 :                 assert(!min || !max || (min && max)/* atom_cmp(min, max) <= 0*/);
     567             :         }
     568             : #endif
     569     3483307 :         return e;
     570             : }
     571             : 
     572             : static list * /* Remove predicates always false from min/max values */
     573      139699 : rel_prune_predicates(visitor *v, sql_rel *rel)
     574             : {
     575      139699 :         if (rel->l) {
     576      139699 :                 sql_rel *l = rel->l;
     577      139699 :                 if (is_single(l))
     578        3399 :                         return rel->exps;
     579             :         }
     580      136300 :         if (!list_empty(rel->attr))
     581        2972 :                 return rel->exps;
     582      282870 :         for (node *n = rel->exps->h ; n ; n = n->next) {
     583      149542 :                 sql_exp *e = n->data;
     584             : 
     585      149542 :                 if (e->type == e_cmp && is_theta_exp(e->flag)) {
     586      123074 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     587      123074 :                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     588      123074 :                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     589      123074 :                         bool always_false = false, always_true = false;
     590             : 
     591      123074 :                         if (fe && !is_symmetric(e)) {
     592        3058 :                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
     593        3058 :                                 comp_type lower = range2lcompare(e->flag), higher = range2rcompare(e->flag);
     594        3660 :                                 int not_int1 = rval_min && lval_max && /* the middle and left intervals don't overlap */
     595        1663 :                                         (!is_anti(e) && (lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0)),
     596        4072 :                                         not_int2 = lval_min && fval_max && /* the middle and right intervals don't overlap */
     597        2485 :                                         (!is_anti(e) && (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0)),
     598        3058 :                                         not_int3 = rval_min && fval_max && /* the left interval is after the right one */
     599        1287 :                                         (!is_anti(e) && (atom_cmp(rval_min, fval_max) > 0));
     600             : 
     601        3058 :                                 always_false |= not_int1 || not_int2 || not_int3;
     602             :                                 /* for anti the middle must be before the left or after the right or the right after the left, for the other the middle must be always between the left and right intervals */
     603        4113 :                                 always_true |= !has_nil(le) && !has_nil(re) && !has_nil(fe) &&
     604        3957 :                                         lval_min && lval_max && rval_min && rval_max && fval_min && fval_max &&
     605         575 :                                         (is_anti(e) ? ((lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) || (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
     606         488 :                                          ((lower == cmp_gte ? atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0)));
     607             :                         } else if (!fe) {
     608      119998 :                                 if (!is_semantics(e)) /* trivial not null cmp null case */
     609      228578 :                                         always_false |= !is_anti(e) && ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)));
     610      119998 :                                 switch (e->flag) {
     611      104508 :                                 case cmp_equal:
     612      104508 :                                         if (lval_min && lval_max && rval_min && rval_max && (!is_semantics(e) || !has_nil(le) || !has_nil(re)))
     613      134024 :                                                 always_false |= (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     614      104508 :                                         if (is_semantics(e)) { /* prune *= NULL cases */
     615        5683 :                                                 always_false |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     616       11366 :                                                 always_true |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     617             :                                         }
     618             :                                         break;
     619        7227 :                                 case cmp_notequal:
     620        7227 :                                         if (lval_min && lval_max && rval_min && rval_max)
     621       11376 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     622        7227 :                                         if (is_semantics(e)) {
     623          26 :                                                 always_true |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     624          52 :                                                 always_false |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     625             :                                         }
     626             :                                         break;
     627        5632 :                                 case cmp_gt:
     628        5632 :                                         if (lval_max && rval_min)
     629        1947 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     630        5632 :                                         if (lval_min && rval_max)
     631        3894 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     632             :                                         break;
     633         119 :                                 case cmp_gte:
     634         119 :                                         if (lval_max && rval_min)
     635          49 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     636         119 :                                         if (lval_min && rval_max)
     637          98 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     638             :                                         break;
     639        2428 :                                 case cmp_lt:
     640        2428 :                                         if (lval_min && rval_max)
     641        1376 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     642        2428 :                                         if (lval_max && rval_min)
     643        2800 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     644             :                                         break;
     645          84 :                                 case cmp_lte:
     646          84 :                                         if (lval_min && rval_max)
     647          11 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     648          84 :                                         if (lval_max && rval_min)
     649          22 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     650             :                                         break;
     651             :                                 default: /* Maybe later I can do cmp_in and cmp_notin but don't forget to remove is_theta_exp check up there */
     652             :                                         break;
     653             :                                 }
     654             :                         }
     655      123074 :                         assert(!always_false || !always_true);
     656      123074 :                         if (always_false || always_true) {
     657        3665 :                                 sql_exp *ne = exp_atom_bool(v->sql->sa, always_true /* if always_true set then true else false */);
     658        3665 :                                 if (exp_name(e))
     659           0 :                                         exp_prop_alias(v->sql->sa, ne, e);
     660        3665 :                                 n->data = ne;
     661        3665 :                                 v->changes++;
     662             :                         }
     663             :                 }
     664             :         }
     665      133328 :         return rel->exps;
     666             : }
     667             : 
     668             : static sql_rel *
     669          14 : set_setop_side(visitor *v, sql_rel *rel, sql_rel *side)
     670             : {
     671          14 :         if (side == rel->r)
     672           0 :                 rel->r = NULL;
     673             :         else
     674          14 :                 rel->l = NULL;
     675          14 :         if (!is_simple_project(side->op) || rel_is_ref(side) || !list_empty(side->r)) {
     676           0 :                 side = rel_project(v->sql->sa, side, rel_projections(v->sql, side, NULL, 0, 1));
     677           0 :                 set_count_prop(v->sql->sa, side, get_rel_count(side->l));
     678           0 :                 side->exps = exps_exp_visitor_bottomup(v, side, side->exps, 0, &rel_propagate_statistics, false);
     679             :         }
     680          35 :         for (node *n = rel->exps->h, *m = side->exps->h ; n && m ; n = n->next, m = m->next) {
     681          21 :                 sql_exp *e1 = n->data, *e2 = m->data;
     682          21 :                 exp_prop_alias(v->sql->sa, e2, e1);
     683             :         }
     684          14 :         list_hash_clear(side->exps);
     685             : 
     686          14 :         if (need_distinct(rel))
     687           0 :                 set_distinct(side);
     688          14 :         side->p = prop_copy(v->sql->sa, rel->p);
     689          14 :         rel_destroy(rel);
     690          14 :         return side;
     691             : }
     692             : 
     693             : static BUN
     694       21887 : trivial_project_exp_card(sql_exp *e)
     695             : {
     696       22193 :         if (e->type == e_convert)
     697         306 :                 return trivial_project_exp_card(e->l);
     698       21887 :         return e->type == e_atom && e->f ? (BUN) list_length(e->f) : 1;
     699             : }
     700             : 
     701             : static BUN
     702       24249 : rel_calc_nuniques(mvc *sql, sql_rel *l, list *exps)
     703             : {
     704       24249 :         BUN lv = get_rel_count(l);
     705             : 
     706       24249 :         if (lv == 0)
     707             :                 return 0;
     708       21540 :         if (!list_empty(exps)) {
     709       21540 :                 BUN nuniques = 0;
     710             :                 /* compute the highest number of unique values */
     711       51133 :                 for (node *n = exps->h ; n && nuniques != BUN_NONE ; n = n->next) {
     712       29593 :                         sql_exp *e = n->data;
     713       29593 :                         sql_rel *bt = NULL;
     714       29593 :                         prop *p = NULL;
     715       29593 :                         BUN euniques = BUN_NONE;
     716       29593 :                         atom *min, *max, *sub = NULL;
     717       29593 :                         sql_subtype *tp = exp_subtype(e);
     718       29593 :                         sql_class ec = tp ? tp->type->eclass : EC_STRING; /* if 'e' has no type (eg parameter), use a non-number type to fail condition */
     719             : 
     720       29593 :                         if ((p = find_prop(e->p, PROP_NUNIQUES))) {
     721       20957 :                                 euniques = (BUN) p->value.dval;
     722        8636 :                         } else if (e->type == e_column && e->nid && rel_find_exp_and_corresponding_rel(l, e, false, &bt, NULL) && bt && (p = find_prop(bt->p, PROP_COUNT))) {
     723        6593 :                                 euniques = (BUN) p->value.lval;
     724             :                         }
     725             :                         /* use min to max range to compute number of possible values in the domain for number types */
     726       29593 :                         if ((EC_TEMP(ec)||ec==EC_NUM||ec==EC_MONTH||ec==EC_POS) &&
     727       22363 :                                 (min = find_prop_and_get(e->p, PROP_MIN)) && (max = find_prop_and_get(e->p, PROP_MAX))) {
     728             :                                 /* the range includes min and max, so the atom_inc call is needed */
     729             :                                 /* if 'euniques' has number of distinct values, compute min between both */
     730       17543 :                                 if ((sub = atom_sub(sql->sa, max, min)) && (sub = atom_inc(sql->sa, sub)) && (sub = atom_cast(sql->sa, sub, sql_bind_localtype("oid"))))
     731           0 :                                         euniques = MIN(euniques, (BUN) sub->data.val.oval);
     732             :                         }
     733       29593 :                         if (euniques != BUN_NONE)
     734       27550 :                                 nuniques = MAX(nuniques, euniques); /* the highest cardinality sets the estimation */
     735             :                         else
     736             :                                 nuniques = BUN_NONE;
     737             :                 }
     738       21540 :                 if (nuniques != BUN_NONE)
     739             :                         return nuniques;
     740             :         }
     741             :         return lv;
     742             : }
     743             : 
     744             : static sql_rel *
     745     2073754 : rel_get_statistics_(visitor *v, sql_rel *rel)
     746             : {
     747             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
     748     2073754 :         uint8_t has_special_modify = *(uint8_t*) v->data;
     749     2073754 :         bool can_be_pruned = !has_special_modify && v->storage_based_opt;
     750             : 
     751             :         /* Don't look at the same relation twice */
     752     2073754 :         if (are_statistics_gathered(rel->used))
     753             :                 return rel;
     754     1332515 :         rel->used |= statistics_gathered;
     755             : 
     756     1332515 :         switch (rel->op) {
     757      312996 :         case op_basetable: {
     758      312996 :                 sql_table *t = (sql_table *) rel->l;
     759      312996 :                 sqlstore *store = v->sql->session->tr->store;
     760             : 
     761      312996 :                 if (!list_empty(rel->exps)) {
     762     1250698 :                         for (node *n = rel->exps->h ; n ; n = n->next)
     763      937751 :                                 rel_basetable_column_get_statistics(v->sql, rel, n->data);
     764             :                 }
     765             :                 /* Set table row count. TODO? look for remote tables. Don't look at storage for declared tables, because it won't be cleaned */
     766      312996 :                 if (isTable(t) && t->s && !isDeclaredTable(t)) /* count active rows only */
     767      259440 :                         set_count_prop(v->sql->sa, rel, (BUN)store->storage_api.count_col(v->sql->session->tr, ol_first_node(t->columns)->data, 10));
     768             :                 break;
     769             :         }
     770        2790 :         case op_union:
     771             :         case op_inter:
     772             :         case op_except: {
     773        2790 :                 bool empty_cross = false;
     774        2790 :                 int i = 0;
     775        2790 :                 sql_rel *l = rel->l, *pl = rel->l, *r = rel->r, *pr = rel->r;
     776             : 
     777        2790 :                 while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     778           0 :                         pl = pl->l;
     779        2790 :                 while (is_sample(pr->op) || is_topn(pr->op))
     780           0 :                         pr = pr->l;
     781             :                 /* if it's not a projection, then project and propagate statistics */
     782        2790 :                 if (!is_project(pl->op) && !is_base(pl->op)) {
     783           0 :                         pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     784           0 :                         set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     785           0 :                         pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     786             :                 }
     787        2790 :                 if (!is_project(pr->op) && !is_base(pr->op)) {
     788           0 :                         pr = rel_project(v->sql->sa, pr, rel_projections(v->sql, pr, NULL, 0, 1));
     789           0 :                         set_count_prop(v->sql->sa, pr, get_rel_count(pr->l));
     790           0 :                         pr->exps = exps_exp_visitor_bottomup(v, pr, pr->exps, 0, &rel_propagate_statistics, false);
     791             :                 }
     792             : 
     793       11662 :                 for (node *n = rel->exps->h ; n ; n = n->next) {
     794        8872 :                         empty_cross |= rel_setop_get_statistics(v->sql, rel, pl->exps, pr->exps, n->data, i);
     795        8872 :                         i++;
     796             :                 }
     797             : 
     798             :                 /* propagate row count */
     799        2790 :                 if (is_union(rel->op)) {
     800         277 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     801         277 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     802             : 
     803         277 :                         if (lv == 0 && rv == 0) { /* both sides empty */
     804           2 :                                 if (can_be_pruned)
     805             :                                         empty_cross = true;
     806             :                                 else
     807           2 :                                         set_count_prop(v->sql->sa, rel, 0);
     808         275 :                         } else if (can_be_pruned && lv == 0 && !rel_is_ref(rel)) { /* left side empty */
     809           0 :                                 rel = set_setop_side(v, rel, r);
     810           0 :                                 empty_cross = false; /* don't rewrite again */
     811         275 :                         } else if (can_be_pruned && rv == 0 && !rel_is_ref(rel)) { /* right side empty */
     812           0 :                                 rel = set_setop_side(v, rel, l);
     813           0 :                                 empty_cross = false; /* don't rewrite again */
     814         275 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
     815           7 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
     816             :                         }
     817        2513 :                 } else if (is_inter(rel->op) || is_except(rel->op)) {
     818        2513 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     819        2513 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     820             : 
     821        2513 :                         if (lv == 0) { /* left side empty */
     822          62 :                                 if (can_be_pruned)
     823             :                                         empty_cross = true;
     824             :                                 else
     825           5 :                                         set_count_prop(v->sql->sa, rel, 0);
     826        2451 :                         } else if (rv == 0) { /* right side empty */
     827          17 :                                 if (is_inter(rel->op)) {
     828           3 :                                         if (can_be_pruned)
     829             :                                                 empty_cross = true;
     830             :                                         else
     831           0 :                                                 set_count_prop(v->sql->sa, rel, 0);
     832          14 :                                 } else if (can_be_pruned && !rel_is_ref(rel)) {
     833          14 :                                         rel = set_setop_side(v, rel, l);
     834          14 :                                         empty_cross = false; /* don't rewrite again */
     835             :                                 } else {
     836           0 :                                         set_count_prop(v->sql->sa, rel, lv);
     837             :                                 }
     838             :                         } else {
     839        2434 :                                 set_count_prop(v->sql->sa, rel, lv);
     840             :                         }
     841             :                 }
     842        2790 :                 if (can_be_pruned && empty_cross) {
     843          80 :                         rel_destroy(rel->l);
     844          80 :                         rel->l = NULL;
     845          80 :                         rel_destroy(rel->r);
     846          80 :                         rel->r = NULL;
     847         244 :                         for (node *n = rel->exps->h ; n ; n = n->next) {
     848         164 :                                 sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     849         164 :                                 exp_prop_alias(v->sql->sa, a, e);
     850         164 :                                 n->data = a;
     851             :                         }
     852          80 :                         list_hash_clear(rel->exps);
     853          80 :                         sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     854          80 :                         set_count_prop(v->sql->sa, l, 1);
     855          80 :                         l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     856          80 :                         set_count_prop(v->sql->sa, l, 0);
     857          80 :                         rel->op = op_project;
     858          80 :                         rel->l = l;
     859          80 :                         rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     860          80 :                         set_count_prop(v->sql->sa, rel, 0);
     861          80 :                         set_nodistinct(rel); /* set relations may have distinct flag set */
     862          80 :                         v->changes++;
     863             :                 }
     864             :                 break;
     865             :         }
     866       12883 :         case op_munion: {
     867       12883 :                 list *l = rel->l, *nrels = sa_list(v->sql->sa);
     868       12883 :                 BUN cnt = 0;
     869       12883 :                 bool needs_pruning = false;
     870             : 
     871       49258 :                 for (node *n = l->h; n; n = n->next) {
     872       36375 :                         sql_rel *r = n->data, *pl = r;
     873             : 
     874       36375 :                         while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     875           0 :                                         pl = pl->l;
     876             :                         /* if it's not a projection, then project and propagate statistics */
     877       36375 :                         if (!is_project(pl->op) && !is_base(pl->op)) {
     878           0 :                                 pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     879           0 :                                 set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     880           0 :                                 pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     881             :                         }
     882       36375 :                         nrels = append(nrels, pl);
     883             :                         /* we need new munion statistics */
     884             :                         /* propagate row count */
     885       36375 :                         BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     886             :                         /* if PROP_COUNT does not exist we assume at least a row (see get_rel_count def) */
     887       36375 :                         if (rv == BUN_NONE) {
     888        1156 :                                 cnt++;
     889        1156 :                                 continue;
     890             :                         }
     891       35219 :                         if (!rv && can_be_pruned)
     892        6636 :                                 needs_pruning = true;
     893             :                         /* overflow check */
     894       35219 :                         if (rv > (BUN_MAX - cnt))
     895       36375 :                                 rv = BUN_MAX;
     896             :                         else
     897       35219 :                                 cnt += rv;
     898             :                 }
     899       12883 :                 int i = 0;
     900       73644 :                 for (node *n = rel->exps->h ; n ; n = n->next, i++)
     901       60761 :                         rel_munion_get_statistics(v->sql, rel, nrels, n->data, i);
     902             : 
     903       12883 :                 if (needs_pruning && !rel_is_ref(rel)) {
     904        4475 :                         v->changes++;
     905        4475 :                         list *nl = sa_list(l->sa);
     906             : 
     907       16498 :                         for (node *n = nrels->h; n; n = n->next) {
     908       12023 :                                 sql_rel *r = n->data;
     909       12023 :                                 BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     910             : 
     911       12023 :                                 if (!rv) { /* keep last for now */
     912        6166 :                                         rel_destroy(r);
     913        6166 :                                         continue;
     914             :                                 }
     915        5857 :                                 nl = append(nl, r);
     916             :                         }
     917        4475 :                         rel->l = nl;
     918        4475 :                         if (list_length(nl) == 1) {
     919        4149 :                                 sql_rel *l = rel->l = nl->h->data; /* ugh */
     920        4149 :                                 rel->r = NULL;
     921        4149 :                                 rel->op = op_project;
     922             : 
     923       20379 :                                 for (node *n = rel->exps->h, *m = l->exps->h ; n && m ; n = n->next, m = m->next) {
     924       16230 :                                         sql_exp *pe = n->data, *ie = m->data;
     925       16230 :                                         sql_exp *ne = exp_ref(v->sql, ie);
     926       16230 :                                         exp_prop_alias(v->sql->sa, ne, pe);
     927       16230 :                                         n->data = ne;
     928             :                                 }
     929        4149 :                                 list_hash_clear(rel->exps);
     930         326 :                         } else if (list_empty(nl)) {
     931             :                                 /* empty select (project [ nils ] ) */
     932         454 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     933         354 :                                         sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     934         354 :                                         exp_prop_alias(v->sql->sa, a, e);
     935         354 :                                         n->data = a;
     936             :                                 }
     937         100 :                                 list_hash_clear(rel->exps);
     938         100 :                                 sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     939         100 :                                 set_count_prop(v->sql->sa, l, 1);
     940         100 :                                 l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     941         100 :                                 set_count_prop(v->sql->sa, l, 0);
     942         100 :                                 rel->op = op_project;
     943         100 :                                 rel->r = NULL;
     944         100 :                                 rel->l = l;
     945         100 :                                 rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     946         100 :                                 set_count_prop(v->sql->sa, rel, 0);
     947         100 :                                 set_nodistinct(rel); /* set relations may have distinct flag set */
     948             :                         }
     949             :                 } else {
     950        8408 :                         set_count_prop(v->sql->sa, rel, cnt);
     951             :                 }
     952             :                 break;
     953             :         }
     954      532922 :         case op_join:
     955             :         case op_left:
     956             :         case op_right:
     957             :         case op_full:
     958             :         case op_semi:
     959             :         case op_anti:
     960             :         case op_select:
     961             :         case op_project:
     962             :         case op_groupby: {
     963      532922 :                 if (is_groupby(rel->op) && !list_empty(rel->r))
     964       15532 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     965      532922 :                 rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, &rel_propagate_statistics, false);
     966      532922 :                 if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r))
     967       39250 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     968             :                 /* The following optimizations can only be applied after propagating the statistics to rel->exps */
     969      532922 :                 if (can_be_pruned && (is_join(rel->op) || is_select(rel->op)) && !list_empty(rel->exps)) {
     970      139699 :                         int changes = v->changes;
     971      139699 :                         rel->exps = rel_prune_predicates(v, rel);
     972      139699 :                         if (v->changes > changes)
     973        3632 :                                 rel = rewrite_simplify(v, 0, v->value_based_opt, rel);
     974             :                 }
     975             : 
     976             :                 /* propagate row count */
     977      532922 :                 sql_rel *l = rel->l, *r = rel->r;
     978      532922 :                 switch (rel->op) {
     979      134501 :                 case op_join:
     980             :                 case op_left:
     981             :                 case op_right:
     982             :                 case op_full: {
     983      134501 :                         BUN lv = get_rel_count(l), rv = get_rel_count(r), uniques_estimate = BUN_MAX, join_idx_estimate = BUN_MAX;
     984             : 
     985      134501 :                         if (!list_empty(rel->exps) && !is_single(rel)) {
     986      246019 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     987      125592 :                                         sql_exp *e = n->data, *el = e->l, *er = e->r;
     988             : 
     989      125592 :                                         if (find_prop(e->p, PROP_JOINIDX)) {
     990         667 :                                                 join_idx_estimate = lv>rv?lv:rv;
     991      124925 :                                         } else if (e->type == e_cmp && e->flag == cmp_equal) {
     992             :                                                 /* if one of the sides is unique, the cardinality will be that exact number, but look for nulls */
     993      121041 :                                                 if (!is_semantics(e) || !has_nil(el) || !has_nil(er)) {
     994      120645 :                                                         BUN lu = 0, ru = 0;
     995      120645 :                                                         prop *p = NULL;
     996      120645 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES)))
     997       89535 :                                                                 lu = (BUN) p->value.dval;
     998      120645 :                                                         if ((p = find_prop(er->p, PROP_NUNIQUES)))
     999      103955 :                                                                 ru = (BUN) p->value.dval;
    1000      120645 :                                                         if (is_unique(el) || lu > lv) {
    1001       73671 :                                                                 BUN ncount = (is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv;
    1002       73671 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1003       46974 :                                                         } else if (is_unique(er) || ru > rv) {
    1004       29347 :                                                                 BUN ncount = (is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv;
    1005       29347 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1006             :                                                         }
    1007             :                                                 }
    1008             :                                         }
    1009             :                                 }
    1010             :                         }
    1011      134501 :                         if (is_single(rel)) {
    1012        4739 :                                 set_count_prop(v->sql->sa, rel, lv);
    1013      129762 :                         } else if (join_idx_estimate != BUN_MAX) {
    1014         665 :                                 set_count_prop(v->sql->sa, rel, join_idx_estimate);
    1015      129097 :                         } else if (uniques_estimate != BUN_MAX) {
    1016       96561 :                                 set_count_prop(v->sql->sa, rel, uniques_estimate);
    1017       32536 :                         } else if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1018             :                                 /* corner cases for outer joins */
    1019         126 :                                 if (is_left(rel->op)) {
    1020         114 :                                         set_count_prop(v->sql->sa, rel, lv);
    1021          12 :                                 } else if (is_right(rel->op)) {
    1022           3 :                                         set_count_prop(v->sql->sa, rel, rv);
    1023           9 :                                 } else if (is_full(rel->op) && lv != BUN_NONE && rv != BUN_NONE) {
    1024           9 :                                         set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
    1025           0 :                                 } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1026           0 :                                         set_count_prop(v->sql->sa, rel, 0);
    1027             :                                 }
    1028       32410 :                         } else if (lv == 0) {
    1029        1170 :                                 set_count_prop(v->sql->sa, rel, (is_right(rel->op) || is_full(rel->op)) ? rv : 0);
    1030       31811 :                         } else if (rv == 0) {
    1031        1568 :                                 set_count_prop(v->sql->sa, rel, (is_left(rel->op) || is_full(rel->op)) ? lv : 0);
    1032       30737 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1033       18276 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX / lv)) ? BUN_MAX : (lv * rv)); /* overflow check */
    1034             :                         }
    1035             :                         break;
    1036             :                 }
    1037        2635 :                 case op_anti:
    1038        2635 :                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1039        2635 :                         break;
    1040      109201 :                 case op_semi:
    1041             :                 case op_select:
    1042             :                         /* TODO calculate cardinalities using selectivities */
    1043      109201 :                         if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1044        2742 :                                 set_count_prop(v->sql->sa, rel, 0);
    1045             :                         } else {
    1046      209670 :                                 if (!list_empty(rel->exps) && !is_single(rel)) {
    1047      103211 :                                         BUN cnt = get_rel_count(l), u = 1;
    1048      172080 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1049      113082 :                                                 sql_exp *e = n->data, *el = e->l, *er = e->r;
    1050             : 
    1051             :                                                 /* simple expressions first */
    1052      113082 :                                                 if (e->type == e_cmp && e->flag == cmp_equal && exp_is_atom(er)) {
    1053             :                                                         /* use selectivity */
    1054       57423 :                                                         prop *p;
    1055       57423 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES))) {
    1056       44213 :                                                                 u = (BUN) p->value.dval;
    1057       44213 :                                                                 break;
    1058             :                                                         }
    1059             :                                                 }
    1060             :                                         }
    1061             :                                         /* u is an *estimate*, so don't set count_prop to 0 unless cnt is 0 */
    1062      103211 :                                         set_count_prop(v->sql->sa, rel, cnt == 0 ? 0 : u == 0 || u > cnt ? 1 : cnt/u);
    1063             :                                 } else {
    1064        3248 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1065             :                                 }
    1066             :                         }
    1067             :                         break;
    1068      263226 :                 case op_project:
    1069      263226 :                         if (l) {
    1070      253109 :                                 if (need_distinct(rel)) {
    1071           0 :                                         set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->exps));
    1072             :                                 } else {
    1073      253109 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1074             :                                 }
    1075             :                         } else {
    1076       10117 :                                 BUN card = 1;
    1077             : 
    1078       10117 :                                 if (!list_empty(rel->exps)) {
    1079       30675 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1080       20558 :                                                 sql_exp *e = n->data;
    1081       20558 :                                                 card = MAX(card, trivial_project_exp_card(e));
    1082             :                                         }
    1083             :                                 }
    1084       10117 :                                 set_count_prop(v->sql->sa, rel, card);
    1085             :                         }
    1086             :                         break;
    1087       22954 :                 case op_groupby:
    1088       22954 :                         if (list_empty(rel->r)) {
    1089        7421 :                                 set_count_prop(v->sql->sa, rel, 1);
    1090             :                         } else {
    1091       15533 :                                 set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->r));
    1092             :                         }
    1093             :                         break;
    1094             :                 default:
    1095             :                         break;
    1096             :                 }
    1097             :                 break;
    1098             :         }
    1099       16792 :         case op_topn: {
    1100       16792 :                 BUN lv = get_rel_count(rel->l);
    1101             : 
    1102       16792 :                 if (lv != BUN_NONE) {
    1103       16775 :                         sql_exp *le = rel->exps->h->data, *oe = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
    1104          84 :                         if (oe && oe->l && exp_is_not_null(oe)) { /* no parameters */
    1105          84 :                                 BUN offset = (BUN) ((atom*)oe->l)->data.val.lval;
    1106          84 :                                 lv = offset >= lv ? 0 : lv - offset;
    1107             :                         }
    1108       16775 :                         if (le->l && exp_is_not_null(le)) {
    1109       16741 :                                 BUN limit = (BUN) ((atom*)le->l)->data.val.lval;
    1110       16741 :                                 lv = MIN(lv, limit);
    1111             :                         }
    1112       16775 :                         set_count_prop(v->sql->sa, rel, lv);
    1113             :                 }
    1114             :                 break;
    1115             :         }
    1116          22 :         case op_sample: {
    1117          22 :                 BUN lv = get_rel_count(rel->l);
    1118             : 
    1119          22 :                 if (lv != BUN_NONE) {
    1120           4 :                         sql_exp *se = rel->exps->h->data;
    1121           4 :                         sql_subtype *tp = exp_subtype(se);
    1122             : 
    1123           4 :                         if (se->l && tp->type->eclass == EC_NUM) { /* sample is a number of rows */
    1124           4 :                                 BUN sample = (BUN) ((atom*)se->l)->data.val.lval;
    1125           4 :                                 lv = MIN(lv, sample);
    1126           0 :                         } else if (se->l) { /* sample is a percentage of rows */
    1127           0 :                                 dbl percent = ((atom*)se->l)->data.val.dval;
    1128           0 :                                 assert(tp->type->eclass == EC_FLT);
    1129           0 :                                 lv = (BUN) ceil((dbl)lv * percent);
    1130             :                         }
    1131           4 :                         set_count_prop(v->sql->sa, rel, lv);
    1132             :                 }
    1133             :                 break;
    1134             :         }
    1135        5977 :         case op_table: {
    1136        5977 :                 sql_exp *op = rel->r;
    1137        5977 :                 if (rel->flag != TRIGGER_WRAPPER && op) {
    1138        5665 :                         sql_subfunc *f = op->f;
    1139        5665 :                         if (f->func->lang == FUNC_LANG_SQL) {
    1140          97 :                                 set_count_prop(v->sql->sa, rel, 1000 /* just some fallback value */);
    1141        5568 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "storage") == 0) {
    1142         829 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of catalog */);
    1143        4739 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "db_users") == 0) {
    1144           0 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of users */);
    1145        4739 :                         } else if (f->func->lang == FUNC_LANG_MAL && strncmp(f->func->base.name, "querylog", 8) == 0) {
    1146        1085 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of querylog */);
    1147        3654 :                         } else if (f->func->lang == FUNC_LANG_MAL &&
    1148        3542 :                                            (strcmp(f->func->base.name, "queue") == 0 ||
    1149        3278 :                                                 strcmp(f->func->base.name, "optimizers") == 0 ||
    1150        2966 :                                                 strcmp(f->func->base.name, "env") == 0 ||
    1151        2675 :                                                 strcmp(f->func->base.name, "keywords") == 0 ||
    1152        2675 :                                                 strcmp(f->func->base.name, "statistics") == 0 ||
    1153        2008 :                                                 strcmp(f->func->base.name, "rejects") == 0 ||
    1154        1754 :                                                 strcmp(f->func->base.name, "schemastorage") == 0 ||
    1155        1754 :                                                 strncmp(f->func->base.name, "storage", 7) == 0 ||
    1156        1754 :                                                 strcmp(f->func->base.name, "sessions") == 0) ) {
    1157        2078 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of queue */);
    1158             :                         }
    1159             :                         /* else {
    1160             :                                 printf("%%func needs stats : %s\n", f->func->base.name);
    1161             :                         } */
    1162             :                 }
    1163             :                 break;
    1164             :         }
    1165             :         /*These relations are less important for now
    1166             :           TODO later we can tune it */
    1167             :         case op_insert:
    1168             :         case op_update:
    1169             :         case op_delete:
    1170             :         case op_truncate:
    1171             :         case op_ddl:
    1172             :         default:
    1173             :                 break;
    1174             :         }
    1175             : 
    1176             :         return rel;
    1177             : }
    1178             : 
    1179             : static sql_rel *
    1180      540130 : rel_get_statistics(visitor *v, global_props *gp, sql_rel *rel)
    1181             : {
    1182             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
    1183      540130 :         uint8_t has_special_modify = (uint8_t) gp->has_special_modify;
    1184      540130 :         v->data = &has_special_modify;
    1185      540130 :         rel = rel_visitor_bottomup(v, rel, &rel_get_statistics_);
    1186      540131 :         v->data = gp;
    1187      540131 :         return rel;
    1188             : }
    1189             : 
    1190             : run_optimizer
    1191      705970 : bind_get_statistics(visitor *v, global_props *gp)
    1192             : {
    1193      705970 :         (void) v;
    1194      705970 :         return (gp->opt_level == 1 && !gp->cnt[op_insert]) ? rel_get_statistics : NULL;
    1195             : }
    1196             : 
    1197             : 
    1198             : static bool
    1199       94940 : point_select_on_unique_column(sql_rel *rel)
    1200             : {
    1201       94940 :         if (is_select(rel->op) && !list_empty(rel->exps)) {
    1202      131415 :                 for (node *n = rel->exps->h; n ; n = n->next) {
    1203       75676 :                         sql_exp *e = n->data, *el = e->l, *er = e->r, *found = NULL;
    1204             : 
    1205       75676 :                         if (is_compare(e->type) && e->flag == cmp_equal) {
    1206       33647 :                                 if (is_numeric_upcast(el))
    1207           0 :                                         el = el->l;
    1208       33647 :                                 if (is_numeric_upcast(er))
    1209           0 :                                         er = er->l;
    1210       33647 :                                 if (is_alias(el->type) && exp_is_atom(er) && (found = rel_find_exp(rel->l, el)) &&
    1211         749 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(found) || !has_nil(er)))
    1212             :                                         return true;
    1213       32898 :                                 if (is_alias(er->type) && exp_is_atom(el) && (found = rel_find_exp(rel->l, er)) &&
    1214           0 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(el) || !has_nil(found)))
    1215             :                                         return true;
    1216             :                         }
    1217             :                 }
    1218             :         }
    1219             :         return false;
    1220             : }
    1221             : 
    1222             : /*
    1223             :  * A point select on an unique column reduces the number of rows to 1. If the same select is under a
    1224             :  * join, the opposite side's select can be pushed above the join.
    1225             :  */
    1226             : static inline sql_rel *
    1227     1245850 : rel_push_select_up(visitor *v, sql_rel *rel)
    1228             : {
    1229     1245850 :         if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) || is_semi(rel->op)) && !is_single(rel)) {
    1230      254093 :                 sql_rel *l = rel->l, *r = rel->r;
    1231      254093 :                 bool can_pushup_left = is_select(l->op) && !rel_is_ref(l) && !is_single(l) && (is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op)),
    1232      254093 :                         can_pushup_right = is_select(r->op) && !rel_is_ref(r) && !is_single(r) && (is_innerjoin(rel->op) || is_right(rel->op));
    1233             : 
    1234      254093 :                 if (can_pushup_left || can_pushup_right) {
    1235       66933 :                         if (can_pushup_left)
    1236       45458 :                                 can_pushup_left = point_select_on_unique_column(r);
    1237       66933 :                         if (can_pushup_right)
    1238       49482 :                                 can_pushup_right = point_select_on_unique_column(l);
    1239             : 
    1240             :                         /* if both selects retrieve one row each, it's not worth it to push both up */
    1241       66933 :                         if (can_pushup_left && !can_pushup_right) {
    1242         683 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1243         683 :                                 nrel->l = l->l;
    1244         683 :                                 rel = rel_inplace_select(rel, nrel, l->exps);
    1245         683 :                                 assert(is_select(rel->op));
    1246         683 :                                 v->changes++;
    1247       66250 :                         } else if (!can_pushup_left && can_pushup_right) {
    1248          14 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1249          14 :                                 nrel->r = r->l;
    1250          14 :                                 rel = rel_inplace_select(rel, nrel, r->exps);
    1251          14 :                                 assert(is_select(rel->op));
    1252          14 :                                 v->changes++;
    1253             :                         }
    1254             :                 }
    1255             :         }
    1256     1245850 :         return rel;
    1257             : }
    1258             : 
    1259             : static int
    1260       93351 : sql_class_base_score(visitor *v, sql_column *c, sql_subtype *t, bool equality_based)
    1261             : {
    1262       93351 :         int de;
    1263             : 
    1264       93351 :         if (!t)
    1265             :                 return 0;
    1266       93351 :         switch (ATOMstorage(t->type->localtype)) {
    1267             :         case TYPE_bte:
    1268             :                 return 150 - 8;
    1269        1598 :         case TYPE_sht:
    1270        1598 :                 return 150 - 16;
    1271       37326 :         case TYPE_int:
    1272       37326 :                 return 150 - 32;
    1273         518 :         case TYPE_void:
    1274             :         case TYPE_lng:
    1275         518 :                 return 150 - 64;
    1276             :         case TYPE_uuid:
    1277             : #ifdef HAVE_HGE
    1278             :         case TYPE_hge:
    1279             : #endif
    1280             :                 return 150 - 128;
    1281           2 :         case TYPE_flt:
    1282           2 :                 return 75 - 24;
    1283             :         case TYPE_dbl:
    1284             :                 return 75 - 53;
    1285       30631 :         default:
    1286       30631 :                 if (equality_based && c && v->storage_based_opt && (de = mvc_is_duplicate_eliminated(v->sql, c)))
    1287        1554 :                         return 150 - de * 8;
    1288             :                 /* strings and blobs not duplicate eliminated don't get any points here */
    1289             :                 return 0;
    1290             :         }
    1291             : }
    1292             : 
    1293             : static int
    1294       34452 : score_se_base(visitor *v, sql_rel *rel, sql_exp *e)
    1295             : {
    1296       34452 :         int res = 0;
    1297       34452 :         sql_subtype *t = exp_subtype(e);
    1298       34452 :         sql_column *c = NULL;
    1299             : 
    1300       34452 :         if (e->type == e_convert) /* keep unsafes at the end (TODO improve) */
    1301             :                 return -1000;
    1302             :         /* can we find out if the underlying table is sorted */
    1303       33851 :         if ((c = exp_find_column(rel, e, -2)) && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1304       33851 :                 res += 600;
    1305             : 
    1306             :         /* prefer the shorter var types over the longer ones */
    1307       33851 :         res += sql_class_base_score(v, c, t, is_equality_or_inequality_exp(e->flag)); /* smaller the type, better */
    1308       33851 :         return res;
    1309             : }
    1310             : 
    1311             : static int
    1312       58553 : score_se(visitor *v, sql_rel *rel, sql_exp *e)
    1313             : {
    1314       58553 :         int score = 0;
    1315       58553 :         if (e->type == e_cmp && !is_complex_exp(e->flag)) {
    1316       34452 :                 sql_exp *l = e->l;
    1317             : 
    1318       34452 :                 while (l->type == e_cmp) { /* go through nested comparisons */
    1319           2 :                         sql_exp *ll;
    1320             : 
    1321           2 :                         if (l->flag == cmp_filter || l->flag == cmp_or)
    1322           0 :                                 ll = ((list*)l->l)->h->data;
    1323             :                         else
    1324           2 :                                 ll = l->l;
    1325           2 :                         if (ll->type != e_cmp)
    1326             :                                 break;
    1327             :                         l = ll;
    1328             :                 }
    1329       34452 :                 score += score_se_base(v, rel, l);
    1330             :         }
    1331       58553 :         score += exp_keyvalue(e);
    1332       58553 :         return score;
    1333             : }
    1334             : 
    1335             : static inline sql_rel *
    1336     1245850 : rel_select_order(visitor *v, sql_rel *rel)
    1337             : {
    1338     1245850 :         int *scores = NULL;
    1339     1245850 :         sql_exp **exps = NULL;
    1340             : 
    1341     1245850 :         if (is_select(rel->op) && list_length(rel->exps) > 1) {
    1342       27380 :                 node *n;
    1343       27380 :                 int i, nexps = list_length(rel->exps);
    1344       27380 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, nexps);
    1345       27380 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, nexps);
    1346             : 
    1347       85933 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next) {
    1348       58583 :                         exps[i] = n->data;
    1349       58583 :                         if (find_prop(exps[i]->p, PROP_HASHCOL))
    1350             :                                 return rel;
    1351       58553 :                         scores[i] = score_se(v, rel, n->data);
    1352             :                 }
    1353       27350 :                 GDKqsort(scores, exps, NULL, nexps, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1354             : 
    1355       85903 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next)
    1356       58553 :                         n->data = exps[i];
    1357             :         }
    1358             : 
    1359             :         return rel;
    1360             : }
    1361             : 
    1362             : /* Compute the efficiency of using this expression early in a group by list */
    1363             : static int
    1364       59500 : score_gbe(visitor *v, sql_rel *rel, sql_exp *e)
    1365             : {
    1366       59500 :         int res = 0;
    1367       59500 :         sql_subtype *t = exp_subtype(e);
    1368       59500 :         sql_column *c = exp_find_column(rel, e, -2);
    1369             : 
    1370       59500 :         if (e->card == CARD_ATOM) /* constants are trivial to group */
    1371          38 :                 res += 1000;
    1372             :         /* can we find out if the underlying table is sorted */
    1373       59500 :         if (is_unique(e) || find_prop(e->p, PROP_HASHCOL) || (c && v->storage_based_opt && mvc_is_unique(v->sql, c))) /* distinct columns */
    1374        3584 :                 res += 700;
    1375       38910 :         if (c && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1376        3710 :                 res += 500;
    1377       59500 :         if (find_prop(e->p, PROP_HASHIDX)) /* has hash index */
    1378           0 :                 res += 200;
    1379             : 
    1380             :         /* prefer the shorter var types over the longer ones */
    1381       59500 :         res += sql_class_base_score(v, c, t, true); /* smaller the type, better */
    1382       59500 :         return res;
    1383             : }
    1384             : 
    1385             : /* reorder group by expressions */
    1386             : static inline sql_rel *
    1387     1245850 : rel_groupby_order(visitor *v, sql_rel *rel)
    1388             : {
    1389     1245850 :         int *scores = NULL;
    1390     1245850 :         sql_exp **exps = NULL;
    1391             : 
    1392     1245850 :         if (is_groupby(rel->op) && list_length(rel->r) > 1) {
    1393       25587 :                 node *n;
    1394       25587 :                 list *gbe = rel->r;
    1395       25587 :                 int i, ngbe = list_length(gbe);
    1396       25587 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, ngbe);
    1397       25587 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, ngbe);
    1398             : 
    1399             :                 /* first sorting step, give priority for integers and sorted columns */
    1400       85087 :                 for (i = 0, n = gbe->h; n; i++, n = n->next) {
    1401       59500 :                         exps[i] = n->data;
    1402       59500 :                         scores[i] = score_gbe(v, rel, exps[i]);
    1403             :                 }
    1404       25587 :                 GDKqsort(scores, exps, NULL, ngbe, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1405             : 
    1406             :                 /* second sorting step, give priority to strings with lower number of digits */
    1407       50504 :                 for (i = ngbe - 1; i && !scores[i]; i--); /* find expressions with no score from the first round */
    1408       25587 :                 if (scores[i])
    1409       24589 :                         i++;
    1410       25587 :                 if (ngbe - i > 1) {
    1411        8604 :                         for (int j = i; j < ngbe; j++) {
    1412        6597 :                                 sql_subtype *t = exp_subtype(exps[j]);
    1413        6597 :                                 scores[j] = t ? t->digits : 0;
    1414             :                         }
    1415             :                         /* the less number of digits the better, order ascending */
    1416        2007 :                         GDKqsort(scores + i, exps + i, NULL, ngbe - i, sizeof(int), sizeof(void *), TYPE_int, false, true);
    1417             :                 }
    1418             : 
    1419       85087 :                 for (i = 0, n = gbe->h; n; i++, n = n->next)
    1420       59500 :                         n->data = exps[i];
    1421             :         }
    1422             : 
    1423     1245850 :         return rel;
    1424             : }
    1425             : 
    1426             : /* This optimization loop contains optimizations that can potentially use statistics */
    1427             : static sql_rel *
    1428     1245850 : rel_final_optimization_loop_(visitor *v, sql_rel *rel)
    1429             : {
    1430             :         /* Run rel_push_select_up only once at the end to avoid an infinite optimization loop */
    1431     1245850 :         rel = rel_push_select_up(v, rel);
    1432     1245850 :         rel = rel_select_order(v, rel);
    1433             : 
    1434             :         /* TODO? Maybe later add rel_simplify_count, rel_join2semijoin, rel_simplify_fk_joins,
    1435             :            rel_distinct_project2groupby, rel_simplify_predicates, rel_simplify_math,
    1436             :            rel_distinct_aggregate_on_unique_values */
    1437             : 
    1438     1245850 :         rel = rel_groupby_order(v, rel);
    1439     1245850 :         return rel;
    1440             : }
    1441             : 
    1442             : static sql_rel *
    1443       66296 : rel_final_optimization_loop(visitor *v, global_props *gp, sql_rel *rel)
    1444             : {
    1445       66296 :         (void) gp;
    1446       66296 :         return rel_visitor_bottomup(v, rel, &rel_final_optimization_loop_);
    1447             : }
    1448             : 
    1449             : run_optimizer
    1450      705970 : bind_final_optimization_loop(visitor *v, global_props *gp)
    1451             : {
    1452      705970 :         int flag = v->sql->sql_optimizer;
    1453             :         /* At the moment, this optimizer has dependency on 3 flags */
    1454      547722 :         return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_select]) &&
    1455      772268 :                 (flag & push_select_up) && (flag & optimize_select_and_joins_topdown) && (flag & optimize_projections) ? rel_final_optimization_loop : NULL;
    1456             : }

Generated by: LCOV version 1.14