LCOV - code coverage report
Current view: top level - sql/server - rel_statistics.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 831 883 94.1 %
Date: 2024-11-12 19:36:54 Functions: 26 26 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include "rel_optimizer_private.h"
      15             : #include "rel_statistics.h"
      16             : #include "rel_basetable.h"
      17             : #include "rel_rewriter.h"
      18             : 
      19             : static sql_exp *
      20     3465738 : comparison_find_column(sql_exp *input, sql_exp *e)
      21             : {
      22     3465738 :         switch (input->type) {
      23       98521 :         case e_convert: {
      24       98521 :                 list *types = (list *)input->r;
      25       98521 :                 sql_class from = ((sql_subtype*)types->h->data)->type->eclass, to = ((sql_subtype*)types->h->next->data)->type->eclass;
      26       98521 :                 if (from == to)
      27      189974 :                         return comparison_find_column(input->l, e) ? input : NULL;
      28             :                 return NULL;
      29             :         }
      30     3055214 :         case e_column:
      31     3055214 :                 return exp_match(e, input) ? input : NULL;
      32             :         default:
      33             :                 return NULL;
      34             :         }
      35             : }
      36             : 
      37             : /* multi lo <= col <= hi, maybe still be false even if lo or hi are NULL, possibly similar for filter on multiple
      38             :  * columns */
      39             : #define comp_single_column(c) (!c->f)
      40             : 
      41             : static sql_exp *
      42     8749172 : rel_propagate_column_ref_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
      43             : {
      44     8832460 :         assert(e->type == e_column);
      45     8832460 :         if (rel) {
      46     8832381 :                 switch(rel->op) {
      47     4090337 :                 case op_left:
      48             :                 case op_right:
      49             :                 case op_full:
      50             :                 case op_join:
      51             :                 case op_select:
      52             :                 case op_anti:
      53             :                 case op_semi: {
      54     4090337 :                         bool found_without_semantics = false, found_left = false, found_right = false, still_unique = false;
      55             : 
      56     4090337 :                         if ((is_innerjoin(rel->op) || is_select(rel->op)) && list_length(rel->exps) == 1 && exp_is_false(rel->exps->h->data))
      57             :                                 return NULL; /* nothing will pass, skip */
      58             : 
      59             :                         /* propagate from the bottom first */
      60     4074769 :                         if (rel_propagate_column_ref_statistics(sql, rel->l, e))
      61             :                                 found_left = true;
      62     2592009 :                         if (!found_left && is_join(rel->op) && rel_propagate_column_ref_statistics(sql, rel->r, e))
      63     4074769 :                                 found_right = true;
      64             : 
      65     4074769 :                         if (!found_left && !found_right)
      66             :                                 return NULL;
      67     1778450 :                         if (!list_empty(rel->exps) && rel->op != op_anti) { /* if there's an or, the MIN and MAX get difficult to propagate */
      68     3498022 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
      69     1810956 :                                         sql_exp *comp = n->data, *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL, *fe = comp->f, *fne = NULL;
      70             : 
      71     1810956 :                                         if (comp->type == e_cmp) {
      72     1809954 :                                                 if (is_theta_exp(comp->flag) && ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) || (fe && (fne = comparison_find_column(fe, e))))) {
      73      125227 :                                                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
      74      125227 :                                                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
      75             : 
      76             :                                                         /* not semantics found or if explicitly filtering not null values from the column */
      77      125227 :                                                         found_without_semantics |= (!is_semantics(comp) && comp_single_column(comp)) || (comp->flag == cmp_equal && lne && is_anti(comp) && exp_is_null(re));
      78      125227 :                                                         still_unique |= comp->flag == cmp_equal && is_unique(le) && is_unique(re); /* unique if only equi-joins on unique columns are there */
      79      125227 :                                                         if (is_full(rel->op) || (is_left(rel->op) && found_left) || (is_right(rel->op) && found_right)) /* on outer joins, min and max cannot be propagated on some cases */
      80       19800 :                                                                 continue;
      81             :                                                         /* if (end2 >= start1 && start2 <= end1) then the 2 intervals are intersected */
      82      105427 :                                                         if (fe && lval_min && lval_max) { /* range case, the middle expression must intersect the other two */
      83        4622 :                                                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
      84        4622 :                                                                 int int1 = rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0,
      85        4622 :                                                                         int2 = fval_min && fval_max && atom_cmp(fval_max, lval_min) >= 0 && atom_cmp(fval_min, lval_max) <= 0,
      86        4622 :                                                                         symmetric = is_symmetric(comp);
      87             : 
      88        4622 :                                                                 if (is_anti(comp) || (!symmetric && fval_min && rval_max && atom_cmp(fval_min, rval_max) < 0)) /* for asymmetric case the re range must be after the fe range */
      89        1817 :                                                                         continue;
      90        2805 :                                                                 if (lne && int1 && int2) {
      91         104 :                                                                         if (symmetric) {
      92           0 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
      93           0 :                                                                                 atom *nmin = statistics_atom_min(sql, rval_min, fval_min), *nmax = statistics_atom_max(sql, rval_max, fval_max);
      94             :                                                                                 /* max is min from le and (max from re and fe max) */
      95           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, nmax, p2->value.pval) : nmax);
      96             :                                                                                 /* min is max from le and (min from re and fe min) */
      97           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, nmin, p1->value.pval) : nmin);
      98             :                                                                         } else {
      99         104 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
     100             :                                                                                 /* max is min from le and fe max */
     101         104 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, fval_max, p2->value.pval) : fval_max);
     102             :                                                                                 /* min is max from le and re min */
     103         104 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, rval_min, p1->value.pval) : rval_min);
     104             :                                                                         }
     105        2701 :                                                                 } else if (rne) {
     106         590 :                                                                         if (symmetric && int1 && int2) { /* min is max from le and (min from re and fe min) */
     107           0 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     108           0 :                                                                                 atom *nmin = p ? statistics_atom_min(sql, p->value.pval, fval_min) : fval_min;
     109           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, nmin, lval_min));
     110         590 :                                                                         } else if (int1) { /* min is max from le and re min */
     111         100 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     112         100 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     113             :                                                                         }
     114        2111 :                                                                 } else if (fne) {
     115         549 :                                                                         if (symmetric && int1 && int2) { /* max is min from le and (max from re and fe max) */
     116           0 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     117           0 :                                                                                 atom *nmax = p ? statistics_atom_max(sql, p->value.pval, rval_max) : rval_max;
     118           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, nmax, lval_max) : nmax);
     119         549 :                                                                         } else if (int2) { /* max is min from le and fe max */
     120         266 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     121         266 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     122             :                                                                         }
     123             :                                                                 }
     124      100805 :                                                         } else if (lval_min && lval_max && rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0) {
     125             :                                                                 /* both min and max must be set and the intervals must overlap */
     126       43365 :                                                                 switch (comp->flag) {
     127       29025 :                                                                 case cmp_equal: /* for equality reduce */
     128       29025 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_max(sql, lval_max, rval_max) : statistics_atom_min(sql, lval_max, rval_max));
     129       29025 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_min(sql, lval_min, rval_min) : statistics_atom_max(sql, lval_min, rval_min));
     130       29025 :                                                                         break;
     131        4683 :                                                                 case cmp_notequal: /* for inequality expand */
     132        4683 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_min(sql, lval_max, rval_max) : statistics_atom_max(sql, lval_max, rval_max));
     133        4683 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_max(sql, lval_min, rval_min) : statistics_atom_min(sql, lval_min, rval_min));
     134        4683 :                                                                         break;
     135        5651 :                                                                 case cmp_gt:
     136             :                                                                 case cmp_gte:
     137       10364 :                                                                         if (!is_anti(comp) && lne) { /* min is max from both min */
     138        4713 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     139        4713 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min, p->value.pval) : rval_min);
     140         938 :                                                                         } else if (!is_anti(comp)) { /* max is min from both max */
     141         938 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     142         938 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     143             :                                                                         }
     144             :                                                                         break;
     145        4006 :                                                                 case cmp_lt:
     146             :                                                                 case cmp_lte:
     147        7205 :                                                                         if (!is_anti(comp) && lne) { /* max is min from both max */
     148        3199 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     149        3199 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max, p->value.pval) : rval_max);
     150         807 :                                                                         } else if (!is_anti(comp)) { /* min is max from both min */
     151         807 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     152         807 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     153             :                                                                         }
     154             :                                                                         break;
     155             :                                                                 default: /* Maybe later I can do cmp_in and cmp_notin */
     156             :                                                                         break;
     157             :                                                                 }
     158             :                                                         }
     159             :                                                 }
     160             :                                         }
     161             :                                 }
     162             :                         }
     163     1778450 :                         if (is_full(rel->op) || (is_left(rel->op) && found_right) || (is_right(rel->op) && found_left))
     164       38039 :                                 set_has_nil(e);
     165     1778450 :                         if (!is_outerjoin(rel->op) && found_without_semantics) /* at an outer join, null values pass */
     166       94368 :                                 set_has_no_nil(e);
     167     1778450 :                         if (is_join(rel->op) && is_unique(e) && !still_unique)
     168      119007 :                                 set_not_unique(e);
     169     1778450 :                         return e;
     170             :                 }
     171     4639406 :                 case op_table:
     172             :                 case op_basetable:
     173             :                 case op_union:
     174             :                 case op_except:
     175             :                 case op_inter:
     176             :                 case op_munion:
     177             :                 case op_project:
     178             :                 case op_groupby: {
     179     4639406 :                         sql_exp *found;
     180     4639406 :                         atom *fval;
     181     4639406 :                         prop *est;
     182     4639406 :                         if ((found = rel_find_exp(rel, e))) {
     183     2176727 :                                 if (rel->op != op_table) { /* At the moment don't propagate statistics for table relations */
     184     2132415 :                                         if ((fval = find_prop_and_get(found->p, PROP_MAX)))
     185     1134144 :                                                 set_minmax_property(sql, e, PROP_MAX, fval);
     186     2132410 :                                         if ((fval = find_prop_and_get(found->p, PROP_MIN)))
     187     1141590 :                                                 set_minmax_property(sql, e, PROP_MIN, fval);
     188     2132412 :                                         if (!has_nil(found))
     189     1378788 :                                                 set_has_no_nil(e);
     190     2132412 :                                         if (is_unique(found) || (need_distinct(rel) && list_length(rel->exps) == 1) ||
     191     1721761 :                                                 (is_groupby(rel->op) && (list_empty(rel->r) || (list_length(rel->r) == 1 && exps_find_exp(rel->r, e)))))
     192      421526 :                                                 set_unique(e);
     193             :                                         /* propagate unique estimation for known cases */
     194     2132411 :                                         if (is_groupby(rel->op) && list_empty(rel->r) && !find_prop(e->p, PROP_NUNIQUES)) { /* global aggregate case */
     195        7538 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     196        7538 :                                                 p->value.dval = 1;
     197     2124873 :                                         } else if (((is_basetable(rel->op) || is_except(rel->op) || is_inter(rel->op) || is_simple_project(rel->op) ||
     198       66659 :                                                                  (is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
     199     2067282 :                                                                 (est = find_prop(found->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES))) {
     200      188052 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     201      188052 :                                                 p->value.dval = est->value.dval;
     202             :                                         }
     203             :                                 }
     204     2176725 :                                 return e;
     205             :                         }
     206             :                         return NULL;
     207             :                 }
     208       83288 :                 case op_topn:
     209             :                 case op_sample:
     210       83288 :                         return rel_propagate_column_ref_statistics(sql, rel->l, e);
     211             :                 default:
     212             :                         break;
     213             :                 }
     214             :         }
     215             :         return NULL;
     216             : }
     217             : 
     218             : static atom *
     219     4454183 : atom_from_valptr( allocator *sa, sql_subtype *tpe, ValRecord *v)
     220             : {
     221     4454183 :         atom *a = SA_NEW(sa, atom);
     222             : 
     223     4454173 :         assert(!VALisnil(v));
     224     4454208 :         *a = (atom) {.tpe = *tpe,};
     225     4454208 :         SA_VALcopy(sa, &a->data, v);
     226     4454245 :         return a;
     227             : }
     228             : 
     229             : void
     230     4060803 : sql_column_get_statistics(mvc *sql, sql_column *c, sql_exp *e)
     231             : {
     232     4060803 :         bool nonil = false, unique = false;
     233     4060803 :         double unique_est = 0.0;
     234     4060803 :         ValRecord min, max;
     235     4060803 :         int ok = mvc_col_stats(sql, c, &nonil, &unique, &unique_est, &min, &max);
     236             : 
     237     4061909 :         if (has_nil(e) && nonil)
     238     2656651 :                 set_has_no_nil(e);
     239     4061909 :         if (!is_unique(e) && unique)
     240     1101113 :                 set_unique(e);
     241     4061909 :         if (unique_est != 0.0) {
     242     2825317 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     243     2825182 :                 p->value.dval = unique_est;
     244             :         }
     245     4061774 :         unsigned int digits = 0;
     246     4061774 :         sql_subtype *et = exp_subtype(e);
     247     4061936 :         if (et->type->eclass == EC_DEC || et->type->eclass == EC_NUM)
     248     2642404 :                 digits = et->digits;
     249     4061936 :         if ((ok & 2) == 2) {
     250     2224087 :                 if (!VALisnil(&max)) {
     251     2224065 :                         prop *p = e->p = prop_create(sql->sa, PROP_MAX, e->p);
     252     2224042 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &max);
     253     2223966 :                         if (digits) {
     254     1656498 :                                 unsigned int nd = atom_digits(p->value.pval);
     255     1656420 :                                 if (nd < digits)
     256             :                                         digits = nd;
     257     1656420 :                                 if (!digits)
     258             :                                         digits = 1;
     259             :                         }
     260             :                 }
     261     2223838 :                 VALclear(&max);
     262             :         }
     263     4061690 :         if ((ok & 1) == 1) {
     264     2230451 :                 if (!VALisnil(&min)) {
     265     2230453 :                         prop *p = e->p = prop_create(sql->sa, PROP_MIN, e->p);
     266     2230549 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &min);
     267     2230660 :                         if (digits) {
     268     1664245 :                                 unsigned int nd = atom_digits(p->value.pval);
     269     1664243 :                                 if (nd > digits) /* possibly set to low by max value */
     270             :                                         digits = nd;
     271             :                                 if (!digits)
     272             :                                         digits = 1;
     273             :                         }
     274             :                 }
     275     2230651 :                 VALclear(&min);
     276             :         }
     277     4061903 :         if (digits)
     278     2642333 :                 et->digits = digits;
     279     4061903 :         if (et->type->eclass == EC_DEC && et->digits <= et->scale)
     280         216 :                 et->digits = et->scale + 1;
     281     4061903 : }
     282             : 
     283             : static void
     284      936304 : rel_basetable_column_get_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
     285             : {
     286      936304 :         if (e->p)
     287             :                 return;
     288      302165 :         sql_column *c = NULL;
     289             : 
     290      302165 :         if ((c = rel_base_find_column(rel, e->nid))) {
     291      204279 :                 sql_column_get_statistics(sql, c, e);
     292             :         }
     293             : }
     294             : 
     295             : static bool
     296        8876 : rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, sql_exp *e, int i)
     297             : {
     298        8876 :         sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
     299        8876 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     300        8876 :                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     301        8876 :         prop *est;
     302             : 
     303             :         /* for the intersection, if both expressions don't overlap, it can be pruned */
     304        8876 :         if (is_inter(rel->op) && !has_nil(le) && !has_nil(re) &&
     305         511 :                 ((rval_max && lval_min && atom_cmp(rval_max, lval_min) < 0) || (rval_min && lval_max && atom_cmp(rval_min, lval_max) > 0)))
     306          26 :                 return true;
     307             : 
     308        8850 :         if (lval_max && rval_max) {
     309        2557 :                 if (is_union(rel->op))
     310           3 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     311        2554 :                 else if (is_inter(rel->op))
     312         399 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval_max, rval_max)); /* for intersect the new max will be the min of the two */
     313             :                 else /* except */
     314        2155 :                         set_minmax_property(sql, e, PROP_MAX, lval_max);
     315             :         }
     316        8850 :         if (lval_min && rval_min) {
     317        2557 :                 if (is_union(rel->op))
     318           3 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     319        2554 :                 else if (is_inter(rel->op))
     320         399 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval_min, rval_min)); /* for intersect the new min will be the max of the two */
     321             :                 else /* except */
     322        2155 :                         set_minmax_property(sql, e, PROP_MIN, lval_min);
     323             :         }
     324             : 
     325        8850 :         if (is_union(rel->op) || is_munion(rel->op)) {
     326        5990 :                 if (!has_nil(le) && !has_nil(re))
     327         880 :                         set_has_no_nil(e);
     328        5990 :                 if (need_distinct(rel) && list_length(rel->exps) == 1)
     329           0 :                         set_unique(e);
     330        2860 :         } else if (is_inter(rel->op)) {
     331         564 :                 if (!has_nil(le) || !has_nil(re))
     332         509 :                         set_has_no_nil(e);
     333         564 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     334         345 :                         set_unique(e);
     335             :         } else {
     336        2296 :                 assert(is_except(rel->op));
     337        2296 :                 if (!has_nil(le))
     338        2233 :                         set_has_no_nil(e);
     339        2296 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     340        2009 :                         set_unique(e);
     341             :         }
     342             :         /* propagate unique estimation for known cases */
     343        8850 :         if (!is_union(rel->op) && (est = find_prop(le->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES)) {
     344         205 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     345         205 :                 p->value.dval = est->value.dval;
     346             :         }
     347             :         return false;
     348             : }
     349             : 
     350             : 
     351             : static void
     352       60730 : rel_munion_get_statistics(mvc *sql, sql_rel *rel, list *rels, sql_exp *e, int i)
     353             : {
     354       60730 :         assert(is_munion(rel->op));
     355             : 
     356       60730 :         sql_rel *l = rels->h->data;
     357       60730 :         sql_exp *le = list_fetch(l->exps, i);
     358       60730 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX);
     359       60730 :         bool has_nonil = !has_nil(le);
     360             : 
     361      175993 :         for(node *n = rels->h->next; n; n = n->next) {
     362      115263 :                 sql_rel *r = n->data;
     363      115263 :                 sql_exp *re = list_fetch(r->exps, i);
     364      115263 :                 atom *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     365             : 
     366      115263 :                 if (lval_max && rval_max) {
     367       42591 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     368       42591 :                         lval_max = find_prop_and_get(e->p, PROP_MAX);
     369             :                 }
     370      115263 :                 if (lval_min && rval_min) {
     371       42092 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     372       42092 :                         lval_min = find_prop_and_get(e->p, PROP_MIN);
     373             :                 }
     374      115263 :                 has_nonil &= !has_nil(re);
     375             : 
     376             :         }
     377             : 
     378       60730 :         if (has_nonil)
     379       41579 :                 set_has_no_nil(e);
     380             : 
     381       60730 :         if (need_distinct(rel) && list_length(rel->exps) == 1)
     382        1594 :                 set_unique(e);
     383       60730 : }
     384             : 
     385             : 
     386             : static sql_exp *
     387     3480807 : rel_propagate_statistics(visitor *v, sql_rel *rel, sql_exp *e, int depth)
     388             : {
     389     3480807 :         mvc *sql = v->sql;
     390     3480807 :         atom *lval;
     391             : 
     392     3480807 :         (void) depth;
     393     3480807 :         switch(e->type) {
     394     2182554 :         case e_column:
     395     2182554 :                 switch (rel->op) { /* set relations don't call rel_propagate_statistics */
     396      279093 :                 case op_join:
     397             :                 case op_left:
     398             :                 case op_right:
     399             :                 case op_full:
     400             :                 case op_semi:
     401             :                 case op_anti: {
     402      279093 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e);
     403      279093 :                         if (!found)
     404      139995 :                                 (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     405             :                         break;
     406             :                 }
     407     1903461 :                 case op_select:
     408             :                 case op_project:
     409             :                 case op_groupby: {
     410     1903461 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e); /* labels may be found on the same projection, ugh */
     411     1903455 :                         if (!found && is_simple_project(rel->op))
     412      125481 :                                 (void) rel_propagate_column_ref_statistics(sql, rel, e);
     413             :                         break;
     414             :                 }
     415           0 :                 case op_insert:
     416             :                 case op_update:
     417             :                 case op_delete:
     418           0 :                         (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     419           0 :                         break;
     420             :                 default:
     421             :                         break;
     422             :                 }
     423             :                 break;
     424       98487 :         case e_convert: {
     425       98487 :                 sql_subtype *to = exp_totype(e), *from = exp_fromtype(e);
     426       98487 :                 sql_exp *l = e->l;
     427       98487 :                 sql_class fr = from->type->eclass, too = to->type->eclass;
     428       98487 :                 prop *est;
     429             : 
     430       98487 :                 if (fr == too) {
     431       89412 :                         if ((lval = find_prop_and_get(l->p, PROP_MAX))) {
     432       58844 :                                 atom *res = atom_copy(sql->sa, lval);
     433       58844 :                                 if ((res = atom_cast(sql->sa, res, to)))
     434       58821 :                                         set_minmax_property(sql, e, PROP_MAX, res);
     435             :                         }
     436       89412 :                         if ((lval = find_prop_and_get(l->p, PROP_MIN))) {
     437       59465 :                                 atom *res = atom_copy(sql->sa, lval);
     438       59465 :                                 if ((res = atom_cast(sql->sa, res, to)))
     439       59442 :                                         set_minmax_property(sql, e, PROP_MIN, res);
     440             :                         }
     441             :                 }
     442             :                 /* propagating nuniques through conversions is tricky. I am adding just the general cases */
     443       98487 :                 if ((est = find_prop(l->p, PROP_NUNIQUES)) &&
     444       61117 :                         (((fr == EC_TIME || fr == EC_TIME_TZ || fr == EC_DATE) && (too == EC_TIMESTAMP || too == EC_TIMESTAMP_TZ)) ||
     445       61092 :                          ((fr == EC_NUM || fr == EC_BIT) && too == EC_NUM) || (fr == EC_DEC && too == EC_DEC) || (fr != EC_EXTERNAL && too == EC_STRING))) {
     446       57663 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     447       57663 :                         p->value.dval = est->value.dval;
     448             :                 }
     449       98487 :                 if (!has_nil(l))
     450       55541 :                         set_has_no_nil(e);
     451             :                 break;
     452             :         }
     453      341722 :         case e_aggr:
     454             :         case e_func: {
     455      341722 :                 BUN lv;
     456      341722 :                 sql_subfunc *f = e->f;
     457             : 
     458      341722 :                 if (!f->func->s) {
     459      315244 :                         int key = hash_key(f->func->base.name); /* Using hash lookup */
     460      315244 :                         sql_hash_e *he = sql_functions_lookup->buckets[key&(sql_functions_lookup->size-1)];
     461      315244 :                         lookup_function look = NULL;
     462             : 
     463      688567 :                         for (; he && !look; he = he->chain) {
     464      373323 :                                 struct function_properties* fp = (struct function_properties*) he->value;
     465             : 
     466      373323 :                                 if (!strcmp(f->func->base.name, fp->name))
     467      107728 :                                         look = fp->func;
     468             :                         }
     469      315244 :                         if (look)
     470      107728 :                                 look(sql, e);
     471             :                 }
     472             :                 /* for global aggregates with no semantics, if the left relation has values, then the output won't be NULL */
     473      341722 :                 if (!is_semantics(e) && e->l && !have_nil(e->l) &&
     474       89177 :                         (e->type != e_aggr || (is_groupby(rel->op) && list_length(rel->r)) || ((lv = get_rel_count(rel->l)) != BUN_NONE && lv > 0)))
     475       88854 :                         set_has_no_nil(e);
     476             :                 /* set properties for global aggregates */
     477      341722 :                 if (e->type == e_aggr && is_groupby(rel->op) && list_empty(rel->r)) {
     478        7871 :                         if (!find_prop(e->p, PROP_NUNIQUES)) {
     479        7871 :                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     480        7871 :                                 p->value.dval = 1;
     481             :                         }
     482        7871 :                         set_unique(e);
     483             :                 }
     484             :                 break;
     485             :         }
     486      570583 :         case e_atom:
     487      570583 :                 if (e->l) {
     488      553848 :                         atom *a = (atom*) e->l;
     489      553848 :                         if (!a->isnull) {
     490      490512 :                                 set_minmax_property(sql, e, PROP_MAX, a);
     491      490512 :                                 set_minmax_property(sql, e, PROP_MIN, a);
     492             :                         }
     493      553848 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     494      553848 :                         p->value.dval = 1;
     495       16735 :                 } else if (e->f) {
     496        3210 :                         list *vals = (list *) e->f;
     497        3210 :                         sql_exp *first = vals->h ? vals->h->data : NULL;
     498        3210 :                         atom *max = NULL, *min = NULL; /* all child values must have a valid min/max */
     499        3210 :                         int has_nil = 0;
     500             : 
     501        3210 :                         if (first) {
     502        3210 :                                 max = ((lval = find_prop_and_get(first->p, PROP_MAX))) ? lval : NULL;
     503        3210 :                                 min = ((lval = find_prop_and_get(first->p, PROP_MIN))) ? lval : NULL;
     504        3210 :                                 has_nil |= has_nil(first);
     505             :                         }
     506             : 
     507       13988 :                         for (node *n = vals->h ? vals->h->next : NULL ; n ; n = n->next) {
     508        7568 :                                 sql_exp *ee = n->data;
     509             : 
     510        7568 :                                 if (min && max) {
     511        7115 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MAX))) {
     512        7069 :                                                 max = atom_cmp(lval, max) > 0 ? lval : max;
     513             :                                         } else {
     514             :                                                 max = NULL;
     515             :                                         }
     516        7115 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MIN))) {
     517        7069 :                                                 min = atom_cmp(min, lval) > 0 ? lval : min;
     518             :                                         } else {
     519             :                                                 min = NULL;
     520             :                                         }
     521             :                                 }
     522        7568 :                                 has_nil |= has_nil(ee);
     523             :                         }
     524             : 
     525        3210 :                         if (!has_nil)
     526        2844 :                                 set_has_no_nil(e);
     527        3210 :                         if (min && max) {
     528        2821 :                                 set_minmax_property(sql, e, PROP_MAX, max);
     529        2821 :                                 set_minmax_property(sql, e, PROP_MIN, min);
     530             :                         }
     531        3210 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     532        3210 :                         p->value.dval = (dbl) list_length(vals);
     533             :                 } else {
     534       13525 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     535       13525 :                         p->value.dval = 1;
     536             :                 }
     537             :                 break;
     538      287461 :         case e_cmp:
     539             :                 /* TODO? propagating min/max/unique of booleans is not very worth it */
     540      287461 :                 if (e->flag == cmp_or || e->flag == cmp_filter) {
     541       18064 :                         if (!have_nil(e->l) && !have_nil(e->r))
     542       13591 :                                 set_has_no_nil(e);
     543      269397 :                 } else if (e->flag == cmp_in || e->flag == cmp_notin) {
     544       21252 :                         sql_exp *le = e->l;
     545       21252 :                         if (!has_nil(le) && !have_nil(e->r))
     546       18209 :                                 set_has_no_nil(e);
     547             :                 } else {
     548      248145 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     549      248145 :                         if (!has_nil(le) && !has_nil(re) && (!e->f || !has_nil(fe)))
     550      178192 :                                 set_has_no_nil(e);
     551             :                 }
     552             :                 break;
     553             :         case e_psm:
     554             :                 break;
     555             :         }
     556             : 
     557             : #ifndef NDEBUG
     558             :         {
     559             :                 /* min and max cannot be NULL and min must be <= than max, if it happens the inner relation must be empty! */
     560     3480801 :                 atom *min = find_prop_and_get(e->p, PROP_MIN), *max = find_prop_and_get(e->p, PROP_MAX);
     561             : 
     562     3480793 :                 (void) min;
     563     3480793 :                 (void) max;
     564     3480793 :                 assert(!min || !min->isnull);
     565     3480793 :                 assert(!max || !max->isnull);
     566     3480793 :                 assert(!min || !max || (min && max)/* atom_cmp(min, max) <= 0*/);
     567             :         }
     568             : #endif
     569     3480793 :         return e;
     570             : }
     571             : 
     572             : static list * /* Remove predicates always false from min/max values */
     573      139547 : rel_prune_predicates(visitor *v, sql_rel *rel)
     574             : {
     575      139547 :         if (rel->l) {
     576      139547 :                 sql_rel *l = rel->l;
     577      139547 :                 if (is_single(l))
     578        3414 :                         return rel->exps;
     579             :         }
     580      136133 :         if (!list_empty(rel->attr))
     581        2962 :                 return rel->exps;
     582      282461 :         for (node *n = rel->exps->h ; n ; n = n->next) {
     583      149290 :                 sql_exp *e = n->data;
     584             : 
     585      149290 :                 if (e->type == e_cmp && is_theta_exp(e->flag)) {
     586      122879 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     587      122879 :                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     588      122879 :                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     589      122879 :                         bool always_false = false, always_true = false;
     590             : 
     591      122879 :                         if (fe && !is_symmetric(e)) {
     592        3058 :                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
     593        3058 :                                 comp_type lower = range2lcompare(e->flag), higher = range2rcompare(e->flag);
     594        3660 :                                 int not_int1 = rval_min && lval_max && /* the middle and left intervals don't overlap */
     595        1663 :                                         (!is_anti(e) && (lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0)),
     596        4072 :                                         not_int2 = lval_min && fval_max && /* the middle and right intervals don't overlap */
     597        2485 :                                         (!is_anti(e) && (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0)),
     598        3058 :                                         not_int3 = rval_min && fval_max && /* the left interval is after the right one */
     599        1288 :                                         (!is_anti(e) && (atom_cmp(rval_min, fval_max) > 0));
     600             : 
     601        3058 :                                 always_false |= not_int1 || not_int2 || not_int3;
     602             :                                 /* for anti the middle must be before the left or after the right or the right after the left, for the other the middle must be always between the left and right intervals */
     603        4113 :                                 always_true |= !has_nil(le) && !has_nil(re) && !has_nil(fe) &&
     604        3957 :                                         lval_min && lval_max && rval_min && rval_max && fval_min && fval_max &&
     605         575 :                                         (is_anti(e) ? ((lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) || (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
     606         488 :                                          ((lower == cmp_gte ? atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0)));
     607             :                         } else if (!fe) {
     608      119803 :                                 if (!is_semantics(e)) /* trivial not null cmp null case */
     609      228174 :                                         always_false |= !is_anti(e) && ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)));
     610      119803 :                                 switch (e->flag) {
     611      104324 :                                 case cmp_equal:
     612      104324 :                                         if (lval_min && lval_max && rval_min && rval_max && (!is_semantics(e) || !has_nil(le) || !has_nil(re)))
     613      133738 :                                                 always_false |= (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     614      104324 :                                         if (is_semantics(e)) { /* prune *= NULL cases */
     615        5690 :                                                 always_false |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     616       11380 :                                                 always_true |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     617             :                                         }
     618             :                                         break;
     619        7216 :                                 case cmp_notequal:
     620        7216 :                                         if (lval_min && lval_max && rval_min && rval_max)
     621       11358 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     622        7216 :                                         if (is_semantics(e)) {
     623          26 :                                                 always_true |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     624          52 :                                                 always_false |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     625             :                                         }
     626             :                                         break;
     627        5632 :                                 case cmp_gt:
     628        5632 :                                         if (lval_max && rval_min)
     629        1947 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     630        5632 :                                         if (lval_min && rval_max)
     631        3894 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     632             :                                         break;
     633         119 :                                 case cmp_gte:
     634         119 :                                         if (lval_max && rval_min)
     635          49 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     636         119 :                                         if (lval_min && rval_max)
     637          98 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     638             :                                         break;
     639        2428 :                                 case cmp_lt:
     640        2428 :                                         if (lval_min && rval_max)
     641        1376 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     642        2428 :                                         if (lval_max && rval_min)
     643        2800 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     644             :                                         break;
     645          84 :                                 case cmp_lte:
     646          84 :                                         if (lval_min && rval_max)
     647          11 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     648          84 :                                         if (lval_max && rval_min)
     649          22 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     650             :                                         break;
     651             :                                 default: /* Maybe later I can do cmp_in and cmp_notin but don't forget to remove is_theta_exp check up there */
     652             :                                         break;
     653             :                                 }
     654             :                         }
     655      122879 :                         assert(!always_false || !always_true);
     656      122879 :                         if (always_false || always_true) {
     657        3652 :                                 sql_exp *ne = exp_atom_bool(v->sql->sa, always_true /* if always_true set then true else false */);
     658        3652 :                                 if (exp_name(e))
     659           0 :                                         exp_prop_alias(v->sql->sa, ne, e);
     660        3652 :                                 n->data = ne;
     661        3652 :                                 v->changes++;
     662             :                         }
     663             :                 }
     664             :         }
     665      133171 :         return rel->exps;
     666             : }
     667             : 
     668             : static sql_rel *
     669          14 : set_setop_side(visitor *v, sql_rel *rel, sql_rel *side)
     670             : {
     671          14 :         if (side == rel->r)
     672           0 :                 rel->r = NULL;
     673             :         else
     674          14 :                 rel->l = NULL;
     675          14 :         if (!is_simple_project(side->op) || rel_is_ref(side) || !list_empty(side->r)) {
     676           0 :                 side = rel_project(v->sql->sa, side, rel_projections(v->sql, side, NULL, 0, 1));
     677           0 :                 set_count_prop(v->sql->sa, side, get_rel_count(side->l));
     678           0 :                 side->exps = exps_exp_visitor_bottomup(v, side, side->exps, 0, &rel_propagate_statistics, false);
     679             :         }
     680          35 :         for (node *n = rel->exps->h, *m = side->exps->h ; n && m ; n = n->next, m = m->next) {
     681          21 :                 sql_exp *e1 = n->data, *e2 = m->data;
     682          21 :                 exp_prop_alias(v->sql->sa, e2, e1);
     683             :         }
     684          14 :         list_hash_clear(side->exps);
     685             : 
     686          14 :         if (need_distinct(rel))
     687           0 :                 set_distinct(side);
     688          14 :         side->p = prop_copy(v->sql->sa, rel->p);
     689          14 :         rel_destroy(rel);
     690          14 :         return side;
     691             : }
     692             : 
     693             : static BUN
     694       21900 : trivial_project_exp_card(sql_exp *e)
     695             : {
     696       22194 :         if (e->type == e_convert)
     697         294 :                 return trivial_project_exp_card(e->l);
     698       21900 :         return e->type == e_atom && e->f ? (BUN) list_length(e->f) : 1;
     699             : }
     700             : 
     701             : static BUN
     702       24248 : rel_calc_nuniques(mvc *sql, sql_rel *l, list *exps)
     703             : {
     704       24248 :         BUN lv = get_rel_count(l);
     705             : 
     706       24248 :         if (lv == 0)
     707             :                 return 0;
     708       21539 :         if (!list_empty(exps)) {
     709       21539 :                 BUN nuniques = 0;
     710             :                 /* compute the highest number of unique values */
     711       51135 :                 for (node *n = exps->h ; n && nuniques != BUN_NONE ; n = n->next) {
     712       29596 :                         sql_exp *e = n->data;
     713       29596 :                         sql_rel *bt = NULL;
     714       29596 :                         prop *p = NULL;
     715       29596 :                         BUN euniques = BUN_NONE;
     716       29596 :                         atom *min, *max, *sub = NULL;
     717       29596 :                         sql_subtype *tp = exp_subtype(e);
     718       29597 :                         sql_class ec = tp ? tp->type->eclass : EC_STRING; /* if 'e' has no type (eg parameter), use a non-number type to fail condition */
     719             : 
     720       29597 :                         if ((p = find_prop(e->p, PROP_NUNIQUES))) {
     721       20959 :                                 euniques = (BUN) p->value.dval;
     722        8637 :                         } else if (e->type == e_column && e->nid && rel_find_exp_and_corresponding_rel(l, e, false, &bt, NULL) && bt && (p = find_prop(bt->p, PROP_COUNT))) {
     723        6594 :                                 euniques = (BUN) p->value.lval;
     724             :                         }
     725             :                         /* use min to max range to compute number of possible values in the domain for number types */
     726       29596 :                         if ((EC_TEMP(ec)||ec==EC_NUM||ec==EC_MONTH||ec==EC_POS) &&
     727       22362 :                                 (min = find_prop_and_get(e->p, PROP_MIN)) && (max = find_prop_and_get(e->p, PROP_MAX))) {
     728             :                                 /* the range includes min and max, so the atom_inc call is needed */
     729             :                                 /* if 'euniques' has number of distinct values, compute min between both */
     730       17543 :                                 if ((sub = atom_sub(sql->sa, max, min)) && (sub = atom_inc(sql->sa, sub)) && (sub = atom_cast(sql->sa, sub, sql_bind_localtype("oid"))))
     731           0 :                                         euniques = MIN(euniques, (BUN) sub->data.val.oval);
     732             :                         }
     733       29596 :                         if (euniques != BUN_NONE)
     734       27553 :                                 nuniques = MAX(nuniques, euniques); /* the highest cardinality sets the estimation */
     735             :                         else
     736             :                                 nuniques = BUN_NONE;
     737             :                 }
     738       21539 :                 if (nuniques != BUN_NONE)
     739             :                         return nuniques;
     740             :         }
     741             :         return lv;
     742             : }
     743             : 
     744             : static sql_rel *
     745     2072669 : rel_get_statistics_(visitor *v, sql_rel *rel)
     746             : {
     747             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
     748     2072669 :         uint8_t has_special_modify = *(uint8_t*) v->data;
     749     2072669 :         bool can_be_pruned = !has_special_modify && v->storage_based_opt;
     750             : 
     751             :         /* Don't look at the same relation twice */
     752     2072669 :         if (are_statistics_gathered(rel->used))
     753             :                 return rel;
     754     1331406 :         rel->used |= statistics_gathered;
     755             : 
     756     1331406 :         switch (rel->op) {
     757      312348 :         case op_basetable: {
     758      312348 :                 sql_table *t = (sql_table *) rel->l;
     759      312348 :                 sqlstore *store = v->sql->session->tr->store;
     760             : 
     761      312348 :                 if (!list_empty(rel->exps)) {
     762     1248722 :                         for (node *n = rel->exps->h ; n ; n = n->next)
     763      936232 :                                 rel_basetable_column_get_statistics(v->sql, rel, n->data);
     764             :                 }
     765             :                 /* Set table row count. TODO? look for remote tables. Don't look at storage for declared tables, because it won't be cleaned */
     766      312504 :                 if (isTable(t) && t->s && !isDeclaredTable(t)) /* count active rows only */
     767      259085 :                         set_count_prop(v->sql->sa, rel, (BUN)store->storage_api.count_col(v->sql->session->tr, ol_first_node(t->columns)->data, 10));
     768             :                 break;
     769             :         }
     770        2791 :         case op_union:
     771             :         case op_inter:
     772             :         case op_except: {
     773        2791 :                 bool empty_cross = false;
     774        2791 :                 int i = 0;
     775        2791 :                 sql_rel *l = rel->l, *pl = rel->l, *r = rel->r, *pr = rel->r;
     776             : 
     777        2791 :                 while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     778           0 :                         pl = pl->l;
     779        2791 :                 while (is_sample(pr->op) || is_topn(pr->op))
     780           0 :                         pr = pr->l;
     781             :                 /* if it's not a projection, then project and propagate statistics */
     782        2791 :                 if (!is_project(pl->op) && !is_base(pl->op)) {
     783           0 :                         pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     784           0 :                         set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     785           0 :                         pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     786             :                 }
     787        2791 :                 if (!is_project(pr->op) && !is_base(pr->op)) {
     788           0 :                         pr = rel_project(v->sql->sa, pr, rel_projections(v->sql, pr, NULL, 0, 1));
     789           0 :                         set_count_prop(v->sql->sa, pr, get_rel_count(pr->l));
     790           0 :                         pr->exps = exps_exp_visitor_bottomup(v, pr, pr->exps, 0, &rel_propagate_statistics, false);
     791             :                 }
     792             : 
     793       11667 :                 for (node *n = rel->exps->h ; n ; n = n->next) {
     794        8876 :                         empty_cross |= rel_setop_get_statistics(v->sql, rel, pl->exps, pr->exps, n->data, i);
     795        8876 :                         i++;
     796             :                 }
     797             : 
     798             :                 /* propagate row count */
     799        2791 :                 if (is_union(rel->op)) {
     800         278 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     801         278 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     802             : 
     803         278 :                         if (lv == 0 && rv == 0) { /* both sides empty */
     804           2 :                                 if (can_be_pruned)
     805             :                                         empty_cross = true;
     806             :                                 else
     807           2 :                                         set_count_prop(v->sql->sa, rel, 0);
     808         276 :                         } else if (can_be_pruned && lv == 0 && !rel_is_ref(rel)) { /* left side empty */
     809           0 :                                 rel = set_setop_side(v, rel, r);
     810           0 :                                 empty_cross = false; /* don't rewrite again */
     811         276 :                         } else if (can_be_pruned && rv == 0 && !rel_is_ref(rel)) { /* right side empty */
     812           0 :                                 rel = set_setop_side(v, rel, l);
     813           0 :                                 empty_cross = false; /* don't rewrite again */
     814         276 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
     815           7 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
     816             :                         }
     817        2513 :                 } else if (is_inter(rel->op) || is_except(rel->op)) {
     818        2513 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     819        2513 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     820             : 
     821        2513 :                         if (lv == 0) { /* left side empty */
     822          62 :                                 if (can_be_pruned)
     823             :                                         empty_cross = true;
     824             :                                 else
     825           5 :                                         set_count_prop(v->sql->sa, rel, 0);
     826        2451 :                         } else if (rv == 0) { /* right side empty */
     827          17 :                                 if (is_inter(rel->op)) {
     828           3 :                                         if (can_be_pruned)
     829             :                                                 empty_cross = true;
     830             :                                         else
     831           0 :                                                 set_count_prop(v->sql->sa, rel, 0);
     832          14 :                                 } else if (can_be_pruned && !rel_is_ref(rel)) {
     833          14 :                                         rel = set_setop_side(v, rel, l);
     834          14 :                                         empty_cross = false; /* don't rewrite again */
     835             :                                 } else {
     836           0 :                                         set_count_prop(v->sql->sa, rel, lv);
     837             :                                 }
     838             :                         } else {
     839        2434 :                                 set_count_prop(v->sql->sa, rel, lv);
     840             :                         }
     841             :                 }
     842        2791 :                 if (can_be_pruned && empty_cross) {
     843          80 :                         rel_destroy(rel->l);
     844          80 :                         rel->l = NULL;
     845          80 :                         rel_destroy(rel->r);
     846          80 :                         rel->r = NULL;
     847         244 :                         for (node *n = rel->exps->h ; n ; n = n->next) {
     848         164 :                                 sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     849         164 :                                 exp_prop_alias(v->sql->sa, a, e);
     850         164 :                                 n->data = a;
     851             :                         }
     852          80 :                         list_hash_clear(rel->exps);
     853          80 :                         sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     854          80 :                         set_count_prop(v->sql->sa, l, 1);
     855          80 :                         l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     856          80 :                         set_count_prop(v->sql->sa, l, 0);
     857          80 :                         rel->op = op_project;
     858          80 :                         rel->l = l;
     859          80 :                         rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     860          80 :                         set_count_prop(v->sql->sa, rel, 0);
     861          80 :                         set_nodistinct(rel); /* set relations may have distinct flag set */
     862          80 :                         v->changes++;
     863             :                 }
     864             :                 break;
     865             :         }
     866       12869 :         case op_munion: {
     867       12869 :                 list *l = rel->l, *nrels = sa_list(v->sql->sa);
     868       12869 :                 BUN cnt = 0;
     869       12869 :                 bool needs_pruning = false;
     870             : 
     871       49217 :                 for (node *n = l->h; n; n = n->next) {
     872       36348 :                         sql_rel *r = n->data, *pl = r;
     873             : 
     874       36348 :                         while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     875           0 :                                         pl = pl->l;
     876             :                         /* if it's not a projection, then project and propagate statistics */
     877       36348 :                         if (!is_project(pl->op) && !is_base(pl->op)) {
     878           0 :                                 pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     879           0 :                                 set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     880           0 :                                 pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     881             :                         }
     882       36348 :                         nrels = append(nrels, pl);
     883             :                         /* we need new munion statistics */
     884             :                         /* propagate row count */
     885       36348 :                         BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     886             :                         /* if PROP_COUNT does not exist we assume at least a row (see get_rel_count def) */
     887       36348 :                         if (rv == BUN_NONE) {
     888        1156 :                                 cnt++;
     889        1156 :                                 continue;
     890             :                         }
     891       35192 :                         if (!rv && can_be_pruned)
     892        6619 :                                 needs_pruning = true;
     893             :                         /* overflow check */
     894       35192 :                         if (rv > (BUN_MAX - cnt))
     895       36348 :                                 rv = BUN_MAX;
     896             :                         else
     897       35192 :                                 cnt += rv;
     898             :                 }
     899       12869 :                 int i = 0;
     900       73599 :                 for (node *n = rel->exps->h ; n ; n = n->next, i++)
     901       60730 :                         rel_munion_get_statistics(v->sql, rel, nrels, n->data, i);
     902             : 
     903       12869 :                 if (needs_pruning && !rel_is_ref(rel)) {
     904        4457 :                         v->changes++;
     905        4457 :                         list *nl = sa_list(l->sa);
     906             : 
     907       16444 :                         for (node *n = nrels->h; n; n = n->next) {
     908       11987 :                                 sql_rel *r = n->data;
     909       11987 :                                 BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     910             : 
     911       11987 :                                 if (!rv) { /* keep last for now */
     912        6148 :                                         rel_destroy(r);
     913        6148 :                                         continue;
     914             :                                 }
     915        5839 :                                 nl = append(nl, r);
     916             :                         }
     917        4457 :                         rel->l = nl;
     918        4457 :                         if (list_length(nl) == 1) {
     919        4131 :                                 sql_rel *l = rel->l = nl->h->data; /* ugh */
     920        4131 :                                 rel->r = NULL;
     921        4131 :                                 rel->op = op_project;
     922             : 
     923       20291 :                                 for (node *n = rel->exps->h, *m = l->exps->h ; n && m ; n = n->next, m = m->next) {
     924       16160 :                                         sql_exp *pe = n->data, *ie = m->data;
     925       16160 :                                         sql_exp *ne = exp_ref(v->sql, ie);
     926       16160 :                                         exp_prop_alias(v->sql->sa, ne, pe);
     927       16160 :                                         n->data = ne;
     928             :                                 }
     929        4131 :                                 list_hash_clear(rel->exps);
     930         326 :                         } else if (list_empty(nl)) {
     931             :                                 /* empty select (project [ nils ] ) */
     932         454 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     933         354 :                                         sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     934         354 :                                         exp_prop_alias(v->sql->sa, a, e);
     935         354 :                                         n->data = a;
     936             :                                 }
     937         100 :                                 list_hash_clear(rel->exps);
     938         100 :                                 sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     939         100 :                                 set_count_prop(v->sql->sa, l, 1);
     940         100 :                                 l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     941         100 :                                 set_count_prop(v->sql->sa, l, 0);
     942         100 :                                 rel->op = op_project;
     943         100 :                                 rel->r = NULL;
     944         100 :                                 rel->l = l;
     945         100 :                                 rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     946         100 :                                 set_count_prop(v->sql->sa, rel, 0);
     947         100 :                                 set_nodistinct(rel); /* set relations may have distinct flag set */
     948             :                         }
     949             :                 } else {
     950        8412 :                         set_count_prop(v->sql->sa, rel, cnt);
     951             :                 }
     952             :                 break;
     953             :         }
     954      532553 :         case op_join:
     955             :         case op_left:
     956             :         case op_right:
     957             :         case op_full:
     958             :         case op_semi:
     959             :         case op_anti:
     960             :         case op_select:
     961             :         case op_project:
     962             :         case op_groupby: {
     963      532553 :                 if (is_groupby(rel->op) && !list_empty(rel->r))
     964       15532 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     965      532553 :                 rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, &rel_propagate_statistics, false);
     966      532546 :                 if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r))
     967       39288 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     968             :                 /* The following optimizations can only be applied after propagating the statistics to rel->exps */
     969      532549 :                 if (can_be_pruned && (is_join(rel->op) || is_select(rel->op)) && !list_empty(rel->exps)) {
     970      139547 :                         int changes = v->changes;
     971      139547 :                         rel->exps = rel_prune_predicates(v, rel);
     972      139547 :                         if (v->changes > changes)
     973        3619 :                                 rel = rewrite_simplify(v, 0, v->value_based_opt, rel);
     974             :                 }
     975             : 
     976             :                 /* propagate row count */
     977      532549 :                 sql_rel *l = rel->l, *r = rel->r;
     978      532549 :                 switch (rel->op) {
     979      134539 :                 case op_join:
     980             :                 case op_left:
     981             :                 case op_right:
     982             :                 case op_full: {
     983      134539 :                         BUN lv = get_rel_count(l), rv = get_rel_count(r), uniques_estimate = BUN_MAX, join_idx_estimate = BUN_MAX;
     984             : 
     985      134539 :                         if (!list_empty(rel->exps) && !is_single(rel)) {
     986      246075 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     987      125623 :                                         sql_exp *e = n->data, *el = e->l, *er = e->r;
     988             : 
     989      125623 :                                         if (find_prop(e->p, PROP_JOINIDX)) {
     990         667 :                                                 join_idx_estimate = lv>rv?lv:rv;
     991      124956 :                                         } else if (e->type == e_cmp && e->flag == cmp_equal) {
     992             :                                                 /* if one of the sides is unique, the cardinality will be that exact number, but look for nulls */
     993      121071 :                                                 if (!is_semantics(e) || !has_nil(el) || !has_nil(er)) {
     994      120675 :                                                         BUN lu = 0, ru = 0;
     995      120675 :                                                         prop *p = NULL;
     996      120675 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES)))
     997       89586 :                                                                 lu = (BUN) p->value.dval;
     998      120675 :                                                         if ((p = find_prop(er->p, PROP_NUNIQUES)))
     999      103979 :                                                                 ru = (BUN) p->value.dval;
    1000      120675 :                                                         if (is_unique(el) || lu > lv) {
    1001       73715 :                                                                 BUN ncount = (is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv;
    1002       73715 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1003       46960 :                                                         } else if (is_unique(er) || ru > rv) {
    1004       29351 :                                                                 BUN ncount = (is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv;
    1005       29351 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1006             :                                                         }
    1007             :                                                 }
    1008             :                                         }
    1009             :                                 }
    1010             :                         }
    1011      134539 :                         if (is_single(rel)) {
    1012        4754 :                                 set_count_prop(v->sql->sa, rel, lv);
    1013      129785 :                         } else if (join_idx_estimate != BUN_MAX) {
    1014         665 :                                 set_count_prop(v->sql->sa, rel, join_idx_estimate);
    1015      129120 :                         } else if (uniques_estimate != BUN_MAX) {
    1016       96613 :                                 set_count_prop(v->sql->sa, rel, uniques_estimate);
    1017       32507 :                         } else if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1018             :                                 /* corner cases for outer joins */
    1019         126 :                                 if (is_left(rel->op)) {
    1020         114 :                                         set_count_prop(v->sql->sa, rel, lv);
    1021          12 :                                 } else if (is_right(rel->op)) {
    1022           3 :                                         set_count_prop(v->sql->sa, rel, rv);
    1023           9 :                                 } else if (is_full(rel->op) && lv != BUN_NONE && rv != BUN_NONE) {
    1024           9 :                                         set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
    1025           0 :                                 } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1026           0 :                                         set_count_prop(v->sql->sa, rel, 0);
    1027             :                                 }
    1028       32381 :                         } else if (lv == 0) {
    1029        1166 :                                 set_count_prop(v->sql->sa, rel, (is_right(rel->op) || is_full(rel->op)) ? rv : 0);
    1030       31784 :                         } else if (rv == 0) {
    1031        1568 :                                 set_count_prop(v->sql->sa, rel, (is_left(rel->op) || is_full(rel->op)) ? lv : 0);
    1032       30710 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1033       18258 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX / lv)) ? BUN_MAX : (lv * rv)); /* overflow check */
    1034             :                         }
    1035             :                         break;
    1036             :                 }
    1037        2636 :                 case op_anti:
    1038        2636 :                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1039        2636 :                         break;
    1040      109015 :                 case op_semi:
    1041             :                 case op_select:
    1042             :                         /* TODO calculate cardinalities using selectivities */
    1043      109015 :                         if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1044        2744 :                                 set_count_prop(v->sql->sa, rel, 0);
    1045             :                         } else {
    1046      209294 :                                 if (!list_empty(rel->exps) && !is_single(rel)) {
    1047      103023 :                                         BUN cnt = get_rel_count(l), u = 1;
    1048      171832 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1049      112870 :                                                 sql_exp *e = n->data, *el = e->l, *er = e->r;
    1050             : 
    1051             :                                                 /* simple expressions first */
    1052      112870 :                                                 if (e->type == e_cmp && e->flag == cmp_equal && exp_is_atom(er)) {
    1053             :                                                         /* use selectivity */
    1054       57256 :                                                         prop *p;
    1055       57256 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES))) {
    1056       44061 :                                                                 u = (BUN) p->value.dval;
    1057       44061 :                                                                 break;
    1058             :                                                         }
    1059             :                                                 }
    1060             :                                         }
    1061             :                                         /* u is an *estimate*, so don't set count_prop to 0 unless cnt is 0 */
    1062      103023 :                                         set_count_prop(v->sql->sa, rel, cnt == 0 ? 0 : u == 0 || u > cnt ? 1 : cnt/u);
    1063             :                                 } else {
    1064        3248 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1065             :                                 }
    1066             :                         }
    1067             :                         break;
    1068      263001 :                 case op_project:
    1069      263001 :                         if (l) {
    1070      252879 :                                 if (need_distinct(rel)) {
    1071           0 :                                         set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->exps));
    1072             :                                 } else {
    1073      252879 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1074             :                                 }
    1075             :                         } else {
    1076       10122 :                                 BUN card = 1;
    1077             : 
    1078       10122 :                                 if (!list_empty(rel->exps)) {
    1079       30685 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1080       20563 :                                                 sql_exp *e = n->data;
    1081       20563 :                                                 card = MAX(card, trivial_project_exp_card(e));
    1082             :                                         }
    1083             :                                 }
    1084       10122 :                                 set_count_prop(v->sql->sa, rel, card);
    1085             :                         }
    1086             :                         break;
    1087       22953 :                 case op_groupby:
    1088       22953 :                         if (list_empty(rel->r)) {
    1089        7421 :                                 set_count_prop(v->sql->sa, rel, 1);
    1090             :                         } else {
    1091       15532 :                                 set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->r));
    1092             :                         }
    1093             :                         break;
    1094             :                 default:
    1095             :                         break;
    1096             :                 }
    1097             :                 break;
    1098             :         }
    1099       16830 :         case op_topn: {
    1100       16830 :                 BUN lv = get_rel_count(rel->l);
    1101             : 
    1102       16830 :                 if (lv != BUN_NONE) {
    1103       16813 :                         sql_exp *le = rel->exps->h->data, *oe = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
    1104          96 :                         if (oe && oe->l && exp_is_not_null(oe)) { /* no parameters */
    1105          96 :                                 BUN offset = (BUN) ((atom*)oe->l)->data.val.lval;
    1106          96 :                                 lv = offset >= lv ? 0 : lv - offset;
    1107             :                         }
    1108       16813 :                         if (le->l && exp_is_not_null(le)) {
    1109       16775 :                                 BUN limit = (BUN) ((atom*)le->l)->data.val.lval;
    1110       16775 :                                 lv = MIN(lv, limit);
    1111             :                         }
    1112       16813 :                         set_count_prop(v->sql->sa, rel, lv);
    1113             :                 }
    1114             :                 break;
    1115             :         }
    1116          22 :         case op_sample: {
    1117          22 :                 BUN lv = get_rel_count(rel->l);
    1118             : 
    1119          22 :                 if (lv != BUN_NONE) {
    1120           4 :                         sql_exp *se = rel->exps->h->data;
    1121           4 :                         sql_subtype *tp = exp_subtype(se);
    1122             : 
    1123           4 :                         if (se->l && tp->type->eclass == EC_NUM) { /* sample is a number of rows */
    1124           4 :                                 BUN sample = (BUN) ((atom*)se->l)->data.val.lval;
    1125           4 :                                 lv = MIN(lv, sample);
    1126           0 :                         } else if (se->l) { /* sample is a percentage of rows */
    1127           0 :                                 dbl percent = ((atom*)se->l)->data.val.dval;
    1128           0 :                                 assert(tp->type->eclass == EC_FLT);
    1129           0 :                                 lv = (BUN) ceil((dbl)lv * percent);
    1130             :                         }
    1131           4 :                         set_count_prop(v->sql->sa, rel, lv);
    1132             :                 }
    1133             :                 break;
    1134             :         }
    1135        5972 :         case op_table: {
    1136        5972 :                 sql_exp *op = rel->r;
    1137        5972 :                 if (rel->flag != TRIGGER_WRAPPER && op) {
    1138        5660 :                         sql_subfunc *f = op->f;
    1139        5660 :                         if (f->func->lang == FUNC_LANG_SQL) {
    1140          97 :                                 set_count_prop(v->sql->sa, rel, 1000 /* just some fallback value */);
    1141        5563 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "storage") == 0) {
    1142         829 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of catalog */);
    1143        4734 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "db_users") == 0) {
    1144           0 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of users */);
    1145        4734 :                         } else if (f->func->lang == FUNC_LANG_MAL && strncmp(f->func->base.name, "querylog", 8) == 0) {
    1146        1085 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of querylog */);
    1147        3649 :                         } else if (f->func->lang == FUNC_LANG_MAL &&
    1148        3539 :                                            (strcmp(f->func->base.name, "queue") == 0 ||
    1149        3275 :                                                 strcmp(f->func->base.name, "optimizers") == 0 ||
    1150        2963 :                                                 strcmp(f->func->base.name, "env") == 0 ||
    1151        2672 :                                                 strcmp(f->func->base.name, "keywords") == 0 ||
    1152        2672 :                                                 strcmp(f->func->base.name, "statistics") == 0 ||
    1153        2005 :                                                 strcmp(f->func->base.name, "rejects") == 0 ||
    1154        1751 :                                                 strcmp(f->func->base.name, "schemastorage") == 0 ||
    1155        1751 :                                                 strncmp(f->func->base.name, "storage", 7) == 0 ||
    1156        1751 :                                                 strcmp(f->func->base.name, "sessions") == 0) ) {
    1157        2078 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of queue */);
    1158             :                         }
    1159             :                         /* else {
    1160             :                                 printf("%%func needs stats : %s\n", f->func->base.name);
    1161             :                         } */
    1162             :                 }
    1163             :                 break;
    1164             :         }
    1165             :         /*These relations are less important for now
    1166             :           TODO later we can tune it */
    1167             :         case op_insert:
    1168             :         case op_update:
    1169             :         case op_delete:
    1170             :         case op_truncate:
    1171             :         case op_ddl:
    1172             :         default:
    1173             :                 break;
    1174             :         }
    1175             : 
    1176             :         return rel;
    1177             : }
    1178             : 
    1179             : static sql_rel *
    1180      539480 : rel_get_statistics(visitor *v, global_props *gp, sql_rel *rel)
    1181             : {
    1182             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
    1183      539480 :         uint8_t has_special_modify = (uint8_t) gp->has_special_modify;
    1184      539480 :         v->data = &has_special_modify;
    1185      539480 :         rel = rel_visitor_bottomup(v, rel, &rel_get_statistics_);
    1186      539892 :         v->data = gp;
    1187      539892 :         return rel;
    1188             : }
    1189             : 
    1190             : run_optimizer
    1191      704679 : bind_get_statistics(visitor *v, global_props *gp)
    1192             : {
    1193      704679 :         (void) v;
    1194      704679 :         return (gp->opt_level == 1 && !gp->cnt[op_insert]) ? rel_get_statistics : NULL;
    1195             : }
    1196             : 
    1197             : 
    1198             : static bool
    1199       94972 : point_select_on_unique_column(sql_rel *rel)
    1200             : {
    1201       94972 :         if (is_select(rel->op) && !list_empty(rel->exps)) {
    1202      131461 :                 for (node *n = rel->exps->h; n ; n = n->next) {
    1203       75696 :                         sql_exp *e = n->data, *el = e->l, *er = e->r, *found = NULL;
    1204             : 
    1205       75696 :                         if (is_compare(e->type) && e->flag == cmp_equal) {
    1206       33664 :                                 if (is_numeric_upcast(el))
    1207           0 :                                         el = el->l;
    1208       33664 :                                 if (is_numeric_upcast(er))
    1209           0 :                                         er = er->l;
    1210       33664 :                                 if (is_alias(el->type) && exp_is_atom(er) && (found = rel_find_exp(rel->l, el)) &&
    1211         749 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(found) || !has_nil(er)))
    1212             :                                         return true;
    1213       32915 :                                 if (is_alias(er->type) && exp_is_atom(el) && (found = rel_find_exp(rel->l, er)) &&
    1214           0 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(el) || !has_nil(found)))
    1215             :                                         return true;
    1216             :                         }
    1217             :                 }
    1218             :         }
    1219             :         return false;
    1220             : }
    1221             : 
    1222             : /*
    1223             :  * A point select on an unique column reduces the number of rows to 1. If the same select is under a
    1224             :  * join, the opposite side's select can be pushed above the join.
    1225             :  */
    1226             : static inline sql_rel *
    1227     1245427 : rel_push_select_up(visitor *v, sql_rel *rel)
    1228             : {
    1229     1245427 :         if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) || is_semi(rel->op)) && !is_single(rel)) {
    1230      254131 :                 sql_rel *l = rel->l, *r = rel->r;
    1231      254131 :                 bool can_pushup_left = is_select(l->op) && !rel_is_ref(l) && !is_single(l) && (is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op)),
    1232      254131 :                         can_pushup_right = is_select(r->op) && !rel_is_ref(r) && !is_single(r) && (is_innerjoin(rel->op) || is_right(rel->op));
    1233             : 
    1234      254131 :                 if (can_pushup_left || can_pushup_right) {
    1235       66952 :                         if (can_pushup_left)
    1236       45469 :                                 can_pushup_left = point_select_on_unique_column(r);
    1237       66952 :                         if (can_pushup_right)
    1238       49503 :                                 can_pushup_right = point_select_on_unique_column(l);
    1239             : 
    1240             :                         /* if both selects retrieve one row each, it's not worth it to push both up */
    1241       66952 :                         if (can_pushup_left && !can_pushup_right) {
    1242         683 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1243         683 :                                 nrel->l = l->l;
    1244         683 :                                 rel = rel_inplace_select(rel, nrel, l->exps);
    1245         683 :                                 assert(is_select(rel->op));
    1246         683 :                                 v->changes++;
    1247       66269 :                         } else if (!can_pushup_left && can_pushup_right) {
    1248          14 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1249          14 :                                 nrel->r = r->l;
    1250          14 :                                 rel = rel_inplace_select(rel, nrel, r->exps);
    1251          14 :                                 assert(is_select(rel->op));
    1252          14 :                                 v->changes++;
    1253             :                         }
    1254             :                 }
    1255             :         }
    1256     1245427 :         return rel;
    1257             : }
    1258             : 
    1259             : static int
    1260       93255 : sql_class_base_score(visitor *v, sql_column *c, sql_subtype *t, bool equality_based)
    1261             : {
    1262       93255 :         int de;
    1263             : 
    1264       93255 :         if (!t)
    1265             :                 return 0;
    1266       93255 :         switch (ATOMstorage(t->type->localtype)) {
    1267             :         case TYPE_bte:
    1268             :                 return 150 - 8;
    1269        1597 :         case TYPE_sht:
    1270        1597 :                 return 150 - 16;
    1271       37292 :         case TYPE_int:
    1272       37292 :                 return 150 - 32;
    1273         518 :         case TYPE_void:
    1274             :         case TYPE_lng:
    1275         518 :                 return 150 - 64;
    1276             :         case TYPE_uuid:
    1277             : #ifdef HAVE_HGE
    1278             :         case TYPE_hge:
    1279             : #endif
    1280             :                 return 150 - 128;
    1281           2 :         case TYPE_flt:
    1282           2 :                 return 75 - 24;
    1283             :         case TYPE_dbl:
    1284             :                 return 75 - 53;
    1285       30573 :         default:
    1286       30573 :                 if (equality_based && c && v->storage_based_opt && (de = mvc_is_duplicate_eliminated(v->sql, c)))
    1287        1558 :                         return 150 - de * 8;
    1288             :                 /* strings and blobs not duplicate eliminated don't get any points here */
    1289             :                 return 0;
    1290             :         }
    1291             : }
    1292             : 
    1293             : static int
    1294       34351 : score_se_base(visitor *v, sql_rel *rel, sql_exp *e)
    1295             : {
    1296       34351 :         int res = 0;
    1297       34351 :         sql_subtype *t = exp_subtype(e);
    1298       34351 :         sql_column *c = NULL;
    1299             : 
    1300       34351 :         if (e->type == e_convert) /* keep unsafes at the end (TODO improve) */
    1301             :                 return -1000;
    1302             :         /* can we find out if the underlying table is sorted */
    1303       33750 :         if ((c = exp_find_column(rel, e, -2)) && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1304       33750 :                 res += 600;
    1305             : 
    1306             :         /* prefer the shorter var types over the longer ones */
    1307       33750 :         res += sql_class_base_score(v, c, t, is_equality_or_inequality_exp(e->flag)); /* smaller the type, better */
    1308       33750 :         return res;
    1309             : }
    1310             : 
    1311             : static int
    1312       58433 : score_se(visitor *v, sql_rel *rel, sql_exp *e)
    1313             : {
    1314       58433 :         int score = 0;
    1315       58433 :         if (e->type == e_cmp && !is_complex_exp(e->flag)) {
    1316       34351 :                 sql_exp *l = e->l;
    1317             : 
    1318       34351 :                 while (l->type == e_cmp) { /* go through nested comparisons */
    1319           2 :                         sql_exp *ll;
    1320             : 
    1321           2 :                         if (l->flag == cmp_filter || l->flag == cmp_or)
    1322           0 :                                 ll = ((list*)l->l)->h->data;
    1323             :                         else
    1324           2 :                                 ll = l->l;
    1325           2 :                         if (ll->type != e_cmp)
    1326             :                                 break;
    1327             :                         l = ll;
    1328             :                 }
    1329       34351 :                 score += score_se_base(v, rel, l);
    1330             :         }
    1331       58433 :         score += exp_keyvalue(e);
    1332       58433 :         return score;
    1333             : }
    1334             : 
    1335             : static inline sql_rel *
    1336     1245427 : rel_select_order(visitor *v, sql_rel *rel)
    1337             : {
    1338     1245427 :         int *scores = NULL;
    1339     1245427 :         sql_exp **exps = NULL;
    1340             : 
    1341     1245427 :         if (is_select(rel->op) && list_length(rel->exps) > 1) {
    1342       27334 :                 node *n;
    1343       27334 :                 int i, nexps = list_length(rel->exps);
    1344       27334 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, nexps);
    1345       27334 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, nexps);
    1346             : 
    1347       85767 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next) {
    1348       58463 :                         exps[i] = n->data;
    1349       58463 :                         if (find_prop(exps[i]->p, PROP_HASHCOL))
    1350             :                                 return rel;
    1351       58433 :                         scores[i] = score_se(v, rel, n->data);
    1352             :                 }
    1353       27304 :                 GDKqsort(scores, exps, NULL, nexps, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1354             : 
    1355       85737 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next)
    1356       58433 :                         n->data = exps[i];
    1357             :         }
    1358             : 
    1359             :         return rel;
    1360             : }
    1361             : 
    1362             : /* Compute the efficiency of using this expression early in a group by list */
    1363             : static int
    1364       59505 : score_gbe(visitor *v, sql_rel *rel, sql_exp *e)
    1365             : {
    1366       59505 :         int res = 0;
    1367       59505 :         sql_subtype *t = exp_subtype(e);
    1368       59505 :         sql_column *c = exp_find_column(rel, e, -2);
    1369             : 
    1370       59505 :         if (e->card == CARD_ATOM) /* constants are trivial to group */
    1371          38 :                 res += 1000;
    1372             :         /* can we find out if the underlying table is sorted */
    1373       59505 :         if (is_unique(e) || find_prop(e->p, PROP_HASHCOL) || (c && v->storage_based_opt && mvc_is_unique(v->sql, c))) /* distinct columns */
    1374        3584 :                 res += 700;
    1375       38914 :         if (c && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1376        3710 :                 res += 500;
    1377       59505 :         if (find_prop(e->p, PROP_HASHIDX)) /* has hash index */
    1378           0 :                 res += 200;
    1379             : 
    1380             :         /* prefer the shorter var types over the longer ones */
    1381       59505 :         res += sql_class_base_score(v, c, t, true); /* smaller the type, better */
    1382       59505 :         return res;
    1383             : }
    1384             : 
    1385             : /* reorder group by expressions */
    1386             : static inline sql_rel *
    1387     1245428 : rel_groupby_order(visitor *v, sql_rel *rel)
    1388             : {
    1389     1245428 :         int *scores = NULL;
    1390     1245428 :         sql_exp **exps = NULL;
    1391             : 
    1392     1245428 :         if (is_groupby(rel->op) && list_length(rel->r) > 1) {
    1393       25588 :                 node *n;
    1394       25588 :                 list *gbe = rel->r;
    1395       25588 :                 int i, ngbe = list_length(gbe);
    1396       25588 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, ngbe);
    1397       25588 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, ngbe);
    1398             : 
    1399             :                 /* first sorting step, give priority for integers and sorted columns */
    1400       85093 :                 for (i = 0, n = gbe->h; n; i++, n = n->next) {
    1401       59505 :                         exps[i] = n->data;
    1402       59505 :                         scores[i] = score_gbe(v, rel, exps[i]);
    1403             :                 }
    1404       25588 :                 GDKqsort(scores, exps, NULL, ngbe, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1405             : 
    1406             :                 /* second sorting step, give priority to strings with lower number of digits */
    1407       50506 :                 for (i = ngbe - 1; i && !scores[i]; i--); /* find expressions with no score from the first round */
    1408       25588 :                 if (scores[i])
    1409       24590 :                         i++;
    1410       25588 :                 if (ngbe - i > 1) {
    1411        8604 :                         for (int j = i; j < ngbe; j++) {
    1412        6597 :                                 sql_subtype *t = exp_subtype(exps[j]);
    1413        6597 :                                 scores[j] = t ? t->digits : 0;
    1414             :                         }
    1415             :                         /* the less number of digits the better, order ascending */
    1416        2007 :                         GDKqsort(scores + i, exps + i, NULL, ngbe - i, sizeof(int), sizeof(void *), TYPE_int, false, true);
    1417             :                 }
    1418             : 
    1419       85093 :                 for (i = 0, n = gbe->h; n; i++, n = n->next)
    1420       59505 :                         n->data = exps[i];
    1421             :         }
    1422             : 
    1423     1245428 :         return rel;
    1424             : }
    1425             : 
    1426             : /* This optimization loop contains optimizations that can potentially use statistics */
    1427             : static sql_rel *
    1428     1245427 : rel_final_optimization_loop_(visitor *v, sql_rel *rel)
    1429             : {
    1430             :         /* Run rel_push_select_up only once at the end to avoid an infinite optimization loop */
    1431     1245427 :         rel = rel_push_select_up(v, rel);
    1432     1245428 :         rel = rel_select_order(v, rel);
    1433             : 
    1434             :         /* TODO? Maybe later add rel_simplify_count, rel_join2semijoin, rel_simplify_fk_joins,
    1435             :            rel_distinct_project2groupby, rel_simplify_predicates, rel_simplify_math,
    1436             :            rel_distinct_aggregate_on_unique_values */
    1437             : 
    1438     1245428 :         rel = rel_groupby_order(v, rel);
    1439     1245426 :         return rel;
    1440             : }
    1441             : 
    1442             : static sql_rel *
    1443       66104 : rel_final_optimization_loop(visitor *v, global_props *gp, sql_rel *rel)
    1444             : {
    1445       66104 :         (void) gp;
    1446       66104 :         return rel_visitor_bottomup(v, rel, &rel_final_optimization_loop_);
    1447             : }
    1448             : 
    1449             : run_optimizer
    1450      705238 : bind_final_optimization_loop(visitor *v, global_props *gp)
    1451             : {
    1452      705238 :         int flag = v->sql->sql_optimizer;
    1453             :         /* At the moment, this optimizer has dependency on 3 flags */
    1454      547412 :         return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_select]) &&
    1455      771345 :                 (flag & push_select_up) && (flag & optimize_select_and_joins_topdown) && (flag & optimize_projections) ? rel_final_optimization_loop : NULL;
    1456             : }

Generated by: LCOV version 1.14