LCOV - code coverage report
Current view: top level - sql/server - rel_statistics.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 833 885 94.1 %
Date: 2024-12-20 21:24:02 Functions: 26 26 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include "rel_optimizer_private.h"
      15             : #include "rel_statistics.h"
      16             : #include "rel_basetable.h"
      17             : #include "rel_rewriter.h"
      18             : #include "sql_storage.h"
      19             : 
      20             : static sql_exp *
      21     3467188 : comparison_find_column(sql_exp *input, sql_exp *e)
      22             : {
      23     3467188 :         switch (input->type) {
      24       99051 :         case e_convert: {
      25       99051 :                 list *types = (list *)input->r;
      26       99051 :                 sql_class from = ((sql_subtype*)types->h->data)->type->eclass, to = ((sql_subtype*)types->h->next->data)->type->eclass;
      27       99051 :                 if (from == to)
      28      191277 :                         return comparison_find_column(input->l, e) ? input : NULL;
      29             :                 return NULL;
      30             :         }
      31     3052965 :         case e_column:
      32     3052965 :                 return exp_match(e, input) ? input : NULL;
      33             :         default:
      34             :                 return NULL;
      35             :         }
      36             : }
      37             : 
      38             : /* multi lo <= col <= hi, maybe still be false even if lo or hi are NULL, possibly similar for filter on multiple
      39             :  * columns */
      40             : #define comp_single_column(c) (!c->f)
      41             : 
      42             : static sql_exp *
      43     8755006 : rel_propagate_column_ref_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
      44             : {
      45     8839301 :         assert(e->type == e_column);
      46     8839301 :         if (rel) {
      47     8839220 :                 switch(rel->op) {
      48     4093645 :                 case op_left:
      49             :                 case op_right:
      50             :                 case op_full:
      51             :                 case op_join:
      52             :                 case op_select:
      53             :                 case op_anti:
      54             :                 case op_semi: {
      55     4093645 :                         bool found_without_semantics = false, found_left = false, found_right = false, still_unique = false;
      56             : 
      57     4093645 :                         if ((is_innerjoin(rel->op) || is_select(rel->op)) && list_length(rel->exps) == 1 && exp_is_false(rel->exps->h->data))
      58             :                                 return NULL; /* nothing will pass, skip */
      59             : 
      60             :                         /* propagate from the bottom first */
      61     4078004 :                         if (rel_propagate_column_ref_statistics(sql, rel->l, e))
      62             :                                 found_left = true;
      63     2592152 :                         if (!found_left && is_join(rel->op) && rel_propagate_column_ref_statistics(sql, rel->r, e))
      64     4078004 :                                 found_right = true;
      65             : 
      66     4078004 :                         if (!found_left && !found_right)
      67             :                                 return NULL;
      68     1781561 :                         if (!list_empty(rel->exps) && rel->op != op_anti) { /* if there's an or, the MIN and MAX get difficult to propagate */
      69     3498812 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
      70     1812836 :                                         sql_exp *comp = n->data, *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL, *fe = comp->f, *fne = NULL;
      71             : 
      72     1812836 :                                         if (comp->type == e_cmp) {
      73     1811832 :                                                 if (is_theta_exp(comp->flag) && ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) || (fe && (fne = comparison_find_column(fe, e))))) {
      74      124335 :                                                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
      75      124334 :                                                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
      76             : 
      77             :                                                         /* not semantics found or if explicitly filtering not null values from the column */
      78      124334 :                                                         found_without_semantics |= (!is_semantics(comp) && comp_single_column(comp)) || (comp->flag == cmp_equal && lne && is_anti(comp) && exp_is_null(re));
      79      124334 :                                                         still_unique |= comp->flag == cmp_equal && is_unique(le) && is_unique(re); /* unique if only equi-joins on unique columns are there */
      80      124334 :                                                         if (is_full(rel->op) || (is_left(rel->op) && found_left) || (is_right(rel->op) && found_right)) /* on outer joins, min and max cannot be propagated on some cases */
      81       19707 :                                                                 continue;
      82             :                                                         /* if (end2 >= start1 && start2 <= end1) then the 2 intervals are intersected */
      83      104627 :                                                         if (fe && lval_min && lval_max) { /* range case, the middle expression must intersect the other two */
      84        4621 :                                                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
      85        4621 :                                                                 int int1 = rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0,
      86        4621 :                                                                         int2 = fval_min && fval_max && atom_cmp(fval_max, lval_min) >= 0 && atom_cmp(fval_min, lval_max) <= 0,
      87        4621 :                                                                         symmetric = is_symmetric(comp);
      88             : 
      89        4621 :                                                                 if (is_anti(comp) || (!symmetric && fval_min && rval_max && atom_cmp(fval_min, rval_max) < 0)) /* for asymmetric case the re range must be after the fe range */
      90        1816 :                                                                         continue;
      91        2805 :                                                                 if (lne && int1 && int2) {
      92         104 :                                                                         if (symmetric) {
      93           0 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
      94           0 :                                                                                 atom *nmin = statistics_atom_min(sql, rval_min, fval_min), *nmax = statistics_atom_max(sql, rval_max, fval_max);
      95             :                                                                                 /* max is min from le and (max from re and fe max) */
      96           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, nmax, p2->value.pval) : nmax);
      97             :                                                                                 /* min is max from le and (min from re and fe min) */
      98           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, nmin, p1->value.pval) : nmin);
      99             :                                                                         } else {
     100         104 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
     101             :                                                                                 /* max is min from le and fe max */
     102         104 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, fval_max, p2->value.pval) : fval_max);
     103             :                                                                                 /* min is max from le and re min */
     104         104 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, rval_min, p1->value.pval) : rval_min);
     105             :                                                                         }
     106        2701 :                                                                 } else if (rne) {
     107         590 :                                                                         if (symmetric && int1 && int2) { /* min is max from le and (min from re and fe min) */
     108           0 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     109           0 :                                                                                 atom *nmin = p ? statistics_atom_min(sql, p->value.pval, fval_min) : fval_min;
     110           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, nmin, lval_min));
     111         590 :                                                                         } else if (int1) { /* min is max from le and re min */
     112         100 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     113         100 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     114             :                                                                         }
     115        2111 :                                                                 } else if (fne) {
     116         549 :                                                                         if (symmetric && int1 && int2) { /* max is min from le and (max from re and fe max) */
     117           0 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     118           0 :                                                                                 atom *nmax = p ? statistics_atom_max(sql, p->value.pval, rval_max) : rval_max;
     119           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, nmax, lval_max) : nmax);
     120         549 :                                                                         } else if (int2) { /* max is min from le and fe max */
     121         266 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     122         266 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     123             :                                                                         }
     124             :                                                                 }
     125      100006 :                                                         } else if (lval_min && lval_max && rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0) {
     126             :                                                                 /* both min and max must be set and the intervals must overlap */
     127       42029 :                                                                 switch (comp->flag) {
     128       28005 :                                                                 case cmp_equal: /* for equality reduce */
     129       28005 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_max(sql, lval_max, rval_max) : statistics_atom_min(sql, lval_max, rval_max));
     130       28005 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_min(sql, lval_min, rval_min) : statistics_atom_max(sql, lval_min, rval_min));
     131       28005 :                                                                         break;
     132        4707 :                                                                 case cmp_notequal: /* for inequality expand */
     133        4707 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_min(sql, lval_max, rval_max) : statistics_atom_max(sql, lval_max, rval_max));
     134        4707 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_max(sql, lval_min, rval_min) : statistics_atom_min(sql, lval_min, rval_min));
     135        4707 :                                                                         break;
     136        5311 :                                                                 case cmp_gt:
     137             :                                                                 case cmp_gte:
     138        9684 :                                                                         if (!is_anti(comp) && lne) { /* min is max from both min */
     139        4373 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     140        4373 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min, p->value.pval) : rval_min);
     141         938 :                                                                         } else if (!is_anti(comp)) { /* max is min from both max */
     142         938 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     143         938 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     144             :                                                                         }
     145             :                                                                         break;
     146        4006 :                                                                 case cmp_lt:
     147             :                                                                 case cmp_lte:
     148        7205 :                                                                         if (!is_anti(comp) && lne) { /* max is min from both max */
     149        3199 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     150        3199 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max, p->value.pval) : rval_max);
     151         807 :                                                                         } else if (!is_anti(comp)) { /* min is max from both min */
     152         807 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     153         807 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     154             :                                                                         }
     155             :                                                                         break;
     156             :                                                                 default: /* Maybe later I can do cmp_in and cmp_notin */
     157             :                                                                         break;
     158             :                                                                 }
     159             :                                                         }
     160             :                                                 }
     161             :                                         }
     162             :                                 }
     163             :                         }
     164     1781561 :                         if (is_full(rel->op) || (is_left(rel->op) && found_right) || (is_right(rel->op) && found_left))
     165       35468 :                                 set_has_nil(e);
     166     1781561 :                         if (!is_outerjoin(rel->op) && found_without_semantics) /* at an outer join, null values pass */
     167       93803 :                                 set_has_no_nil(e);
     168     1781561 :                         if (is_join(rel->op) && is_unique(e) && !still_unique)
     169      118741 :                                 set_not_unique(e);
     170     1781561 :                         return e;
     171             :                 }
     172     4641919 :                 case op_table:
     173             :                 case op_basetable:
     174             :                 case op_union:
     175             :                 case op_except:
     176             :                 case op_inter:
     177             :                 case op_munion:
     178             :                 case op_project:
     179             :                 case op_groupby: {
     180     4641919 :                         if (is_recursive(rel))
     181             :                                 return NULL;
     182     4641724 :                         sql_exp *found;
     183     4641724 :                         atom *fval;
     184     4641724 :                         prop *est;
     185     4641724 :                         if ((found = rel_find_exp(rel, e))) {
     186     2175703 :                                 if (rel->op != op_table) { /* At the moment don't propagate statistics for table relations */
     187     2131209 :                                         if ((fval = find_prop_and_get(found->p, PROP_MAX)))
     188     1121022 :                                                 set_minmax_property(sql, e, PROP_MAX, fval);
     189     2131200 :                                         if ((fval = find_prop_and_get(found->p, PROP_MIN)))
     190     1128068 :                                                 set_minmax_property(sql, e, PROP_MIN, fval);
     191     2131194 :                                         if (!has_nil(found))
     192     1374110 :                                                 set_has_no_nil(e);
     193     2131194 :                                         if (is_unique(found) || (need_distinct(rel) && list_length(rel->exps) == 1) ||
     194     1717079 :                                                 (is_groupby(rel->op) && (list_empty(rel->r) || (list_length(rel->r) == 1 && exps_find_exp(rel->r, e)))))
     195      425357 :                                                 set_unique(e);
     196             :                                         /* propagate unique estimation for known cases */
     197     2131195 :                                         if (is_groupby(rel->op) && list_empty(rel->r) && !find_prop(e->p, PROP_NUNIQUES)) { /* global aggregate case */
     198        7592 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     199        7592 :                                                 p->value.dval = 1;
     200     2123603 :                                         } else if (((is_basetable(rel->op) || is_except(rel->op) || is_inter(rel->op) || is_simple_project(rel->op) ||
     201       71518 :                                                                  (is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
     202     2064753 :                                                                 (est = find_prop(found->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES))) {
     203      193931 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     204      193932 :                                                 p->value.dval = est->value.dval;
     205             :                                         }
     206             :                                 }
     207     2175695 :                                 return e;
     208             :                         }
     209             :                         return NULL;
     210             :                 }
     211       84295 :                 case op_topn:
     212             :                 case op_sample:
     213       84295 :                         return rel_propagate_column_ref_statistics(sql, rel->l, e);
     214             :                 default:
     215             :                         break;
     216             :                 }
     217             :         }
     218             :         return NULL;
     219             : }
     220             : 
     221             : static atom *
     222     4592781 : atom_from_valptr( allocator *sa, sql_subtype *tpe, ValRecord *v)
     223             : {
     224     4592781 :         atom *a = SA_NEW(sa, atom);
     225             : 
     226     4592830 :         assert(!VALisnil(v));
     227     4592876 :         *a = (atom) {.tpe = *tpe,};
     228     4592876 :         SA_VALcopy(sa, &a->data, v);
     229     4593029 :         return a;
     230             : }
     231             : 
     232             : void
     233     4243728 : sql_column_get_statistics(mvc *sql, sql_column *c, sql_exp *e)
     234             : {
     235     4243728 :         bool nonil = false, unique = false;
     236     4243728 :         double unique_est = 0.0;
     237     4243728 :         ValRecord min, max;
     238     4243728 :         int ok = mvc_col_stats(sql, c, &nonil, &unique, &unique_est, &min, &max);
     239             : 
     240     4244913 :         if (has_nil(e) && nonil)
     241     2797491 :                 set_has_no_nil(e);
     242     4244913 :         if (!is_unique(e) && unique)
     243     1120154 :                 set_unique(e);
     244     4244913 :         if (unique_est != 0.0) {
     245     2987804 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     246     2987644 :                 p->value.dval = unique_est;
     247             :         }
     248     4244753 :         unsigned int digits = 0;
     249     4244753 :         sql_subtype *et = exp_subtype(e);
     250     4244896 :         if (et->type->eclass == EC_DEC || et->type->eclass == EC_NUM)
     251     2758220 :                 digits = et->digits;
     252     4244896 :         if ((ok & 2) == 2) {
     253     2293718 :                 if (!VALisnil(&max)) {
     254     2293642 :                         prop *p = e->p = prop_create(sql->sa, PROP_MAX, e->p);
     255     2293576 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &max);
     256     2293609 :                         if (digits) {
     257     1704694 :                                 unsigned int nd = atom_digits(p->value.pval);
     258     1704555 :                                 if (nd < digits)
     259             :                                         digits = nd;
     260     1704555 :                                 if (!digits)
     261             :                                         digits = 1;
     262             :                         }
     263             :                 }
     264     2293450 :                 VALclear(&max);
     265             :         }
     266     4244623 :         if ((ok & 1) == 1) {
     267     2299615 :                 if (!VALisnil(&min)) {
     268     2299626 :                         prop *p = e->p = prop_create(sql->sa, PROP_MIN, e->p);
     269     2299686 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &min);
     270     2299863 :                         if (digits) {
     271     1711919 :                                 unsigned int nd = atom_digits(p->value.pval);
     272     1711937 :                                 if (nd > digits) /* possibly set to low by max value */
     273             :                                         digits = nd;
     274             :                                 if (!digits)
     275             :                                         digits = 1;
     276             :                         }
     277             :                 }
     278     2299888 :                 VALclear(&min);
     279             :         }
     280     4244920 :         if (digits)
     281     2758235 :                 et->digits = digits;
     282     4244920 :         if (et->type->eclass == EC_DEC && et->digits <= et->scale)
     283         216 :                 et->digits = et->scale + 1;
     284     4244920 : }
     285             : 
     286             : static void
     287      947620 : rel_basetable_column_get_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
     288             : {
     289      947620 :         if (e->p)
     290             :                 return;
     291      304454 :         sql_column *c = NULL;
     292             : 
     293      304454 :         if ((c = rel_base_find_column(rel, e->nid))) {
     294      206825 :                 sql_column_get_statistics(sql, c, e);
     295             :         }
     296             : }
     297             : 
     298             : static bool
     299        8875 : rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, sql_exp *e, int i)
     300             : {
     301        8875 :         sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
     302        8875 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     303        8875 :                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     304        8875 :         prop *est;
     305             : 
     306             :         /* for the intersection, if both expressions don't overlap, it can be pruned */
     307        8875 :         if (is_inter(rel->op) && !has_nil(le) && !has_nil(re) &&
     308         511 :                 ((rval_max && lval_min && atom_cmp(rval_max, lval_min) < 0) || (rval_min && lval_max && atom_cmp(rval_min, lval_max) > 0)))
     309          26 :                 return true;
     310             : 
     311        8849 :         if (lval_max && rval_max) {
     312        2557 :                 if (is_union(rel->op))
     313           3 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     314        2554 :                 else if (is_inter(rel->op))
     315         399 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval_max, rval_max)); /* for intersect the new max will be the min of the two */
     316             :                 else /* except */
     317        2155 :                         set_minmax_property(sql, e, PROP_MAX, lval_max);
     318             :         }
     319        8849 :         if (lval_min && rval_min) {
     320        2557 :                 if (is_union(rel->op))
     321           3 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     322        2554 :                 else if (is_inter(rel->op))
     323         399 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval_min, rval_min)); /* for intersect the new min will be the max of the two */
     324             :                 else /* except */
     325        2155 :                         set_minmax_property(sql, e, PROP_MIN, lval_min);
     326             :         }
     327             : 
     328        8849 :         if (is_union(rel->op) || is_munion(rel->op)) {
     329        5986 :                 if (!has_nil(le) && !has_nil(re))
     330         879 :                         set_has_no_nil(e);
     331        5986 :                 if (need_distinct(rel) && list_length(rel->exps) == 1)
     332           0 :                         set_unique(e);
     333        2863 :         } else if (is_inter(rel->op)) {
     334         564 :                 if (!has_nil(le) || !has_nil(re))
     335         509 :                         set_has_no_nil(e);
     336         564 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     337         345 :                         set_unique(e);
     338             :         } else {
     339        2299 :                 assert(is_except(rel->op));
     340        2299 :                 if (!has_nil(le))
     341        2236 :                         set_has_no_nil(e);
     342        2299 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     343        2012 :                         set_unique(e);
     344             :         }
     345             :         /* propagate unique estimation for known cases */
     346        8849 :         if (!is_union(rel->op) && (est = find_prop(le->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES)) {
     347         207 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     348         207 :                 p->value.dval = est->value.dval;
     349             :         }
     350             :         return false;
     351             : }
     352             : 
     353             : 
     354             : static void
     355       62381 : rel_munion_get_statistics(mvc *sql, sql_rel *rel, list *rels, sql_exp *e, int i)
     356             : {
     357       62381 :         if (is_recursive(rel))
     358             :                 return ;
     359       62381 :         assert(is_munion(rel->op));
     360             : 
     361       62381 :         sql_rel *l = rels->h->data;
     362       62381 :         sql_exp *le = list_fetch(l->exps, i);
     363       62381 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX);
     364       62381 :         bool has_nonil = !has_nil(le);
     365             : 
     366      181448 :         for(node *n = rels->h->next; n; n = n->next) {
     367      119067 :                 sql_rel *r = n->data;
     368      119067 :                 sql_exp *re = list_fetch(r->exps, i);
     369      119067 :                 atom *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     370             : 
     371      119067 :                 if (lval_max && rval_max) {
     372       43903 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     373       43903 :                         lval_max = find_prop_and_get(e->p, PROP_MAX);
     374             :                 }
     375      119067 :                 if (lval_min && rval_min) {
     376       43354 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     377       43354 :                         lval_min = find_prop_and_get(e->p, PROP_MIN);
     378             :                 }
     379      119067 :                 has_nonil &= !has_nil(re);
     380             : 
     381             :         }
     382             : 
     383       62381 :         if (has_nonil)
     384       42482 :                 set_has_no_nil(e);
     385             : 
     386       62381 :         if (need_distinct(rel) && list_length(rel->exps) == 1)
     387        1596 :                 set_unique(e);
     388             : }
     389             : 
     390             : 
     391             : static sql_exp *
     392     3493969 : rel_propagate_statistics(visitor *v, sql_rel *rel, sql_exp *e, int depth)
     393             : {
     394     3493969 :         mvc *sql = v->sql;
     395     3493969 :         atom *lval;
     396             : 
     397     3493969 :         (void) depth;
     398     3493969 :         switch(e->type) {
     399     2181679 :         case e_column:
     400     2181679 :                 switch (rel->op) { /* set relations don't call rel_propagate_statistics */
     401      282787 :                 case op_join:
     402             :                 case op_left:
     403             :                 case op_right:
     404             :                 case op_full:
     405             :                 case op_semi:
     406             :                 case op_anti: {
     407      282787 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e);
     408      282787 :                         if (!found)
     409      141835 :                                 (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     410             :                         break;
     411             :                 }
     412     1898892 :                 case op_select:
     413             :                 case op_project:
     414             :                 case op_groupby: {
     415     1898892 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e); /* labels may be found on the same projection, ugh */
     416     1898882 :                         if (!found && is_simple_project(rel->op))
     417      127706 :                                 (void) rel_propagate_column_ref_statistics(sql, rel, e);
     418             :                         break;
     419             :                 }
     420           0 :                 case op_insert:
     421             :                 case op_update:
     422             :                 case op_delete:
     423           0 :                         (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     424           0 :                         break;
     425             :                 default:
     426             :                         break;
     427             :                 }
     428             :                 break;
     429      101111 :         case e_convert: {
     430      101111 :                 sql_subtype *to = exp_totype(e), *from = exp_fromtype(e);
     431      101111 :                 sql_exp *l = e->l;
     432      101111 :                 sql_class fr = from->type->eclass, too = to->type->eclass;
     433      101111 :                 prop *est;
     434             : 
     435      101111 :                 if (fr == too) {
     436       91999 :                         if ((lval = find_prop_and_get(l->p, PROP_MAX))) {
     437       59868 :                                 atom *res = atom_copy(sql->sa, lval);
     438       59868 :                                 if ((res = atom_cast(sql->sa, res, to)))
     439       59845 :                                         set_minmax_property(sql, e, PROP_MAX, res);
     440             :                         }
     441       91998 :                         if ((lval = find_prop_and_get(l->p, PROP_MIN))) {
     442       60505 :                                 atom *res = atom_copy(sql->sa, lval);
     443       60504 :                                 if ((res = atom_cast(sql->sa, res, to)))
     444       60482 :                                         set_minmax_property(sql, e, PROP_MIN, res);
     445             :                         }
     446             :                 }
     447             :                 /* propagating nuniques through conversions is tricky. I am adding just the general cases */
     448      101111 :                 if ((est = find_prop(l->p, PROP_NUNIQUES)) &&
     449       62903 :                         (((fr == EC_TIME || fr == EC_TIME_TZ || fr == EC_DATE) && (too == EC_TIMESTAMP || too == EC_TIMESTAMP_TZ)) ||
     450       62878 :                          ((fr == EC_NUM || fr == EC_BIT) && too == EC_NUM) || (fr == EC_DEC && too == EC_DEC) || (fr != EC_EXTERNAL && too == EC_STRING))) {
     451       59450 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     452       59450 :                         p->value.dval = est->value.dval;
     453             :                 }
     454      101110 :                 if (!has_nil(l))
     455       57508 :                         set_has_no_nil(e);
     456             :                 break;
     457             :         }
     458      342820 :         case e_aggr:
     459             :         case e_func: {
     460      342820 :                 BUN lv;
     461      342820 :                 sql_subfunc *f = e->f;
     462             : 
     463      342820 :                 if (!f->func->s) {
     464      316037 :                         int key = hash_key(f->func->base.name); /* Using hash lookup */
     465      316037 :                         sql_hash_e *he = sql_functions_lookup->buckets[key&(sql_functions_lookup->size-1)];
     466      316037 :                         lookup_function look = NULL;
     467             : 
     468      690653 :                         for (; he && !look; he = he->chain) {
     469      374616 :                                 struct function_properties* fp = (struct function_properties*) he->value;
     470             : 
     471      374616 :                                 if (!strcmp(f->func->base.name, fp->name))
     472      108083 :                                         look = fp->func;
     473             :                         }
     474      316037 :                         if (look)
     475      108083 :                                 look(sql, e);
     476             :                 }
     477             :                 /* for global aggregates with no semantics, if the left relation has values, then the output won't be NULL */
     478      342820 :                 if (!is_semantics(e) && e->l && !have_nil(e->l) &&
     479       90809 :                         (e->type != e_aggr || (is_groupby(rel->op) && list_length(rel->r)) || ((lv = get_rel_count(rel->l)) != BUN_NONE && lv > 0)))
     480       90478 :                         set_has_no_nil(e);
     481             :                 /* set properties for global aggregates */
     482      342820 :                 if (e->type == e_aggr && is_groupby(rel->op) && list_empty(rel->r)) {
     483        7920 :                         if (!find_prop(e->p, PROP_NUNIQUES)) {
     484        7920 :                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     485        7920 :                                 p->value.dval = 1;
     486             :                         }
     487        7920 :                         set_unique(e);
     488             :                 }
     489             :                 break;
     490             :         }
     491      578349 :         case e_atom:
     492      578349 :                 if (e->l) {
     493      560493 :                         atom *a = (atom*) e->l;
     494      560493 :                         if (!a->isnull) {
     495      496184 :                                 set_minmax_property(sql, e, PROP_MAX, a);
     496      496184 :                                 set_minmax_property(sql, e, PROP_MIN, a);
     497             :                         }
     498      560493 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     499      560493 :                         p->value.dval = 1;
     500       17856 :                 } else if (e->f) {
     501        4280 :                         list *vals = (list *) e->f;
     502        4280 :                         sql_exp *first = vals->h ? vals->h->data : NULL;
     503        4280 :                         atom *max = NULL, *min = NULL; /* all child values must have a valid min/max */
     504        4280 :                         int has_nil = 0;
     505             : 
     506        4280 :                         if (first) {
     507        4280 :                                 max = ((lval = find_prop_and_get(first->p, PROP_MAX))) ? lval : NULL;
     508        4280 :                                 min = ((lval = find_prop_and_get(first->p, PROP_MIN))) ? lval : NULL;
     509        4280 :                                 has_nil |= has_nil(first);
     510             :                         }
     511             : 
     512       18356 :                         for (node *n = vals->h ? vals->h->next : NULL ; n ; n = n->next) {
     513        9796 :                                 sql_exp *ee = n->data;
     514             : 
     515        9796 :                                 if (min && max) {
     516        9303 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MAX))) {
     517        9257 :                                                 max = atom_cmp(lval, max) > 0 ? lval : max;
     518             :                                         } else {
     519             :                                                 max = NULL;
     520             :                                         }
     521        9303 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MIN))) {
     522        9257 :                                                 min = atom_cmp(min, lval) > 0 ? lval : min;
     523             :                                         } else {
     524             :                                                 min = NULL;
     525             :                                         }
     526             :                                 }
     527        9796 :                                 has_nil |= has_nil(ee);
     528             :                         }
     529             : 
     530        4280 :                         if (!has_nil)
     531        3909 :                                 set_has_no_nil(e);
     532        4280 :                         if (min && max) {
     533        3867 :                                 set_minmax_property(sql, e, PROP_MAX, max);
     534        3867 :                                 set_minmax_property(sql, e, PROP_MIN, min);
     535             :                         }
     536        4280 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     537        4280 :                         p->value.dval = (dbl) list_length(vals);
     538             :                 } else {
     539       13576 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     540       13576 :                         p->value.dval = 1;
     541             :                 }
     542             :                 break;
     543      290010 :         case e_cmp:
     544             :                 /* TODO? propagating min/max/unique of booleans is not very worth it */
     545      290010 :                 if (e->flag == cmp_or || e->flag == cmp_filter) {
     546       17845 :                         if (!have_nil(e->l) && !have_nil(e->r))
     547       13202 :                                 set_has_no_nil(e);
     548      272165 :                 } else if (e->flag == cmp_in || e->flag == cmp_notin) {
     549       21734 :                         sql_exp *le = e->l;
     550       21734 :                         if (!has_nil(le) && !have_nil(e->r))
     551       18611 :                                 set_has_no_nil(e);
     552             :                 } else {
     553      250431 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     554      250431 :                         if (!has_nil(le) && !has_nil(re) && (!e->f || !has_nil(fe)))
     555      179860 :                                 set_has_no_nil(e);
     556             :                 }
     557             :                 break;
     558             :         case e_psm:
     559             :                 break;
     560             :         }
     561             : 
     562             : #ifndef NDEBUG
     563             :         {
     564             :                 /* min and max cannot be NULL and min must be <= than max, if it happens the inner relation must be empty! */
     565     3493959 :                 atom *min = find_prop_and_get(e->p, PROP_MIN), *max = find_prop_and_get(e->p, PROP_MAX);
     566             : 
     567     3493970 :                 (void) min;
     568     3493970 :                 (void) max;
     569     3493970 :                 assert(!min || !min->isnull);
     570     3493970 :                 assert(!max || !max->isnull);
     571     3493970 :                 assert(!min || !max || (min && max)/* atom_cmp(min, max) <= 0*/);
     572             :         }
     573             : #endif
     574     3493970 :         return e;
     575             : }
     576             : 
     577             : static list * /* Remove predicates always false from min/max values */
     578      141247 : rel_prune_predicates(visitor *v, sql_rel *rel)
     579             : {
     580      141247 :         if (rel->l) {
     581      141247 :                 sql_rel *l = rel->l;
     582      141247 :                 if (is_single(l))
     583        3533 :                         return rel->exps;
     584             :         }
     585      137714 :         if (!list_empty(rel->attr))
     586        2989 :                 return rel->exps;
     587      286749 :         for (node *n = rel->exps->h ; n ; n = n->next) {
     588      152024 :                 sql_exp *e = n->data;
     589             : 
     590      152024 :                 if (e->type == e_cmp && is_theta_exp(e->flag)) {
     591      125085 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     592      125085 :                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     593      125085 :                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     594      125085 :                         bool always_false = false, always_true = false;
     595             : 
     596      125085 :                         if (fe && !is_symmetric(e)) {
     597        3059 :                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
     598        3059 :                                 comp_type lower = range2lcompare(e->flag), higher = range2rcompare(e->flag);
     599        3662 :                                 int not_int1 = rval_min && lval_max && /* the middle and left intervals don't overlap */
     600        1664 :                                         (!is_anti(e) && (lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0)),
     601        4074 :                                         not_int2 = lval_min && fval_max && /* the middle and right intervals don't overlap */
     602        2486 :                                         (!is_anti(e) && (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0)),
     603        3059 :                                         not_int3 = rval_min && fval_max && /* the left interval is after the right one */
     604        1288 :                                         (!is_anti(e) && (atom_cmp(rval_min, fval_max) > 0));
     605             : 
     606        3059 :                                 always_false |= not_int1 || not_int2 || not_int3;
     607             :                                 /* for anti the middle must be before the left or after the right or the right after the left, for the other the middle must be always between the left and right intervals */
     608        4116 :                                 always_true |= !has_nil(le) && !has_nil(re) && !has_nil(fe) &&
     609        3960 :                                         lval_min && lval_max && rval_min && rval_max && fval_min && fval_max &&
     610         575 :                                         (is_anti(e) ? ((lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) || (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
     611         489 :                                          ((lower == cmp_gte ? atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0)));
     612             :                         } else if (!fe) {
     613      122008 :                                 if (!is_semantics(e)) /* trivial not null cmp null case */
     614      232620 :                                         always_false |= !is_anti(e) && ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)));
     615      122008 :                                 switch (e->flag) {
     616      106460 :                                 case cmp_equal:
     617      106460 :                                         if (lval_min && lval_max && rval_min && rval_max && (!is_semantics(e) || !has_nil(le) || !has_nil(re)))
     618      135966 :                                                 always_false |= (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     619      106460 :                                         if (is_semantics(e)) { /* prune *= NULL cases */
     620        5669 :                                                 always_false |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     621       11338 :                                                 always_true |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     622             :                                         }
     623             :                                         break;
     624        7352 :                                 case cmp_notequal:
     625        7352 :                                         if (lval_min && lval_max && rval_min && rval_max)
     626       11434 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     627        7352 :                                         if (is_semantics(e)) {
     628          29 :                                                 always_true |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     629          58 :                                                 always_false |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     630             :                                         }
     631             :                                         break;
     632        5489 :                                 case cmp_gt:
     633        5489 :                                         if (lval_max && rval_min)
     634        1835 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     635        5489 :                                         if (lval_min && rval_max)
     636        3670 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     637             :                                         break;
     638         121 :                                 case cmp_gte:
     639         121 :                                         if (lval_max && rval_min)
     640          51 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     641         121 :                                         if (lval_min && rval_max)
     642         102 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     643             :                                         break;
     644        2501 :                                 case cmp_lt:
     645        2501 :                                         if (lval_min && rval_max)
     646        1382 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     647        2501 :                                         if (lval_max && rval_min)
     648        2812 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     649             :                                         break;
     650          85 :                                 case cmp_lte:
     651          85 :                                         if (lval_min && rval_max)
     652          11 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     653          85 :                                         if (lval_max && rval_min)
     654          22 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     655             :                                         break;
     656             :                                 default: /* Maybe later I can do cmp_in and cmp_notin but don't forget to remove is_theta_exp check up there */
     657             :                                         break;
     658             :                                 }
     659             :                         }
     660      125085 :                         assert(!always_false || !always_true);
     661      125085 :                         if (always_false || always_true) {
     662        3688 :                                 sql_exp *ne = exp_atom_bool(v->sql->sa, always_true /* if always_true set then true else false */);
     663        3688 :                                 if (exp_name(e))
     664           0 :                                         exp_prop_alias(v->sql->sa, ne, e);
     665        3688 :                                 n->data = ne;
     666        3688 :                                 v->changes++;
     667             :                         }
     668             :                 }
     669             :         }
     670      134725 :         return rel->exps;
     671             : }
     672             : 
     673             : static sql_rel *
     674          14 : set_setop_side(visitor *v, sql_rel *rel, sql_rel *side)
     675             : {
     676          14 :         if (side == rel->r)
     677           0 :                 rel->r = NULL;
     678             :         else
     679          14 :                 rel->l = NULL;
     680          14 :         if (!is_simple_project(side->op) || rel_is_ref(side) || !list_empty(side->r)) {
     681           0 :                 side = rel_project(v->sql->sa, side, rel_projections(v->sql, side, NULL, 0, 1));
     682           0 :                 set_count_prop(v->sql->sa, side, get_rel_count(side->l));
     683           0 :                 side->exps = exps_exp_visitor_bottomup(v, side, side->exps, 0, &rel_propagate_statistics, false);
     684             :         }
     685          35 :         for (node *n = rel->exps->h, *m = side->exps->h ; n && m ; n = n->next, m = m->next) {
     686          21 :                 sql_exp *e1 = n->data, *e2 = m->data;
     687          21 :                 exp_prop_alias(v->sql->sa, e2, e1);
     688             :         }
     689          14 :         list_hash_clear(side->exps);
     690             : 
     691          14 :         if (need_distinct(rel))
     692           0 :                 set_distinct(side);
     693          14 :         side->p = prop_copy(v->sql->sa, rel->p);
     694          14 :         rel_destroy(rel);
     695          14 :         return side;
     696             : }
     697             : 
     698             : static BUN
     699       22176 : trivial_project_exp_card(sql_exp *e)
     700             : {
     701       22545 :         if (e->type == e_convert)
     702         369 :                 return trivial_project_exp_card(e->l);
     703       22176 :         return e->type == e_atom && e->f ? (BUN) list_length(e->f) : 1;
     704             : }
     705             : 
     706             : static BUN
     707       26140 : rel_calc_nuniques(mvc *sql, sql_rel *l, list *exps)
     708             : {
     709       26140 :         BUN lv = get_rel_count(l);
     710             : 
     711       26140 :         if (lv == 0)
     712             :                 return 0;
     713       23425 :         if (!list_empty(exps)) {
     714       23425 :                 BUN nuniques = 0;
     715             :                 /* compute the highest number of unique values */
     716       58508 :                 for (node *n = exps->h ; n && nuniques != BUN_NONE ; n = n->next) {
     717       35083 :                         sql_exp *e = n->data;
     718       35083 :                         sql_rel *bt = NULL;
     719       35083 :                         prop *p = NULL;
     720       35083 :                         BUN euniques = BUN_NONE;
     721       35083 :                         atom *min, *max, *sub = NULL;
     722       35083 :                         sql_subtype *tp = exp_subtype(e);
     723       35082 :                         sql_class ec = tp ? tp->type->eclass : EC_STRING; /* if 'e' has no type (eg parameter), use a non-number type to fail condition */
     724             : 
     725       35082 :                         if ((p = find_prop(e->p, PROP_NUNIQUES))) {
     726       25375 :                                 euniques = (BUN) p->value.dval;
     727        9707 :                         } else if (e->type == e_column && e->nid && rel_find_exp_and_corresponding_rel(l, e, false, &bt, NULL) && bt && (p = find_prop(bt->p, PROP_COUNT))) {
     728        7633 :                                 euniques = (BUN) p->value.lval;
     729             :                         }
     730             :                         /* use min to max range to compute number of possible values in the domain for number types */
     731       35082 :                         if ((EC_TEMP(ec)||ec==EC_NUM||ec==EC_MONTH||ec==EC_POS) &&
     732       24254 :                                 (min = find_prop_and_get(e->p, PROP_MIN)) && (max = find_prop_and_get(e->p, PROP_MAX))) {
     733             :                                 /* the range includes min and max, so the atom_inc call is needed */
     734             :                                 /* if 'euniques' has number of distinct values, compute min between both */
     735       19307 :                                 if ((sub = atom_sub(sql->sa, max, min)) && (sub = atom_inc(sql->sa, sub)) && (sub = atom_cast(sql->sa, sub, sql_bind_localtype("oid"))))
     736           0 :                                         euniques = MIN(euniques, (BUN) sub->data.val.oval);
     737             :                         }
     738       35083 :                         if (euniques != BUN_NONE)
     739       33009 :                                 nuniques = MAX(nuniques, euniques); /* the highest cardinality sets the estimation */
     740             :                         else
     741             :                                 nuniques = BUN_NONE;
     742             :                 }
     743       23425 :                 if (nuniques != BUN_NONE)
     744             :                         return nuniques;
     745             :         }
     746             :         return lv;
     747             : }
     748             : 
     749             : static sql_rel *
     750     2090517 : rel_get_statistics_(visitor *v, sql_rel *rel)
     751             : {
     752             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
     753     2090517 :         uint8_t has_special_modify = *(uint8_t*) v->data;
     754     2090517 :         bool can_be_pruned = !has_special_modify && v->storage_based_opt;
     755             : 
     756             :         /* Don't look at the same relation twice */
     757     2090517 :         if (are_statistics_gathered(rel->used))
     758             :                 return rel;
     759     1348898 :         rel->used |= statistics_gathered;
     760             : 
     761     1348898 :         switch (rel->op) {
     762      317306 :         case op_basetable: {
     763      317306 :                 sql_table *t = (sql_table *) rel->l;
     764      317306 :                 sqlstore *store = v->sql->session->tr->store;
     765             : 
     766      317306 :                 if (!list_empty(rel->exps)) {
     767     1265237 :                         for (node *n = rel->exps->h ; n ; n = n->next)
     768      947705 :                                 rel_basetable_column_get_statistics(v->sql, rel, n->data);
     769             :                 }
     770             :                 /* Set table row count. TODO? look for remote tables. Don't look at storage for declared tables, because it won't be cleaned */
     771      317526 :                 if (isTable(t) && t->s && !isDeclaredTable(t)) /* count active rows only */
     772      263545 :                         set_count_prop(v->sql->sa, rel, (BUN)store->storage_api.count_col(v->sql->session->tr, ol_first_node(t->columns)->data, 10));
     773             :                 break;
     774             :         }
     775        2793 :         case op_union:
     776             :         case op_inter:
     777             :         case op_except: {
     778        2793 :                 bool empty_cross = false;
     779        2793 :                 int i = 0;
     780        2793 :                 sql_rel *l = rel->l, *pl = rel->l, *r = rel->r, *pr = rel->r;
     781             : 
     782        2793 :                 while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     783           0 :                         pl = pl->l;
     784        2793 :                 while (is_sample(pr->op) || is_topn(pr->op))
     785           0 :                         pr = pr->l;
     786             :                 /* if it's not a projection, then project and propagate statistics */
     787        2793 :                 if (!is_project(pl->op) && !is_base(pl->op)) {
     788           0 :                         pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     789           0 :                         set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     790           0 :                         pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     791             :                 }
     792        2793 :                 if (!is_project(pr->op) && !is_base(pr->op)) {
     793           0 :                         pr = rel_project(v->sql->sa, pr, rel_projections(v->sql, pr, NULL, 0, 1));
     794           0 :                         set_count_prop(v->sql->sa, pr, get_rel_count(pr->l));
     795           0 :                         pr->exps = exps_exp_visitor_bottomup(v, pr, pr->exps, 0, &rel_propagate_statistics, false);
     796             :                 }
     797             : 
     798       11668 :                 for (node *n = rel->exps->h ; n ; n = n->next) {
     799        8875 :                         empty_cross |= rel_setop_get_statistics(v->sql, rel, pl->exps, pr->exps, n->data, i);
     800        8875 :                         i++;
     801             :                 }
     802             : 
     803             :                 /* propagate row count */
     804        2793 :                 if (is_union(rel->op)) {
     805         277 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     806         277 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     807             : 
     808         277 :                         if (lv == 0 && rv == 0) { /* both sides empty */
     809           2 :                                 if (can_be_pruned)
     810             :                                         empty_cross = true;
     811             :                                 else
     812           2 :                                         set_count_prop(v->sql->sa, rel, 0);
     813         275 :                         } else if (can_be_pruned && lv == 0 && !rel_is_ref(rel)) { /* left side empty */
     814           0 :                                 rel = set_setop_side(v, rel, r);
     815           0 :                                 empty_cross = false; /* don't rewrite again */
     816         275 :                         } else if (can_be_pruned && rv == 0 && !rel_is_ref(rel)) { /* right side empty */
     817           0 :                                 rel = set_setop_side(v, rel, l);
     818           0 :                                 empty_cross = false; /* don't rewrite again */
     819         275 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
     820           7 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
     821             :                         }
     822        2516 :                 } else if (is_inter(rel->op) || is_except(rel->op)) {
     823        2516 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     824        2516 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     825             : 
     826        2516 :                         if (lv == 0) { /* left side empty */
     827          62 :                                 if (can_be_pruned)
     828             :                                         empty_cross = true;
     829             :                                 else
     830           5 :                                         set_count_prop(v->sql->sa, rel, 0);
     831        2454 :                         } else if (rv == 0) { /* right side empty */
     832          17 :                                 if (is_inter(rel->op)) {
     833           3 :                                         if (can_be_pruned)
     834             :                                                 empty_cross = true;
     835             :                                         else
     836           0 :                                                 set_count_prop(v->sql->sa, rel, 0);
     837          14 :                                 } else if (can_be_pruned && !rel_is_ref(rel)) {
     838          14 :                                         rel = set_setop_side(v, rel, l);
     839          14 :                                         empty_cross = false; /* don't rewrite again */
     840             :                                 } else {
     841           0 :                                         set_count_prop(v->sql->sa, rel, lv);
     842             :                                 }
     843             :                         } else {
     844        2437 :                                 set_count_prop(v->sql->sa, rel, lv);
     845             :                         }
     846             :                 }
     847        2793 :                 if (can_be_pruned && empty_cross) {
     848          80 :                         rel_destroy(rel->l);
     849          80 :                         rel->l = NULL;
     850          80 :                         rel_destroy(rel->r);
     851          80 :                         rel->r = NULL;
     852         244 :                         for (node *n = rel->exps->h ; n ; n = n->next) {
     853         164 :                                 sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     854         164 :                                 exp_prop_alias(v->sql->sa, a, e);
     855         164 :                                 n->data = a;
     856             :                         }
     857          80 :                         list_hash_clear(rel->exps);
     858          80 :                         sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     859          80 :                         set_count_prop(v->sql->sa, l, 1);
     860          80 :                         l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     861          80 :                         set_count_prop(v->sql->sa, l, 0);
     862          80 :                         rel->op = op_project;
     863          80 :                         rel->l = l;
     864          80 :                         rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     865          80 :                         set_count_prop(v->sql->sa, rel, 0);
     866          80 :                         set_nodistinct(rel); /* set relations may have distinct flag set */
     867          80 :                         v->changes++;
     868             :                 }
     869             :                 break;
     870             :         }
     871       13395 :         case op_munion: {
     872       13395 :                 list *l = rel->l, *nrels = sa_list(v->sql->sa);
     873       13395 :                 BUN cnt = 0;
     874       13395 :                 bool needs_pruning = false;
     875             : 
     876       13395 :                 if (is_recursive(rel))
     877             :                         break;
     878       50928 :                 for (node *n = l->h; n; n = n->next) {
     879       37604 :                         sql_rel *r = n->data, *pl = r;
     880             : 
     881       37604 :                         while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     882           0 :                                         pl = pl->l;
     883             :                         /* if it's not a projection, then project and propagate statistics */
     884       37604 :                         if (!is_project(pl->op) && !is_base(pl->op)) {
     885           0 :                                 pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     886           0 :                                 set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     887           0 :                                 pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     888             :                         }
     889       37604 :                         nrels = append(nrels, pl);
     890             :                         /* we need new munion statistics */
     891             :                         /* propagate row count */
     892       37604 :                         BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     893             :                         /* if PROP_COUNT does not exist we assume at least a row (see get_rel_count def) */
     894       37604 :                         if (rv == BUN_NONE) {
     895        1208 :                                 cnt++;
     896        1208 :                                 continue;
     897             :                         }
     898       36396 :                         if (!rv && can_be_pruned)
     899        6703 :                                 needs_pruning = true;
     900             :                         /* overflow check */
     901       36396 :                         if (rv > (BUN_MAX - cnt))
     902       37604 :                                 rv = BUN_MAX;
     903             :                         else
     904       36396 :                                 cnt += rv;
     905             :                 }
     906       13324 :                 int i = 0;
     907       75705 :                 for (node *n = rel->exps->h ; n ; n = n->next, i++)
     908       62381 :                         rel_munion_get_statistics(v->sql, rel, nrels, n->data, i);
     909             : 
     910       13324 :                 if (needs_pruning && !rel_is_ref(rel)) {
     911        4542 :                         v->changes++;
     912        4542 :                         list *nl = sa_list(l->sa);
     913             : 
     914       16769 :                         for (node *n = nrels->h; n; n = n->next) {
     915       12227 :                                 sql_rel *r = n->data;
     916       12227 :                                 BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     917             : 
     918       12227 :                                 if (!rv) { /* keep last for now */
     919        6233 :                                         rel_destroy(r);
     920        6233 :                                         continue;
     921             :                                 }
     922        5994 :                                 nl = append(nl, r);
     923             :                         }
     924        4542 :                         rel->l = nl;
     925        4542 :                         if (list_length(nl) == 1) {
     926        4206 :                                 sql_rel *l = rel->l = nl->h->data; /* ugh */
     927        4206 :                                 rel->r = NULL;
     928        4206 :                                 rel->op = op_project;
     929             : 
     930       20683 :                                 for (node *n = rel->exps->h, *m = l->exps->h ; n && m ; n = n->next, m = m->next) {
     931       16477 :                                         sql_exp *pe = n->data, *ie = m->data;
     932       16477 :                                         sql_exp *ne = exp_ref(v->sql, ie);
     933       16477 :                                         exp_prop_alias(v->sql->sa, ne, pe);
     934       16477 :                                         n->data = ne;
     935             :                                 }
     936        4206 :                                 list_hash_clear(rel->exps);
     937         336 :                         } else if (list_empty(nl)) {
     938             :                                 /* empty select (project [ nils ] ) */
     939         454 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     940         354 :                                         sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     941         354 :                                         exp_prop_alias(v->sql->sa, a, e);
     942         354 :                                         n->data = a;
     943             :                                 }
     944         100 :                                 list_hash_clear(rel->exps);
     945         100 :                                 sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     946         100 :                                 set_count_prop(v->sql->sa, l, 1);
     947         100 :                                 l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     948         100 :                                 set_count_prop(v->sql->sa, l, 0);
     949         100 :                                 rel->op = op_project;
     950         100 :                                 rel->r = NULL;
     951         100 :                                 rel->l = l;
     952         100 :                                 rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     953         100 :                                 set_count_prop(v->sql->sa, rel, 0);
     954         100 :                                 set_nodistinct(rel); /* set relations may have distinct flag set */
     955             :                         }
     956             :                 } else {
     957        8782 :                         set_count_prop(v->sql->sa, rel, cnt);
     958             :                 }
     959             :                 break;
     960             :         }
     961      539349 :         case op_join:
     962             :         case op_left:
     963             :         case op_right:
     964             :         case op_full:
     965             :         case op_semi:
     966             :         case op_anti:
     967             :         case op_select:
     968             :         case op_project:
     969             :         case op_groupby: {
     970      539349 :                 if (is_groupby(rel->op) && !list_empty(rel->r))
     971       16893 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     972      539348 :                 rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, &rel_propagate_statistics, false);
     973      539333 :                 if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r))
     974       39361 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     975             :                 /* The following optimizations can only be applied after propagating the statistics to rel->exps */
     976      539336 :                 if (can_be_pruned && (is_join(rel->op) || is_select(rel->op)) && !list_empty(rel->exps)) {
     977      141247 :                         int changes = v->changes;
     978      141247 :                         rel->exps = rel_prune_predicates(v, rel);
     979      141247 :                         if (v->changes > changes)
     980        3655 :                                 rel = rewrite_simplify(v, 0, v->value_based_opt, rel);
     981             :                 }
     982             : 
     983             :                 /* propagate row count */
     984      539336 :                 sql_rel *l = rel->l, *r = rel->r;
     985      539336 :                 switch (rel->op) {
     986      136403 :                 case op_join:
     987             :                 case op_left:
     988             :                 case op_right:
     989             :                 case op_full: {
     990      136403 :                         BUN lv = get_rel_count(l), rv = get_rel_count(r), uniques_estimate = BUN_MAX, join_idx_estimate = BUN_MAX;
     991             : 
     992      136403 :                         if (!list_empty(rel->exps) && !is_single(rel)) {
     993      249036 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     994      126986 :                                         sql_exp *e = n->data, *el = e->l, *er = e->r;
     995             : 
     996      126986 :                                         if (find_prop(e->p, PROP_JOINIDX)) {
     997         667 :                                                 join_idx_estimate = lv>rv?lv:rv;
     998      126319 :                                         } else if (e->type == e_cmp && e->flag == cmp_equal) {
     999             :                                                 /* if one of the sides is unique, the cardinality will be that exact number, but look for nulls */
    1000      122434 :                                                 if (!is_semantics(e) || !has_nil(el) || !has_nil(er)) {
    1001      122257 :                                                         BUN lu = 0, ru = 0;
    1002      122257 :                                                         prop *p = NULL;
    1003      122257 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES)))
    1004       90810 :                                                                 lu = (BUN) p->value.dval;
    1005      122257 :                                                         if ((p = find_prop(er->p, PROP_NUNIQUES)))
    1006      105330 :                                                                 ru = (BUN) p->value.dval;
    1007      122257 :                                                         if (is_unique(el) || lu > lv) {
    1008       74604 :                                                                 BUN ncount = (is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv;
    1009       74604 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1010       47653 :                                                         } else if (is_unique(er) || ru > rv) {
    1011       29805 :                                                                 BUN ncount = (is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv;
    1012       29805 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1013             :                                                         }
    1014             :                                                 }
    1015             :                                         }
    1016             :                                 }
    1017             :                         }
    1018      136403 :                         if (is_single(rel)) {
    1019        4947 :                                 set_count_prop(v->sql->sa, rel, lv);
    1020      131456 :                         } else if (join_idx_estimate != BUN_MAX) {
    1021         665 :                                 set_count_prop(v->sql->sa, rel, join_idx_estimate);
    1022      130791 :                         } else if (uniques_estimate != BUN_MAX) {
    1023       97898 :                                 set_count_prop(v->sql->sa, rel, uniques_estimate);
    1024       32893 :                         } else if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1025             :                                 /* corner cases for outer joins */
    1026         126 :                                 if (is_left(rel->op)) {
    1027         114 :                                         set_count_prop(v->sql->sa, rel, lv);
    1028          12 :                                 } else if (is_right(rel->op)) {
    1029           3 :                                         set_count_prop(v->sql->sa, rel, rv);
    1030           9 :                                 } else if (is_full(rel->op) && lv != BUN_NONE && rv != BUN_NONE) {
    1031           9 :                                         set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
    1032           0 :                                 } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1033           0 :                                         set_count_prop(v->sql->sa, rel, 0);
    1034             :                                 }
    1035       32767 :                         } else if (lv == 0) {
    1036        1180 :                                 set_count_prop(v->sql->sa, rel, (is_right(rel->op) || is_full(rel->op)) ? rv : 0);
    1037       32163 :                         } else if (rv == 0) {
    1038        1570 :                                 set_count_prop(v->sql->sa, rel, (is_left(rel->op) || is_full(rel->op)) ? lv : 0);
    1039       31087 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1040       18442 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX / lv)) ? BUN_MAX : (lv * rv)); /* overflow check */
    1041             :                         }
    1042             :                         break;
    1043             :                 }
    1044        2917 :                 case op_anti:
    1045        2917 :                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1046        2917 :                         break;
    1047      110179 :                 case op_semi:
    1048             :                 case op_select:
    1049             :                         /* TODO calculate cardinalities using selectivities */
    1050      110179 :                         if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1051        2759 :                                 set_count_prop(v->sql->sa, rel, 0);
    1052             :                         } else {
    1053      211578 :                                 if (!list_empty(rel->exps) && !is_single(rel)) {
    1054      104156 :                                         BUN cnt = get_rel_count(l), u = 1;
    1055      172754 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1056      114295 :                                                 sql_exp *e = n->data, *el = e->l, *er = e->r;
    1057             : 
    1058             :                                                 /* simple expressions first */
    1059      114295 :                                                 if (e->type == e_cmp && e->flag == cmp_equal && exp_is_atom(er)) {
    1060             :                                                         /* use selectivity */
    1061       59005 :                                                         prop *p;
    1062       59005 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES))) {
    1063       45698 :                                                                 u = (BUN) p->value.dval;
    1064       45698 :                                                                 break;
    1065             :                                                         }
    1066             :                                                 }
    1067             :                                         }
    1068             :                                         /* u is an *estimate*, so don't set count_prop to 0 unless cnt is 0 */
    1069      104157 :                                         set_count_prop(v->sql->sa, rel, cnt == 0 ? 0 : u == 0 || u > cnt ? 1 : cnt/u);
    1070             :                                 } else {
    1071        3265 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1072             :                                 }
    1073             :                         }
    1074             :                         break;
    1075      265064 :                 case op_project:
    1076      265064 :                         if (l) {
    1077      254799 :                                 if (need_distinct(rel)) {
    1078           0 :                                         set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->exps));
    1079             :                                 } else {
    1080      254799 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1081             :                                 }
    1082             :                         } else {
    1083       10265 :                                 BUN card = 1;
    1084             : 
    1085       10265 :                                 if (!list_empty(rel->exps)) {
    1086       31098 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1087       20833 :                                                 sql_exp *e = n->data;
    1088       20833 :                                                 card = MAX(card, trivial_project_exp_card(e));
    1089             :                                         }
    1090             :                                 }
    1091       10265 :                                 set_count_prop(v->sql->sa, rel, card);
    1092             :                         }
    1093             :                         break;
    1094       24360 :                 case op_groupby:
    1095       24360 :                         if (list_empty(rel->r)) {
    1096        7467 :                                 set_count_prop(v->sql->sa, rel, 1);
    1097             :                         } else {
    1098       16893 :                                 set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->r));
    1099             :                         }
    1100             :                         break;
    1101             :                 default:
    1102             :                         break;
    1103             :                 }
    1104             :                 break;
    1105             :         }
    1106       17038 :         case op_topn: {
    1107       17038 :                 BUN lv = get_rel_count(rel->l);
    1108             : 
    1109       17038 :                 if (lv != BUN_NONE) {
    1110       17015 :                         sql_exp *le = rel->exps->h->data, *oe = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
    1111          84 :                         if (oe && oe->l && exp_is_not_null(oe)) { /* no parameters */
    1112          84 :                                 BUN offset = (BUN) ((atom*)oe->l)->data.val.lval;
    1113          84 :                                 lv = offset >= lv ? 0 : lv - offset;
    1114             :                         }
    1115       17015 :                         if (le->l && exp_is_not_null(le)) {
    1116       16980 :                                 BUN limit = (BUN) ((atom*)le->l)->data.val.lval;
    1117       16980 :                                 lv = MIN(lv, limit);
    1118             :                         }
    1119       17015 :                         set_count_prop(v->sql->sa, rel, lv);
    1120             :                 }
    1121             :                 break;
    1122             :         }
    1123          22 :         case op_sample: {
    1124          22 :                 BUN lv = get_rel_count(rel->l);
    1125             : 
    1126          22 :                 if (lv != BUN_NONE) {
    1127           4 :                         sql_exp *se = rel->exps->h->data;
    1128           4 :                         sql_subtype *tp = exp_subtype(se);
    1129             : 
    1130           4 :                         if (se->l && tp->type->eclass == EC_NUM) { /* sample is a number of rows */
    1131           4 :                                 BUN sample = (BUN) ((atom*)se->l)->data.val.lval;
    1132           4 :                                 lv = MIN(lv, sample);
    1133           0 :                         } else if (se->l) { /* sample is a percentage of rows */
    1134           0 :                                 dbl percent = ((atom*)se->l)->data.val.dval;
    1135           0 :                                 assert(tp->type->eclass == EC_FLT);
    1136           0 :                                 lv = (BUN) ceil((dbl)lv * percent);
    1137             :                         }
    1138           4 :                         set_count_prop(v->sql->sa, rel, lv);
    1139             :                 }
    1140             :                 break;
    1141             :         }
    1142        6015 :         case op_table: {
    1143        6015 :                 sql_exp *op = rel->r;
    1144        6015 :                 if (rel->flag != TRIGGER_WRAPPER && op) {
    1145        5703 :                         sql_subfunc *f = op->f;
    1146        5703 :                         if (f->func->lang == FUNC_LANG_SQL) {
    1147          97 :                                 set_count_prop(v->sql->sa, rel, 1000 /* just some fallback value */);
    1148        5606 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "storage") == 0) {
    1149         831 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of catalog */);
    1150        4775 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "db_users") == 0) {
    1151           0 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of users */);
    1152        4775 :                         } else if (f->func->lang == FUNC_LANG_MAL && strncmp(f->func->base.name, "querylog", 8) == 0) {
    1153        1089 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of querylog */);
    1154        3686 :                         } else if (f->func->lang == FUNC_LANG_MAL &&
    1155        3574 :                                            (strcmp(f->func->base.name, "queue") == 0 ||
    1156        3309 :                                                 strcmp(f->func->base.name, "optimizers") == 0 ||
    1157        2996 :                                                 strcmp(f->func->base.name, "env") == 0 ||
    1158        2703 :                                                 strcmp(f->func->base.name, "keywords") == 0 ||
    1159        2703 :                                                 strcmp(f->func->base.name, "statistics") == 0 ||
    1160        2034 :                                                 strcmp(f->func->base.name, "rejects") == 0 ||
    1161        1779 :                                                 strcmp(f->func->base.name, "schemastorage") == 0 ||
    1162        1779 :                                                 strncmp(f->func->base.name, "storage", 7) == 0 ||
    1163        1779 :                                                 strcmp(f->func->base.name, "sessions") == 0) ) {
    1164        2086 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of queue */);
    1165             :                         }
    1166             :                         /* else {
    1167             :                                 printf("%%func needs stats : %s\n", f->func->base.name);
    1168             :                         } */
    1169             :                 }
    1170             :                 break;
    1171             :         }
    1172             :         /*These relations are less important for now
    1173             :           TODO later we can tune it */
    1174             :         case op_insert:
    1175             :         case op_update:
    1176             :         case op_delete:
    1177             :         case op_truncate:
    1178             :         case op_ddl:
    1179             :         default:
    1180             :                 break;
    1181             :         }
    1182             : 
    1183             :         return rel;
    1184             : }
    1185             : 
    1186             : static sql_rel *
    1187      545770 : rel_get_statistics(visitor *v, global_props *gp, sql_rel *rel)
    1188             : {
    1189             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
    1190      545770 :         uint8_t has_special_modify = (uint8_t) gp->has_special_modify;
    1191      545770 :         v->data = &has_special_modify;
    1192      545770 :         rel = rel_visitor_bottomup(v, rel, &rel_get_statistics_);
    1193      546319 :         v->data = gp;
    1194      546319 :         return rel;
    1195             : }
    1196             : 
    1197             : run_optimizer
    1198      721649 : bind_get_statistics(visitor *v, global_props *gp)
    1199             : {
    1200      721649 :         (void) v;
    1201      721649 :         return (gp->opt_level == 1 && !gp->cnt[op_insert]) ? rel_get_statistics : NULL;
    1202             : }
    1203             : 
    1204             : 
    1205             : static bool
    1206       95557 : point_select_on_unique_column(sql_rel *rel)
    1207             : {
    1208       95557 :         if (is_select(rel->op) && !list_empty(rel->exps)) {
    1209      131620 :                 for (node *n = rel->exps->h; n ; n = n->next) {
    1210       75836 :                         sql_exp *e = n->data, *el = e->l, *er = e->r, *found = NULL;
    1211             : 
    1212       75836 :                         if (is_compare(e->type) && e->flag == cmp_equal) {
    1213       33632 :                                 if (is_numeric_upcast(el))
    1214           0 :                                         el = el->l;
    1215       33632 :                                 if (is_numeric_upcast(er))
    1216           0 :                                         er = er->l;
    1217       33632 :                                 if (is_alias(el->type) && exp_is_atom(er) && (found = rel_find_exp(rel->l, el)) &&
    1218         752 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(found) || !has_nil(er)))
    1219             :                                         return true;
    1220       32880 :                                 if (is_alias(er->type) && exp_is_atom(el) && (found = rel_find_exp(rel->l, er)) &&
    1221           0 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(el) || !has_nil(found)))
    1222             :                                         return true;
    1223             :                         }
    1224             :                 }
    1225             :         }
    1226             :         return false;
    1227             : }
    1228             : 
    1229             : /*
    1230             :  * A point select on an unique column reduces the number of rows to 1. If the same select is under a
    1231             :  * join, the opposite side's select can be pushed above the join.
    1232             :  */
    1233             : static inline sql_rel *
    1234     1258996 : rel_push_select_up(visitor *v, sql_rel *rel)
    1235             : {
    1236     1258996 :         if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) || is_semi(rel->op)) && !is_single(rel)) {
    1237      256110 :                 sql_rel *l = rel->l, *r = rel->r;
    1238      256110 :                 bool can_pushup_left = is_select(l->op) && !rel_is_ref(l) && !is_single(l) && (is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op)),
    1239      256110 :                         can_pushup_right = is_select(r->op) && !rel_is_ref(r) && !is_single(r) && (is_innerjoin(rel->op) || is_right(rel->op));
    1240             : 
    1241      256110 :                 if (can_pushup_left || can_pushup_right) {
    1242       67413 :                         if (can_pushup_left)
    1243       45775 :                                 can_pushup_left = point_select_on_unique_column(r);
    1244       67413 :                         if (can_pushup_right)
    1245       49782 :                                 can_pushup_right = point_select_on_unique_column(l);
    1246             : 
    1247             :                         /* if both selects retrieve one row each, it's not worth it to push both up */
    1248       67413 :                         if (can_pushup_left && !can_pushup_right) {
    1249         686 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1250         686 :                                 nrel->l = l->l;
    1251         686 :                                 rel = rel_inplace_select(rel, nrel, l->exps);
    1252         686 :                                 assert(is_select(rel->op));
    1253         686 :                                 v->changes++;
    1254       66727 :                         } else if (!can_pushup_left && can_pushup_right) {
    1255          14 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1256          14 :                                 nrel->r = r->l;
    1257          14 :                                 rel = rel_inplace_select(rel, nrel, r->exps);
    1258          14 :                                 assert(is_select(rel->op));
    1259          14 :                                 v->changes++;
    1260             :                         }
    1261             :                 }
    1262             :         }
    1263     1258996 :         return rel;
    1264             : }
    1265             : 
    1266             : static int
    1267       98711 : sql_class_base_score(visitor *v, sql_column *c, sql_subtype *t, bool equality_based)
    1268             : {
    1269       98711 :         int de;
    1270             : 
    1271       98711 :         if (!t)
    1272             :                 return 0;
    1273       98711 :         switch (ATOMstorage(t->type->localtype)) {
    1274             :         case TYPE_bte:
    1275             :                 return 150 - 8;
    1276        1632 :         case TYPE_sht:
    1277        1632 :                 return 150 - 16;
    1278       39131 :         case TYPE_int:
    1279       39131 :                 return 150 - 32;
    1280         519 :         case TYPE_void:
    1281             :         case TYPE_lng:
    1282         519 :                 return 150 - 64;
    1283             :         case TYPE_uuid:
    1284             : #ifdef HAVE_HGE
    1285             :         case TYPE_hge:
    1286             : #endif
    1287             :                 return 150 - 128;
    1288           2 :         case TYPE_flt:
    1289           2 :                 return 75 - 24;
    1290             :         case TYPE_dbl:
    1291             :                 return 75 - 53;
    1292       33888 :         default:
    1293       33888 :                 if (equality_based && c && v->storage_based_opt && (de = mvc_is_duplicate_eliminated(v->sql, c)))
    1294        1666 :                         return 150 - de * 8;
    1295             :                 /* strings and blobs not duplicate eliminated don't get any points here */
    1296             :                 return 0;
    1297             :         }
    1298             : }
    1299             : 
    1300             : static int
    1301       35952 : score_se_base(visitor *v, sql_rel *rel, sql_exp *e)
    1302             : {
    1303       35952 :         int res = 0;
    1304       35952 :         sql_subtype *t = exp_subtype(e);
    1305       35952 :         sql_column *c = NULL;
    1306             : 
    1307       35952 :         if (e->type == e_convert) /* keep unsafes at the end (TODO improve) */
    1308             :                 return -1000;
    1309             :         /* can we find out if the underlying table is sorted */
    1310       35351 :         if ((c = exp_find_column(rel, e, -2)) && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1311       35351 :                 res += 600;
    1312             : 
    1313             :         /* prefer the shorter var types over the longer ones */
    1314       35351 :         res += sql_class_base_score(v, c, t, is_equality_or_inequality_exp(e->flag)); /* smaller the type, better */
    1315       35351 :         return res;
    1316             : }
    1317             : 
    1318             : static int
    1319       60453 : score_se(visitor *v, sql_rel *rel, sql_exp *e)
    1320             : {
    1321       60453 :         int score = 0;
    1322       60453 :         if (e->type == e_cmp && !is_complex_exp(e->flag)) {
    1323       35952 :                 sql_exp *l = e->l;
    1324             : 
    1325       35952 :                 while (l->type == e_cmp) { /* go through nested comparisons */
    1326           2 :                         sql_exp *ll;
    1327             : 
    1328           2 :                         if (l->flag == cmp_filter || l->flag == cmp_or)
    1329           0 :                                 ll = ((list*)l->l)->h->data;
    1330             :                         else
    1331           2 :                                 ll = l->l;
    1332           2 :                         if (ll->type != e_cmp)
    1333             :                                 break;
    1334             :                         l = ll;
    1335             :                 }
    1336       35952 :                 score += score_se_base(v, rel, l);
    1337             :         }
    1338       60453 :         score += exp_keyvalue(e);
    1339       60453 :         return score;
    1340             : }
    1341             : 
    1342             : static inline sql_rel *
    1343     1258996 : rel_select_order(visitor *v, sql_rel *rel)
    1344             : {
    1345     1258996 :         int *scores = NULL;
    1346     1258996 :         sql_exp **exps = NULL;
    1347             : 
    1348     1258996 :         if (is_select(rel->op) && list_length(rel->exps) > 1) {
    1349       28117 :                 node *n;
    1350       28117 :                 int i, nexps = list_length(rel->exps);
    1351       28117 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, nexps);
    1352       28117 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, nexps);
    1353             : 
    1354       88570 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next) {
    1355       60483 :                         exps[i] = n->data;
    1356       60483 :                         if (find_prop(exps[i]->p, PROP_HASHCOL))
    1357             :                                 return rel;
    1358       60453 :                         scores[i] = score_se(v, rel, n->data);
    1359             :                 }
    1360       28087 :                 GDKqsort(scores, exps, NULL, nexps, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1361             : 
    1362       88540 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next)
    1363       60453 :                         n->data = exps[i];
    1364             :         }
    1365             : 
    1366             :         return rel;
    1367             : }
    1368             : 
    1369             : /* Compute the efficiency of using this expression early in a group by list */
    1370             : static int
    1371       63360 : score_gbe(visitor *v, sql_rel *rel, sql_exp *e)
    1372             : {
    1373       63360 :         int res = 0;
    1374       63360 :         sql_subtype *t = exp_subtype(e);
    1375       63360 :         sql_column *c = exp_find_column(rel, e, -2);
    1376             : 
    1377       63360 :         if (e->card == CARD_ATOM) /* constants are trivial to group */
    1378          38 :                 res += 1000;
    1379             :         /* can we find out if the underlying table is sorted */
    1380       63360 :         if (is_unique(e) || find_prop(e->p, PROP_HASHCOL) || (c && v->storage_based_opt && mvc_is_unique(v->sql, c))) /* distinct columns */
    1381        3584 :                 res += 700;
    1382       42134 :         if (c && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1383        3692 :                 res += 500;
    1384       63360 :         if (find_prop(e->p, PROP_HASHIDX)) /* has hash index */
    1385           0 :                 res += 200;
    1386             : 
    1387             :         /* prefer the shorter var types over the longer ones */
    1388       63360 :         res += sql_class_base_score(v, c, t, true); /* smaller the type, better */
    1389       63360 :         return res;
    1390             : }
    1391             : 
    1392             : /* reorder group by expressions */
    1393             : static inline sql_rel *
    1394     1258996 : rel_groupby_order(visitor *v, sql_rel *rel)
    1395             : {
    1396     1258996 :         int *scores = NULL;
    1397     1258996 :         sql_exp **exps = NULL;
    1398             : 
    1399     1258996 :         if (is_groupby(rel->op) && list_length(rel->r) > 1) {
    1400       26727 :                 node *n;
    1401       26727 :                 list *gbe = rel->r;
    1402       26727 :                 int i, ngbe = list_length(gbe);
    1403       26727 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, ngbe);
    1404       26727 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, ngbe);
    1405             : 
    1406             :                 /* first sorting step, give priority for integers and sorted columns */
    1407       90087 :                 for (i = 0, n = gbe->h; n; i++, n = n->next) {
    1408       63360 :                         exps[i] = n->data;
    1409       63360 :                         scores[i] = score_gbe(v, rel, exps[i]);
    1410             :                 }
    1411       26727 :                 GDKqsort(scores, exps, NULL, ngbe, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1412             : 
    1413             :                 /* second sorting step, give priority to strings with lower number of digits */
    1414       54217 :                 for (i = ngbe - 1; i && !scores[i]; i--); /* find expressions with no score from the first round */
    1415       26727 :                 if (scores[i])
    1416       25725 :                         i++;
    1417       26727 :                 if (ngbe - i > 1) {
    1418       12055 :                         for (int j = i; j < ngbe; j++) {
    1419        9064 :                                 sql_subtype *t = exp_subtype(exps[j]);
    1420        9064 :                                 scores[j] = t ? t->digits : 0;
    1421             :                         }
    1422             :                         /* the less number of digits the better, order ascending */
    1423        2991 :                         GDKqsort(scores + i, exps + i, NULL, ngbe - i, sizeof(int), sizeof(void *), TYPE_int, false, true);
    1424             :                 }
    1425             : 
    1426       90087 :                 for (i = 0, n = gbe->h; n; i++, n = n->next)
    1427       63360 :                         n->data = exps[i];
    1428             :         }
    1429             : 
    1430     1258996 :         return rel;
    1431             : }
    1432             : 
    1433             : /* This optimization loop contains optimizations that can potentially use statistics */
    1434             : static sql_rel *
    1435     1258996 : rel_final_optimization_loop_(visitor *v, sql_rel *rel)
    1436             : {
    1437             :         /* Run rel_push_select_up only once at the end to avoid an infinite optimization loop */
    1438     1258996 :         rel = rel_push_select_up(v, rel);
    1439     1258996 :         rel = rel_select_order(v, rel);
    1440             : 
    1441             :         /* TODO? Maybe later add rel_simplify_count, rel_join2semijoin, rel_simplify_fk_joins,
    1442             :            rel_distinct_project2groupby, rel_simplify_predicates, rel_simplify_math,
    1443             :            rel_distinct_aggregate_on_unique_values */
    1444             : 
    1445     1258996 :         rel = rel_groupby_order(v, rel);
    1446     1258996 :         return rel;
    1447             : }
    1448             : 
    1449             : static sql_rel *
    1450       67725 : rel_final_optimization_loop(visitor *v, global_props *gp, sql_rel *rel)
    1451             : {
    1452       67725 :         (void) gp;
    1453       67725 :         return rel_visitor_bottomup(v, rel, &rel_final_optimization_loop_);
    1454             : }
    1455             : 
    1456             : run_optimizer
    1457      722331 : bind_final_optimization_loop(visitor *v, global_props *gp)
    1458             : {
    1459      722331 :         int flag = v->sql->sql_optimizer;
    1460             :         /* At the moment, this optimizer has dependency on 3 flags */
    1461      554117 :         return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_select]) &&
    1462      790058 :                 (flag & push_select_up) && (flag & optimize_select_and_joins_topdown) && (flag & optimize_projections) ? rel_final_optimization_loop : NULL;
    1463             : }

Generated by: LCOV version 1.14