LCOV - code coverage report
Current view: top level - sql/server - rel_statistics.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 833 885 94.1 %
Date: 2025-03-26 20:06:40 Functions: 26 26 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024, 2025 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include "rel_optimizer_private.h"
      15             : #include "rel_statistics.h"
      16             : #include "rel_basetable.h"
      17             : #include "rel_rewriter.h"
      18             : #include "sql_storage.h"
      19             : 
      20             : static sql_exp *
      21     3487684 : comparison_find_column(sql_exp *input, sql_exp *e)
      22             : {
      23     3487684 :         switch (input->type) {
      24      100105 :         case e_convert: {
      25      100105 :                 list *types = (list *)input->r;
      26      100105 :                 sql_class from = ((sql_subtype*)types->h->data)->type->eclass, to = ((sql_subtype*)types->h->next->data)->type->eclass;
      27      100105 :                 if (from == to)
      28      193445 :                         return comparison_find_column(input->l, e) ? input : NULL;
      29             :                 return NULL;
      30             :         }
      31     3067500 :         case e_column:
      32     3067500 :                 return exp_match(e, input) ? input : NULL;
      33             :         default:
      34             :                 return NULL;
      35             :         }
      36             : }
      37             : 
      38             : /* multi lo <= col <= hi, maybe still be false even if lo or hi are NULL, possibly similar for filter on multiple
      39             :  * columns */
      40             : #define comp_single_column(c) (!c->f)
      41             : 
      42             : static sql_exp *
      43     8806471 : rel_propagate_column_ref_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
      44             : {
      45     8890671 :         assert(e->type == e_column);
      46     8890671 :         if (rel) {
      47     8890590 :                 switch(rel->op) {
      48     4110095 :                 case op_left:
      49             :                 case op_right:
      50             :                 case op_full:
      51             :                 case op_join:
      52             :                 case op_select:
      53             :                 case op_anti:
      54             :                 case op_semi: {
      55     4110095 :                         bool found_without_semantics = false, found_left = false, found_right = false, still_unique = false;
      56             : 
      57     4110095 :                         if ((is_innerjoin(rel->op) || is_select(rel->op)) && list_length(rel->exps) == 1 && exp_is_false(rel->exps->h->data))
      58             :                                 return NULL; /* nothing will pass, skip */
      59             : 
      60             :                         /* propagate from the bottom first */
      61     4094464 :                         if (rel_propagate_column_ref_statistics(sql, rel->l, e))
      62             :                                 found_left = true;
      63     2601533 :                         if (!found_left && is_join(rel->op) && rel_propagate_column_ref_statistics(sql, rel->r, e))
      64     4094464 :                                 found_right = true;
      65             : 
      66     4094464 :                         if (!found_left && !found_right)
      67             :                                 return NULL;
      68     1790870 :                         if (!list_empty(rel->exps) && rel->op != op_anti) { /* if there's an or, the MIN and MAX get difficult to propagate */
      69     3515820 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
      70     1820315 :                                         sql_exp *comp = n->data, *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL, *fe = comp->f, *fne = NULL;
      71             : 
      72     1820315 :                                         if (comp->type == e_cmp) {
      73     1819311 :                                                 if (is_theta_exp(comp->flag) && ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) || (fe && (fne = comparison_find_column(fe, e))))) {
      74      124770 :                                                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
      75      124769 :                                                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
      76             : 
      77             :                                                         /* not semantics found or if explicitly filtering not null values from the column */
      78      124769 :                                                         found_without_semantics |= (!is_semantics(comp) && comp_single_column(comp)) || (comp->flag == cmp_equal && lne && is_anti(comp) && exp_is_null(re));
      79      124769 :                                                         still_unique |= comp->flag == cmp_equal && is_unique(le) && is_unique(re); /* unique if only equi-joins on unique columns are there */
      80      124769 :                                                         if (is_full(rel->op) || (is_left(rel->op) && found_left) || (is_right(rel->op) && found_right)) /* on outer joins, min and max cannot be propagated on some cases */
      81       19870 :                                                                 continue;
      82             :                                                         /* if (end2 >= start1 && start2 <= end1) then the 2 intervals are intersected */
      83      104899 :                                                         if (fe && lval_min && lval_max) { /* range case, the middle expression must intersect the other two */
      84        4618 :                                                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
      85        4618 :                                                                 int int1 = rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0,
      86        4618 :                                                                         int2 = fval_min && fval_max && atom_cmp(fval_max, lval_min) >= 0 && atom_cmp(fval_min, lval_max) <= 0,
      87        4618 :                                                                         symmetric = is_symmetric(comp);
      88             : 
      89        4618 :                                                                 if (is_anti(comp) || (!symmetric && fval_min && rval_max && atom_cmp(fval_min, rval_max) < 0)) /* for asymmetric case the re range must be after the fe range */
      90        1815 :                                                                         continue;
      91        2803 :                                                                 if (lne && int1 && int2) {
      92         103 :                                                                         if (symmetric) {
      93           0 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
      94           0 :                                                                                 atom *nmin = statistics_atom_min(sql, rval_min, fval_min), *nmax = statistics_atom_max(sql, rval_max, fval_max);
      95             :                                                                                 /* max is min from le and (max from re and fe max) */
      96           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, nmax, p2->value.pval) : nmax);
      97             :                                                                                 /* min is max from le and (min from re and fe min) */
      98           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, nmin, p1->value.pval) : nmin);
      99             :                                                                         } else {
     100         103 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
     101             :                                                                                 /* max is min from le and fe max */
     102         103 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, fval_max, p2->value.pval) : fval_max);
     103             :                                                                                 /* min is max from le and re min */
     104         103 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, rval_min, p1->value.pval) : rval_min);
     105             :                                                                         }
     106        2700 :                                                                 } else if (rne) {
     107         590 :                                                                         if (symmetric && int1 && int2) { /* min is max from le and (min from re and fe min) */
     108           0 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     109           0 :                                                                                 atom *nmin = p ? statistics_atom_min(sql, p->value.pval, fval_min) : fval_min;
     110           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, nmin, lval_min));
     111         590 :                                                                         } else if (int1) { /* min is max from le and re min */
     112         100 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     113         100 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     114             :                                                                         }
     115        2110 :                                                                 } else if (fne) {
     116         549 :                                                                         if (symmetric && int1 && int2) { /* max is min from le and (max from re and fe max) */
     117           0 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     118           0 :                                                                                 atom *nmax = p ? statistics_atom_max(sql, p->value.pval, rval_max) : rval_max;
     119           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, nmax, lval_max) : nmax);
     120         549 :                                                                         } else if (int2) { /* max is min from le and fe max */
     121         266 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     122         266 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     123             :                                                                         }
     124             :                                                                 }
     125      100281 :                                                         } else if (lval_min && lval_max && rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0) {
     126             :                                                                 /* both min and max must be set and the intervals must overlap */
     127       41897 :                                                                 switch (comp->flag) {
     128       28235 :                                                                 case cmp_equal: /* for equality reduce */
     129       28235 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_max(sql, lval_max, rval_max) : statistics_atom_min(sql, lval_max, rval_max));
     130       28235 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_min(sql, lval_min, rval_min) : statistics_atom_max(sql, lval_min, rval_min));
     131       28235 :                                                                         break;
     132        4345 :                                                                 case cmp_notequal: /* for inequality expand */
     133        4345 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_min(sql, lval_max, rval_max) : statistics_atom_max(sql, lval_max, rval_max));
     134        4345 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_max(sql, lval_min, rval_min) : statistics_atom_min(sql, lval_min, rval_min));
     135        4345 :                                                                         break;
     136        5311 :                                                                 case cmp_gt:
     137             :                                                                 case cmp_gte:
     138        9684 :                                                                         if (!is_anti(comp) && lne) { /* min is max from both min */
     139        4373 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     140        4373 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min, p->value.pval) : rval_min);
     141         938 :                                                                         } else if (!is_anti(comp)) { /* max is min from both max */
     142         938 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     143         938 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     144             :                                                                         }
     145             :                                                                         break;
     146        4006 :                                                                 case cmp_lt:
     147             :                                                                 case cmp_lte:
     148        7205 :                                                                         if (!is_anti(comp) && lne) { /* max is min from both max */
     149        3199 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     150        3199 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max, p->value.pval) : rval_max);
     151         807 :                                                                         } else if (!is_anti(comp)) { /* min is max from both min */
     152         807 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     153         807 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     154             :                                                                         }
     155             :                                                                         break;
     156             :                                                                 default: /* Maybe later I can do cmp_in and cmp_notin */
     157             :                                                                         break;
     158             :                                                                 }
     159             :                                                         }
     160             :                                                 }
     161             :                                         }
     162             :                                 }
     163             :                         }
     164     1790871 :                         if (is_full(rel->op) || (is_left(rel->op) && found_right) || (is_right(rel->op) && found_left))
     165       35775 :                                 set_has_nil(e);
     166     1790871 :                         if (!is_outerjoin(rel->op) && found_without_semantics) /* at an outer join, null values pass */
     167       94052 :                                 set_has_no_nil(e);
     168     1790871 :                         if (is_join(rel->op) && is_unique(e) && !still_unique)
     169      119363 :                                 set_not_unique(e);
     170     1790871 :                         return e;
     171             :                 }
     172     4676927 :                 case op_table:
     173             :                 case op_basetable:
     174             :                 case op_union:
     175             :                 case op_except:
     176             :                 case op_inter:
     177             :                 case op_munion:
     178             :                 case op_project:
     179             :                 case op_groupby: {
     180     4676927 :                         if (is_recursive(rel))
     181             :                                 return NULL;
     182     4676730 :                         sql_exp *found;
     183     4676730 :                         atom *fval;
     184     4676730 :                         prop *est;
     185     4676730 :                         if ((found = rel_find_exp(rel, e))) {
     186     2201562 :                                 if (rel->op != op_table) { /* At the moment don't propagate statistics for table relations */
     187     2156236 :                                         if ((fval = find_prop_and_get(found->p, PROP_MAX)))
     188     1138635 :                                                 set_minmax_property(sql, e, PROP_MAX, fval);
     189     2156235 :                                         if ((fval = find_prop_and_get(found->p, PROP_MIN)))
     190     1145712 :                                                 set_minmax_property(sql, e, PROP_MIN, fval);
     191     2156236 :                                         if (!has_nil(found))
     192     1394473 :                                                 set_has_no_nil(e);
     193     2156236 :                                         if (is_unique(found) || (need_distinct(rel) && list_length(rel->exps) == 1) ||
     194     1739671 :                                                 (is_groupby(rel->op) && (list_empty(rel->r) || (list_length(rel->r) == 1 && exps_find_exp(rel->r, e)))))
     195      427845 :                                                 set_unique(e);
     196             :                                         /* propagate unique estimation for known cases */
     197     2156236 :                                         if (is_groupby(rel->op) && list_empty(rel->r) && !find_prop(e->p, PROP_NUNIQUES)) { /* global aggregate case */
     198        7645 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     199        7645 :                                                 p->value.dval = 1;
     200     2148591 :                                         } else if (((is_basetable(rel->op) || is_except(rel->op) || is_inter(rel->op) || is_simple_project(rel->op) ||
     201       72019 :                                                                  (is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
     202     2089288 :                                                                 (est = find_prop(found->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES))) {
     203      195320 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     204      195320 :                                                 p->value.dval = est->value.dval;
     205             :                                         }
     206             :                                 }
     207     2201560 :                                 return e;
     208             :                         }
     209             :                         return NULL;
     210             :                 }
     211       84200 :                 case op_topn:
     212             :                 case op_sample:
     213       84200 :                         return rel_propagate_column_ref_statistics(sql, rel->l, e);
     214             :                 default:
     215             :                         break;
     216             :                 }
     217             :         }
     218             :         return NULL;
     219             : }
     220             : 
     221             : static atom *
     222     4630633 : atom_from_valptr( allocator *sa, sql_subtype *tpe, ValRecord *v)
     223             : {
     224     4630633 :         atom *a = SA_NEW(sa, atom);
     225             : 
     226     4630560 :         assert(!VALisnil(v));
     227     4630360 :         *a = (atom) {.tpe = *tpe,};
     228     4630360 :         SA_VALcopy(sa, &a->data, v);
     229     4630415 :         return a;
     230             : }
     231             : 
     232             : void
     233     4277875 : sql_column_get_statistics(mvc *sql, sql_column *c, sql_exp *e)
     234             : {
     235     4277875 :         bool nonil = false, unique = false;
     236     4277875 :         double unique_est = 0.0;
     237     4277875 :         ValRecord min, max;
     238     4277875 :         int ok = mvc_col_stats(sql, c, &nonil, &unique, &unique_est, &min, &max);
     239             : 
     240     4278058 :         if (has_nil(e) && nonil)
     241     2821229 :                 set_has_no_nil(e);
     242     4278058 :         if (!is_unique(e) && unique)
     243     1129232 :                 set_unique(e);
     244     4278058 :         if (unique_est != 0.0) {
     245     3009192 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     246     3009198 :                 p->value.dval = unique_est;
     247             :         }
     248     4278064 :         unsigned int digits = 0;
     249     4278064 :         sql_subtype *et = exp_subtype(e);
     250     4278052 :         if (et->type->eclass == EC_DEC || et->type->eclass == EC_NUM)
     251     2779581 :                 digits = et->digits;
     252     4278052 :         if ((ok & 2) == 2) {
     253     2312323 :                 if (!VALisnil(&max)) {
     254     2312313 :                         prop *p = e->p = prop_create(sql->sa, PROP_MAX, e->p);
     255     2312275 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &max);
     256     2312260 :                         if (digits) {
     257     1718874 :                                 unsigned int nd = atom_digits(p->value.pval);
     258     1718900 :                                 if (nd < digits)
     259             :                                         digits = nd;
     260     1718900 :                                 if (!digits)
     261             :                                         digits = 1;
     262             :                         }
     263             :                 }
     264     2312292 :                 VALclear(&max);
     265             :         }
     266     4277991 :         if ((ok & 1) == 1) {
     267     2318457 :                 if (!VALisnil(&min)) {
     268     2318444 :                         prop *p = e->p = prop_create(sql->sa, PROP_MIN, e->p);
     269     2318430 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &min);
     270     2318374 :                         if (digits) {
     271     1726000 :                                 unsigned int nd = atom_digits(p->value.pval);
     272     1726001 :                                 if (nd > digits) /* possibly set to low by max value */
     273             :                                         digits = nd;
     274             :                                 if (!digits)
     275             :                                         digits = 1;
     276             :                         }
     277             :                 }
     278     2318375 :                 VALclear(&min);
     279             :         }
     280     4277903 :         if (digits)
     281     2779431 :                 et->digits = digits;
     282     4277903 :         if (et->type->eclass == EC_DEC && et->digits <= et->scale)
     283         216 :                 et->digits = et->scale + 1;
     284     4277903 : }
     285             : 
     286             : static void
     287      953848 : rel_basetable_column_get_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
     288             : {
     289      953848 :         if (e->p)
     290             :                 return;
     291      306958 :         sql_column *c = NULL;
     292             : 
     293      306958 :         if ((c = rel_base_find_column(rel, e->nid))) {
     294      208620 :                 sql_column_get_statistics(sql, c, e);
     295             :         }
     296             : }
     297             : 
     298             : static bool
     299        8876 : rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, sql_exp *e, int i)
     300             : {
     301        8876 :         sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
     302        8876 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     303        8876 :                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     304        8876 :         prop *est;
     305             : 
     306             :         /* for the intersection, if both expressions don't overlap, it can be pruned */
     307        8876 :         if (is_inter(rel->op) && !has_nil(le) && !has_nil(re) &&
     308         511 :                 ((rval_max && lval_min && atom_cmp(rval_max, lval_min) < 0) || (rval_min && lval_max && atom_cmp(rval_min, lval_max) > 0)))
     309          26 :                 return true;
     310             : 
     311        8850 :         if (lval_max && rval_max) {
     312        2557 :                 if (is_union(rel->op))
     313           3 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     314        2554 :                 else if (is_inter(rel->op))
     315         399 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval_max, rval_max)); /* for intersect the new max will be the min of the two */
     316             :                 else /* except */
     317        2155 :                         set_minmax_property(sql, e, PROP_MAX, lval_max);
     318             :         }
     319        8850 :         if (lval_min && rval_min) {
     320        2557 :                 if (is_union(rel->op))
     321           3 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     322        2554 :                 else if (is_inter(rel->op))
     323         399 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval_min, rval_min)); /* for intersect the new min will be the max of the two */
     324             :                 else /* except */
     325        2155 :                         set_minmax_property(sql, e, PROP_MIN, lval_min);
     326             :         }
     327             : 
     328        8850 :         if (is_union(rel->op) || is_munion(rel->op)) {
     329        5986 :                 if (!has_nil(le) && !has_nil(re))
     330         879 :                         set_has_no_nil(e);
     331        5986 :                 if (need_distinct(rel) && list_length(rel->exps) == 1)
     332           0 :                         set_unique(e);
     333        2864 :         } else if (is_inter(rel->op)) {
     334         564 :                 if (!has_nil(le) || !has_nil(re))
     335         509 :                         set_has_no_nil(e);
     336         564 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     337         345 :                         set_unique(e);
     338             :         } else {
     339        2300 :                 assert(is_except(rel->op));
     340        2300 :                 if (!has_nil(le))
     341        2236 :                         set_has_no_nil(e);
     342        2300 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     343        2012 :                         set_unique(e);
     344             :         }
     345             :         /* propagate unique estimation for known cases */
     346        8850 :         if (!is_union(rel->op) && (est = find_prop(le->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES)) {
     347         207 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     348         207 :                 p->value.dval = est->value.dval;
     349             :         }
     350             :         return false;
     351             : }
     352             : 
     353             : 
     354             : static void
     355       62850 : rel_munion_get_statistics(mvc *sql, sql_rel *rel, list *rels, sql_exp *e, int i)
     356             : {
     357       62850 :         if (is_recursive(rel))
     358             :                 return ;
     359       62850 :         assert(is_munion(rel->op));
     360             : 
     361       62850 :         sql_rel *l = rels->h->data;
     362       62850 :         sql_exp *le = list_fetch(l->exps, i);
     363       62850 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX);
     364       62850 :         bool has_nonil = !has_nil(le);
     365             : 
     366      182776 :         for(node *n = rels->h->next; n; n = n->next) {
     367      119926 :                 sql_rel *r = n->data;
     368      119926 :                 sql_exp *re = list_fetch(r->exps, i);
     369      119926 :                 atom *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     370             : 
     371      119926 :                 if (lval_max && rval_max) {
     372       44201 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     373       44201 :                         lval_max = find_prop_and_get(e->p, PROP_MAX);
     374             :                 }
     375      119926 :                 if (lval_min && rval_min) {
     376       43652 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     377       43652 :                         lval_min = find_prop_and_get(e->p, PROP_MIN);
     378             :                 }
     379      119926 :                 has_nonil &= !has_nil(re);
     380             : 
     381             :         }
     382             : 
     383       62850 :         if (has_nonil)
     384       42803 :                 set_has_no_nil(e);
     385             : 
     386       62850 :         if (need_distinct(rel) && list_length(rel->exps) == 1)
     387        1597 :                 set_unique(e);
     388             : }
     389             : 
     390             : 
     391             : static sql_exp *
     392     3555405 : rel_propagate_statistics(visitor *v, sql_rel *rel, sql_exp *e, int depth)
     393             : {
     394     3555405 :         mvc *sql = v->sql;
     395     3555405 :         atom *lval;
     396             : 
     397     3555405 :         (void) depth;
     398     3555405 :         switch(e->type) {
     399     2207547 :         case e_column:
     400     2207547 :                 switch (rel->op) { /* set relations don't call rel_propagate_statistics */
     401      284692 :                 case op_join:
     402             :                 case op_left:
     403             :                 case op_right:
     404             :                 case op_full:
     405             :                 case op_semi:
     406             :                 case op_anti: {
     407      284692 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e);
     408      284692 :                         if (!found)
     409      142794 :                                 (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     410             :                         break;
     411             :                 }
     412     1922855 :                 case op_select:
     413             :                 case op_project:
     414             :                 case op_groupby: {
     415     1922855 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e); /* labels may be found on the same projection, ugh */
     416     1922851 :                         if (!found && is_simple_project(rel->op))
     417      128704 :                                 (void) rel_propagate_column_ref_statistics(sql, rel, e);
     418             :                         break;
     419             :                 }
     420           0 :                 case op_insert:
     421             :                 case op_update:
     422             :                 case op_delete:
     423           0 :                         (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     424           0 :                         break;
     425             :                 default:
     426             :                         break;
     427             :                 }
     428             :                 break;
     429      101863 :         case e_convert: {
     430      101863 :                 sql_subtype *to = exp_totype(e), *from = exp_fromtype(e);
     431      101863 :                 sql_exp *l = e->l;
     432      101863 :                 sql_class fr = from->type->eclass, too = to->type->eclass;
     433      101863 :                 prop *est;
     434             : 
     435      101863 :                 if (fr == too) {
     436       92701 :                         if ((lval = find_prop_and_get(l->p, PROP_MAX))) {
     437       60263 :                                 atom *res = atom_copy(sql->sa, lval);
     438       60263 :                                 if ((res = atom_cast(sql->sa, res, to)))
     439       60240 :                                         set_minmax_property(sql, e, PROP_MAX, res);
     440             :                         }
     441       92701 :                         if ((lval = find_prop_and_get(l->p, PROP_MIN))) {
     442       60901 :                                 atom *res = atom_copy(sql->sa, lval);
     443       60901 :                                 if ((res = atom_cast(sql->sa, res, to)))
     444       60878 :                                         set_minmax_property(sql, e, PROP_MIN, res);
     445             :                         }
     446             :                 }
     447             :                 /* propagating nuniques through conversions is tricky. I am adding just the general cases */
     448      101863 :                 if ((est = find_prop(l->p, PROP_NUNIQUES)) &&
     449       63324 :                         (((fr == EC_TIME || fr == EC_TIME_TZ || fr == EC_DATE) && (too == EC_TIMESTAMP || too == EC_TIMESTAMP_TZ)) ||
     450       63299 :                          ((fr == EC_NUM || fr == EC_BIT) && too == EC_NUM) || (fr == EC_DEC && too == EC_DEC) || (fr != EC_EXTERNAL && too == EC_STRING))) {
     451       59865 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     452       59865 :                         p->value.dval = est->value.dval;
     453             :                 }
     454      101863 :                 if (!has_nil(l))
     455       57955 :                         set_has_no_nil(e);
     456             :                 break;
     457             :         }
     458      379261 :         case e_aggr:
     459             :         case e_func: {
     460      379261 :                 BUN lv;
     461      379261 :                 sql_subfunc *f = e->f;
     462             : 
     463      379261 :                 if (!f->func->s) {
     464      352146 :                         int key = hash_key(f->func->base.name); /* Using hash lookup */
     465      352146 :                         sql_hash_e *he = sql_functions_lookup->buckets[key&(sql_functions_lookup->size-1)];
     466      352146 :                         lookup_function look = NULL;
     467             : 
     468      753772 :                         for (; he && !look; he = he->chain) {
     469      401626 :                                 struct function_properties* fp = (struct function_properties*) he->value;
     470             : 
     471      401626 :                                 if (!strcmp(f->func->base.name, fp->name))
     472      108572 :                                         look = fp->func;
     473             :                         }
     474      352146 :                         if (look)
     475      108572 :                                 look(sql, e);
     476             :                 }
     477             :                 /* for global aggregates with no semantics, if the left relation has values, then the output won't be NULL */
     478      379261 :                 if (!is_semantics(e) && e->l && !have_nil(e->l) &&
     479      109271 :                         (e->type != e_aggr || (is_groupby(rel->op) && list_length(rel->r)) || ((lv = get_rel_count(rel->l)) != BUN_NONE && lv > 0)))
     480      108935 :                         set_has_no_nil(e);
     481             :                 /* set properties for global aggregates */
     482      379261 :                 if (e->type == e_aggr && is_groupby(rel->op) && list_empty(rel->r)) {
     483        7949 :                         if (!find_prop(e->p, PROP_NUNIQUES)) {
     484        7949 :                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     485        7949 :                                 p->value.dval = 1;
     486             :                         }
     487        7949 :                         set_unique(e);
     488             :                 }
     489             :                 break;
     490             :         }
     491      584525 :         case e_atom:
     492      584525 :                 if (e->l) {
     493      566496 :                         atom *a = (atom*) e->l;
     494      566496 :                         if (!a->isnull) {
     495      501590 :                                 set_minmax_property(sql, e, PROP_MAX, a);
     496      501590 :                                 set_minmax_property(sql, e, PROP_MIN, a);
     497             :                         }
     498      566496 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     499      566496 :                         p->value.dval = 1;
     500       18029 :                 } else if (e->f) {
     501        4302 :                         list *vals = (list *) e->f;
     502        4302 :                         sql_exp *first = vals->h ? vals->h->data : NULL;
     503        4302 :                         atom *max = NULL, *min = NULL; /* all child values must have a valid min/max */
     504        4302 :                         int has_nil = 0;
     505             : 
     506        4302 :                         if (first) {
     507        4302 :                                 max = ((lval = find_prop_and_get(first->p, PROP_MAX))) ? lval : NULL;
     508        4302 :                                 min = ((lval = find_prop_and_get(first->p, PROP_MIN))) ? lval : NULL;
     509        4302 :                                 has_nil |= has_nil(first);
     510             :                         }
     511             : 
     512       18436 :                         for (node *n = vals->h ? vals->h->next : NULL ; n ; n = n->next) {
     513        9832 :                                 sql_exp *ee = n->data;
     514             : 
     515        9832 :                                 if (min && max) {
     516        9339 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MAX))) {
     517        9293 :                                                 max = atom_cmp(lval, max) > 0 ? lval : max;
     518             :                                         } else {
     519             :                                                 max = NULL;
     520             :                                         }
     521        9339 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MIN))) {
     522        9293 :                                                 min = atom_cmp(min, lval) > 0 ? lval : min;
     523             :                                         } else {
     524             :                                                 min = NULL;
     525             :                                         }
     526             :                                 }
     527        9832 :                                 has_nil |= has_nil(ee);
     528             :                         }
     529             : 
     530        4302 :                         if (!has_nil)
     531        3928 :                                 set_has_no_nil(e);
     532        4302 :                         if (min && max) {
     533        3886 :                                 set_minmax_property(sql, e, PROP_MAX, max);
     534        3886 :                                 set_minmax_property(sql, e, PROP_MIN, min);
     535             :                         }
     536        4302 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     537        4302 :                         p->value.dval = (dbl) list_length(vals);
     538             :                 } else {
     539       13727 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     540       13727 :                         p->value.dval = 1;
     541             :                 }
     542             :                 break;
     543      282209 :         case e_cmp:
     544             :                 /* TODO? propagating min/max/unique of booleans is not very worth it */
     545      282209 :                 if (e->flag == cmp_or || e->flag == cmp_filter) {
     546       14765 :                         if (!have_nil(e->l) && !have_nil(e->r))
     547       10114 :                                 set_has_no_nil(e);
     548      267444 :                 } else if (e->flag == cmp_in || e->flag == cmp_notin) {
     549       20162 :                         sql_exp *le = e->l;
     550       20162 :                         if (!has_nil(le) && !have_nil(e->r))
     551       16927 :                                 set_has_no_nil(e);
     552             :                 } else {
     553      247282 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     554      247282 :                         if (!has_nil(le) && !has_nil(re) && (!e->f || !has_nil(fe)))
     555      174382 :                                 set_has_no_nil(e);
     556             :                 }
     557             :                 break;
     558             :         case e_psm:
     559             :                 break;
     560             :         }
     561             : 
     562             : #ifndef NDEBUG
     563             :         {
     564             :                 /* min and max cannot be NULL and min must be <= than max, if it happens the inner relation must be empty! */
     565     3555401 :                 atom *min = find_prop_and_get(e->p, PROP_MIN), *max = find_prop_and_get(e->p, PROP_MAX);
     566             : 
     567     3555402 :                 (void) min;
     568     3555402 :                 (void) max;
     569     3555402 :                 assert(!min || !min->isnull);
     570     3555402 :                 assert(!max || !max->isnull);
     571     3555402 :                 assert(!min || !max || (min && max)/* atom_cmp(min, max) <= 0*/);
     572             :         }
     573             : #endif
     574     3555402 :         return e;
     575             : }
     576             : 
     577             : static list * /* Remove predicates always false from min/max values */
     578      141312 : rel_prune_predicates(visitor *v, sql_rel *rel)
     579             : {
     580      141312 :         if (rel->l) {
     581      141312 :                 sql_rel *l = rel->l;
     582      141312 :                 if (is_single(l))
     583        3372 :                         return rel->exps;
     584             :         }
     585      137940 :         if (!list_empty(rel->attr))
     586        3003 :                 return rel->exps;
     587      284285 :         for (node *n = rel->exps->h ; n ; n = n->next) {
     588      149348 :                 sql_exp *e = n->data;
     589             : 
     590      149348 :                 if (e->type == e_cmp && is_theta_exp(e->flag)) {
     591      125227 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     592      125227 :                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     593      125227 :                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     594      125227 :                         bool always_false = false, always_true = false;
     595             : 
     596      125227 :                         if (fe && !is_symmetric(e)) {
     597        3045 :                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
     598        3045 :                                 comp_type lower = range2lcompare(e->flag), higher = range2rcompare(e->flag);
     599        3644 :                                 int not_int1 = rval_min && lval_max && /* the middle and left intervals don't overlap */
     600        1654 :                                         (!is_anti(e) && (lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0)),
     601        4059 :                                         not_int2 = lval_min && fval_max && /* the middle and right intervals don't overlap */
     602        2482 :                                         (!is_anti(e) && (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0)),
     603        3045 :                                         not_int3 = rval_min && fval_max && /* the left interval is after the right one */
     604        1285 :                                         (!is_anti(e) && (atom_cmp(rval_min, fval_max) > 0));
     605             : 
     606        3045 :                                 always_false |= not_int1 || not_int2 || not_int3;
     607             :                                 /* for anti the middle must be before the left or after the right or the right after the left, for the other the middle must be always between the left and right intervals */
     608        4089 :                                 always_true |= !has_nil(le) && !has_nil(re) && !has_nil(fe) &&
     609        3941 :                                         lval_min && lval_max && rval_min && rval_max && fval_min && fval_max &&
     610         573 :                                         (is_anti(e) ? ((lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) || (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
     611         488 :                                          ((lower == cmp_gte ? atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0)));
     612             :                         } else if (!fe) {
     613      122166 :                                 if (!is_semantics(e)) /* trivial not null cmp null case */
     614      232888 :                                         always_false |= !is_anti(e) && ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)));
     615      122166 :                                 switch (e->flag) {
     616      108640 :                                 case cmp_equal:
     617      108640 :                                         if (lval_min && lval_max && rval_min && rval_max && (!is_semantics(e) || !has_nil(le) || !has_nil(re)))
     618      135960 :                                                 always_false |= (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     619      108640 :                                         if (is_semantics(e)) { /* prune *= NULL cases */
     620        5685 :                                                 always_false |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     621       11370 :                                                 always_true |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     622             :                                         }
     623             :                                         break;
     624        5334 :                                 case cmp_notequal:
     625        5334 :                                         if (lval_min && lval_max && rval_min && rval_max)
     626        7364 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     627        5334 :                                         if (is_semantics(e)) {
     628          37 :                                                 always_true |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     629          74 :                                                 always_false |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     630             :                                         }
     631             :                                         break;
     632        5487 :                                 case cmp_gt:
     633        5487 :                                         if (lval_max && rval_min)
     634        1834 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     635        5487 :                                         if (lval_min && rval_max)
     636        3668 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     637             :                                         break;
     638         121 :                                 case cmp_gte:
     639         121 :                                         if (lval_max && rval_min)
     640          51 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     641         121 :                                         if (lval_min && rval_max)
     642         102 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     643             :                                         break;
     644        2502 :                                 case cmp_lt:
     645        2502 :                                         if (lval_min && rval_max)
     646        1382 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     647        2502 :                                         if (lval_max && rval_min)
     648        2812 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     649             :                                         break;
     650          82 :                                 case cmp_lte:
     651          82 :                                         if (lval_min && rval_max)
     652          11 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     653          82 :                                         if (lval_max && rval_min)
     654          22 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     655             :                                         break;
     656             :                                 default: /* Maybe later I can do cmp_in and cmp_notin but don't forget to remove is_theta_exp check up there */
     657             :                                         break;
     658             :                                 }
     659             :                         }
     660      125227 :                         assert(!always_false || !always_true);
     661      125227 :                         if (always_false || always_true) {
     662        3698 :                                 sql_exp *ne = exp_atom_bool(v->sql->sa, always_true /* if always_true set then true else false */);
     663        3698 :                                 if (exp_name(e))
     664           0 :                                         exp_prop_alias(v->sql->sa, ne, e);
     665        3698 :                                 n->data = ne;
     666        3698 :                                 v->changes++;
     667             :                         }
     668             :                 }
     669             :         }
     670      134937 :         return rel->exps;
     671             : }
     672             : 
     673             : static sql_rel *
     674          14 : set_setop_side(visitor *v, sql_rel *rel, sql_rel *side)
     675             : {
     676          14 :         if (side == rel->r)
     677           0 :                 rel->r = NULL;
     678             :         else
     679          14 :                 rel->l = NULL;
     680          14 :         if (!is_simple_project(side->op) || rel_is_ref(side) || !list_empty(side->r)) {
     681           0 :                 side = rel_project(v->sql->sa, side, rel_projections(v->sql, side, NULL, 0, 1));
     682           0 :                 set_count_prop(v->sql->sa, side, get_rel_count(side->l));
     683           0 :                 side->exps = exps_exp_visitor_bottomup(v, side, side->exps, 0, &rel_propagate_statistics, false);
     684             :         }
     685          35 :         for (node *n = rel->exps->h, *m = side->exps->h ; n && m ; n = n->next, m = m->next) {
     686          21 :                 sql_exp *e1 = n->data, *e2 = m->data;
     687          21 :                 exp_prop_alias(v->sql->sa, e2, e1);
     688             :         }
     689          14 :         list_hash_clear(side->exps);
     690             : 
     691          14 :         if (need_distinct(rel))
     692           0 :                 set_distinct(side);
     693          14 :         side->p = prop_copy(v->sql->sa, rel->p);
     694          14 :         rel_destroy(rel);
     695          14 :         return side;
     696             : }
     697             : 
     698             : static BUN
     699       22258 : trivial_project_exp_card(sql_exp *e)
     700             : {
     701       22633 :         if (e->type == e_convert)
     702         375 :                 return trivial_project_exp_card(e->l);
     703       22258 :         return e->type == e_atom && e->f ? (BUN) list_length(e->f) : 1;
     704             : }
     705             : 
     706             : static BUN
     707       26198 : rel_calc_nuniques(mvc *sql, sql_rel *l, list *exps)
     708             : {
     709       26198 :         BUN lv = get_rel_count(l);
     710             : 
     711       26198 :         if (lv == 0)
     712             :                 return 0;
     713       23462 :         if (!list_empty(exps)) {
     714       23462 :                 BUN nuniques = 0;
     715             :                 /* compute the highest number of unique values */
     716       58644 :                 for (node *n = exps->h ; n && nuniques != BUN_NONE ; n = n->next) {
     717       35182 :                         sql_exp *e = n->data;
     718       35182 :                         sql_rel *bt = NULL;
     719       35182 :                         prop *p = NULL;
     720       35182 :                         BUN euniques = BUN_NONE;
     721       35182 :                         atom *min, *max, *sub = NULL;
     722       35182 :                         sql_subtype *tp = exp_subtype(e);
     723       35182 :                         sql_class ec = tp ? tp->type->eclass : EC_STRING; /* if 'e' has no type (eg parameter), use a non-number type to fail condition */
     724             : 
     725       35182 :                         if ((p = find_prop(e->p, PROP_NUNIQUES))) {
     726       25419 :                                 euniques = (BUN) p->value.dval;
     727        9763 :                         } else if (e->type == e_column && e->nid && rel_find_exp_and_corresponding_rel(l, e, false, &bt, NULL) && bt && (p = find_prop(bt->p, PROP_COUNT))) {
     728        7665 :                                 euniques = (BUN) p->value.lval;
     729             :                         }
     730             :                         /* use min to max range to compute number of possible values in the domain for number types */
     731       35182 :                         if ((EC_TEMP(ec)||ec==EC_NUM||ec==EC_MONTH||ec==EC_POS) &&
     732       24292 :                                 (min = find_prop_and_get(e->p, PROP_MIN)) && (max = find_prop_and_get(e->p, PROP_MAX))) {
     733             :                                 /* the range includes min and max, so the atom_inc call is needed */
     734             :                                 /* if 'euniques' has number of distinct values, compute min between both */
     735       19346 :                                 if ((sub = atom_sub(sql->sa, max, min)) && (sub = atom_inc(sql->sa, sub)) && (sub = atom_cast(sql->sa, sub, sql_bind_localtype("oid"))))
     736           0 :                                         euniques = MIN(euniques, (BUN) sub->data.val.oval);
     737             :                         }
     738       35182 :                         if (euniques != BUN_NONE)
     739       33084 :                                 nuniques = MAX(nuniques, euniques); /* the highest cardinality sets the estimation */
     740             :                         else
     741             :                                 nuniques = BUN_NONE;
     742             :                 }
     743       23462 :                 if (nuniques != BUN_NONE)
     744             :                         return nuniques;
     745             :         }
     746             :         return lv;
     747             : }
     748             : 
     749             : static sql_rel *
     750     2108733 : rel_get_statistics_(visitor *v, sql_rel *rel)
     751             : {
     752             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
     753     2108733 :         uint8_t has_special_modify = *(uint8_t*) v->data;
     754     2108733 :         bool can_be_pruned = !has_special_modify && v->storage_based_opt;
     755             : 
     756             :         /* Don't look at the same relation twice */
     757     2108733 :         if (are_statistics_gathered(rel->used))
     758             :                 return rel;
     759     1360063 :         rel->used |= statistics_gathered;
     760             : 
     761     1360063 :         switch (rel->op) {
     762      319466 :         case op_basetable: {
     763      319466 :                 sql_table *t = (sql_table *) rel->l;
     764      319466 :                 sqlstore *store = v->sql->session->tr->store;
     765             : 
     766      319466 :                 if (!list_empty(rel->exps)) {
     767     1273265 :                         for (node *n = rel->exps->h ; n ; n = n->next)
     768      953848 :                                 rel_basetable_column_get_statistics(v->sql, rel, n->data);
     769             :                 }
     770             :                 /* Set table row count. TODO? look for remote tables. Don't look at storage for declared tables, because it won't be cleaned */
     771      319466 :                 if (isTable(t) && t->s && !isDeclaredTable(t)) /* count active rows only */
     772      264703 :                         set_count_prop(v->sql->sa, rel, (BUN)store->storage_api.count_col(v->sql->session->tr, ol_first_node(t->columns)->data, 10));
     773             :                 break;
     774             :         }
     775        2794 :         case op_union:
     776             :         case op_inter:
     777             :         case op_except: {
     778        2794 :                 bool empty_cross = false;
     779        2794 :                 int i = 0;
     780        2794 :                 sql_rel *l = rel->l, *pl = rel->l, *r = rel->r, *pr = rel->r;
     781             : 
     782        2794 :                 while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     783           0 :                         pl = pl->l;
     784        2794 :                 while (is_sample(pr->op) || is_topn(pr->op))
     785           0 :                         pr = pr->l;
     786             :                 /* if it's not a projection, then project and propagate statistics */
     787        2794 :                 if (!is_project(pl->op) && !is_base(pl->op)) {
     788           0 :                         pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     789           0 :                         set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     790           0 :                         pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     791             :                 }
     792        2794 :                 if (!is_project(pr->op) && !is_base(pr->op)) {
     793           0 :                         pr = rel_project(v->sql->sa, pr, rel_projections(v->sql, pr, NULL, 0, 1));
     794           0 :                         set_count_prop(v->sql->sa, pr, get_rel_count(pr->l));
     795           0 :                         pr->exps = exps_exp_visitor_bottomup(v, pr, pr->exps, 0, &rel_propagate_statistics, false);
     796             :                 }
     797             : 
     798       11670 :                 for (node *n = rel->exps->h ; n ; n = n->next) {
     799        8876 :                         empty_cross |= rel_setop_get_statistics(v->sql, rel, pl->exps, pr->exps, n->data, i);
     800        8876 :                         i++;
     801             :                 }
     802             : 
     803             :                 /* propagate row count */
     804        2794 :                 if (is_union(rel->op)) {
     805         277 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     806         277 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     807             : 
     808         277 :                         if (lv == 0 && rv == 0) { /* both sides empty */
     809           2 :                                 if (can_be_pruned)
     810             :                                         empty_cross = true;
     811             :                                 else
     812           2 :                                         set_count_prop(v->sql->sa, rel, 0);
     813         275 :                         } else if (can_be_pruned && lv == 0 && !rel_is_ref(rel)) { /* left side empty */
     814           0 :                                 rel = set_setop_side(v, rel, r);
     815           0 :                                 empty_cross = false; /* don't rewrite again */
     816         275 :                         } else if (can_be_pruned && rv == 0 && !rel_is_ref(rel)) { /* right side empty */
     817           0 :                                 rel = set_setop_side(v, rel, l);
     818           0 :                                 empty_cross = false; /* don't rewrite again */
     819         275 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
     820           7 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
     821             :                         }
     822        2517 :                 } else if (is_inter(rel->op) || is_except(rel->op)) {
     823        2517 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     824        2517 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     825             : 
     826        2517 :                         if (lv == 0) { /* left side empty */
     827          63 :                                 if (can_be_pruned)
     828             :                                         empty_cross = true;
     829             :                                 else
     830           5 :                                         set_count_prop(v->sql->sa, rel, 0);
     831        2454 :                         } else if (rv == 0) { /* right side empty */
     832          17 :                                 if (is_inter(rel->op)) {
     833           3 :                                         if (can_be_pruned)
     834             :                                                 empty_cross = true;
     835             :                                         else
     836           0 :                                                 set_count_prop(v->sql->sa, rel, 0);
     837          14 :                                 } else if (can_be_pruned && !rel_is_ref(rel)) {
     838          14 :                                         rel = set_setop_side(v, rel, l);
     839          14 :                                         empty_cross = false; /* don't rewrite again */
     840             :                                 } else {
     841           0 :                                         set_count_prop(v->sql->sa, rel, lv);
     842             :                                 }
     843             :                         } else {
     844        2437 :                                 set_count_prop(v->sql->sa, rel, lv);
     845             :                         }
     846             :                 }
     847        2794 :                 if (can_be_pruned && empty_cross) {
     848          81 :                         rel_destroy(rel->l);
     849          81 :                         rel->l = NULL;
     850          81 :                         rel_destroy(rel->r);
     851          81 :                         rel->r = NULL;
     852         246 :                         for (node *n = rel->exps->h ; n ; n = n->next) {
     853         165 :                                 sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     854         165 :                                 exp_prop_alias(v->sql->sa, a, e);
     855         165 :                                 n->data = a;
     856             :                         }
     857          81 :                         list_hash_clear(rel->exps);
     858          81 :                         sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     859          81 :                         set_count_prop(v->sql->sa, l, 1);
     860          81 :                         l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     861          81 :                         set_count_prop(v->sql->sa, l, 0);
     862          81 :                         rel->op = op_project;
     863          81 :                         rel->l = l;
     864          81 :                         rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     865          81 :                         set_count_prop(v->sql->sa, rel, 0);
     866          81 :                         set_nodistinct(rel); /* set relations may have distinct flag set */
     867          81 :                         v->changes++;
     868             :                 }
     869             :                 break;
     870             :         }
     871       13477 :         case op_munion: {
     872       13477 :                 list *l = rel->l, *nrels = sa_list(v->sql->sa);
     873       13477 :                 BUN cnt = 0;
     874       13477 :                 bool needs_pruning = false;
     875             : 
     876       13477 :                 if (is_recursive(rel))
     877             :                         break;
     878       51248 :                 for (node *n = l->h; n; n = n->next) {
     879       37842 :                         sql_rel *r = n->data, *pl = r;
     880             : 
     881       37842 :                         while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     882           0 :                                         pl = pl->l;
     883             :                         /* if it's not a projection, then project and propagate statistics */
     884       37842 :                         if (!is_project(pl->op) && !is_base(pl->op)) {
     885           0 :                                 pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     886           0 :                                 set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     887           0 :                                 pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     888             :                         }
     889       37842 :                         nrels = append(nrels, pl);
     890             :                         /* we need new munion statistics */
     891             :                         /* propagate row count */
     892       37842 :                         BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     893             :                         /* if PROP_COUNT does not exist we assume at least a row (see get_rel_count def) */
     894       37842 :                         if (rv == BUN_NONE) {
     895        1220 :                                 cnt++;
     896        1220 :                                 continue;
     897             :                         }
     898       36622 :                         if (!rv && can_be_pruned)
     899        6724 :                                 needs_pruning = true;
     900             :                         /* overflow check */
     901       36622 :                         if (rv > (BUN_MAX - cnt))
     902       37842 :                                 rv = BUN_MAX;
     903             :                         else
     904       36622 :                                 cnt += rv;
     905             :                 }
     906       13406 :                 int i = 0;
     907       76256 :                 for (node *n = rel->exps->h ; n ; n = n->next, i++)
     908       62850 :                         rel_munion_get_statistics(v->sql, rel, nrels, n->data, i);
     909             : 
     910       13406 :                 if (needs_pruning && !rel_is_ref(rel)) {
     911        4563 :                         v->changes++;
     912        4563 :                         list *nl = sa_list(l->sa);
     913             : 
     914       16832 :                         for (node *n = nrels->h; n; n = n->next) {
     915       12269 :                                 sql_rel *r = n->data;
     916       12269 :                                 BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     917             : 
     918       12269 :                                 if (!rv) { /* keep last for now */
     919        6254 :                                         rel_destroy(r);
     920        6254 :                                         continue;
     921             :                                 }
     922        6015 :                                 nl = append(nl, r);
     923             :                         }
     924        4563 :                         rel->l = nl;
     925        4563 :                         if (list_length(nl) == 1) {
     926        4227 :                                 sql_rel *l = rel->l = nl->h->data; /* ugh */
     927        4227 :                                 rel->r = NULL;
     928        4227 :                                 rel->op = op_project;
     929             : 
     930       20767 :                                 for (node *n = rel->exps->h, *m = l->exps->h ; n && m ; n = n->next, m = m->next) {
     931       16540 :                                         sql_exp *pe = n->data, *ie = m->data;
     932       16540 :                                         sql_exp *ne = exp_ref(v->sql, ie);
     933       16540 :                                         exp_prop_alias(v->sql->sa, ne, pe);
     934       16540 :                                         n->data = ne;
     935             :                                 }
     936        4227 :                                 list_hash_clear(rel->exps);
     937         336 :                         } else if (list_empty(nl)) {
     938             :                                 /* empty select (project [ nils ] ) */
     939         441 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     940         341 :                                         sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     941         341 :                                         exp_prop_alias(v->sql->sa, a, e);
     942         341 :                                         n->data = a;
     943             :                                 }
     944         100 :                                 list_hash_clear(rel->exps);
     945         100 :                                 sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     946         100 :                                 set_count_prop(v->sql->sa, l, 1);
     947         100 :                                 l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     948         100 :                                 set_count_prop(v->sql->sa, l, 0);
     949         100 :                                 rel->op = op_project;
     950         100 :                                 rel->r = NULL;
     951         100 :                                 rel->l = l;
     952         100 :                                 rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     953         100 :                                 set_count_prop(v->sql->sa, rel, 0);
     954         100 :                                 set_nodistinct(rel); /* set relations may have distinct flag set */
     955             :                         }
     956             :                 } else {
     957        8843 :                         set_count_prop(v->sql->sa, rel, cnt);
     958             :                 }
     959             :                 break;
     960             :         }
     961      542078 :         case op_join:
     962             :         case op_left:
     963             :         case op_right:
     964             :         case op_full:
     965             :         case op_semi:
     966             :         case op_anti:
     967             :         case op_select:
     968             :         case op_project:
     969             :         case op_groupby: {
     970      542078 :                 if (is_groupby(rel->op) && !list_empty(rel->r))
     971       16934 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     972      542078 :                 rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, &rel_propagate_statistics, false);
     973      542077 :                 if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r))
     974       39574 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     975             :                 /* The following optimizations can only be applied after propagating the statistics to rel->exps */
     976      542077 :                 if (can_be_pruned && (is_join(rel->op) || is_select(rel->op)) && !list_empty(rel->exps)) {
     977      141312 :                         int changes = v->changes;
     978      141312 :                         rel->exps = rel_prune_predicates(v, rel);
     979      141312 :                         if (v->changes > changes)
     980        3665 :                                 rel = rewrite_simplify(v, 0, v->value_based_opt, rel);
     981             :                 }
     982             : 
     983             :                 /* propagate row count */
     984      542077 :                 sql_rel *l = rel->l, *r = rel->r;
     985      542077 :                 switch (rel->op) {
     986      137091 :                 case op_join:
     987             :                 case op_left:
     988             :                 case op_right:
     989             :                 case op_full: {
     990      137091 :                         BUN lv = get_rel_count(l), rv = get_rel_count(r), uniques_estimate = BUN_MAX, join_idx_estimate = BUN_MAX;
     991             : 
     992      137091 :                         if (!list_empty(rel->exps) && !is_single(rel)) {
     993      250671 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     994      127814 :                                         sql_exp *e = n->data, *el = e->l, *er = e->r;
     995             : 
     996      127814 :                                         if (find_prop(e->p, PROP_JOINIDX)) {
     997         667 :                                                 join_idx_estimate = lv>rv?lv:rv;
     998      127147 :                                         } else if (e->type == e_cmp && e->flag == cmp_equal) {
     999             :                                                 /* if one of the sides is unique, the cardinality will be that exact number, but look for nulls */
    1000      123262 :                                                 if (!is_semantics(e) || !has_nil(el) || !has_nil(er)) {
    1001      123085 :                                                         BUN lu = 0, ru = 0;
    1002      123085 :                                                         prop *p = NULL;
    1003      123085 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES)))
    1004       91312 :                                                                 lu = (BUN) p->value.dval;
    1005      123085 :                                                         if ((p = find_prop(er->p, PROP_NUNIQUES)))
    1006      105982 :                                                                 ru = (BUN) p->value.dval;
    1007      123085 :                                                         if (is_unique(el) || lu > lv) {
    1008       74975 :                                                                 BUN ncount = (is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv;
    1009       74975 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1010       48110 :                                                         } else if (is_unique(er) || ru > rv) {
    1011       30057 :                                                                 BUN ncount = (is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv;
    1012       30057 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1013             :                                                         }
    1014             :                                                 }
    1015             :                                         }
    1016             :                                 }
    1017             :                         }
    1018      137091 :                         if (is_single(rel)) {
    1019        4801 :                                 set_count_prop(v->sql->sa, rel, lv);
    1020      132290 :                         } else if (join_idx_estimate != BUN_MAX) {
    1021         665 :                                 set_count_prop(v->sql->sa, rel, join_idx_estimate);
    1022      131625 :                         } else if (uniques_estimate != BUN_MAX) {
    1023       98478 :                                 set_count_prop(v->sql->sa, rel, uniques_estimate);
    1024       33147 :                         } else if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1025             :                                 /* corner cases for outer joins */
    1026         126 :                                 if (is_left(rel->op)) {
    1027         114 :                                         set_count_prop(v->sql->sa, rel, lv);
    1028          12 :                                 } else if (is_right(rel->op)) {
    1029           3 :                                         set_count_prop(v->sql->sa, rel, rv);
    1030           9 :                                 } else if (is_full(rel->op) && lv != BUN_NONE && rv != BUN_NONE) {
    1031           9 :                                         set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
    1032           0 :                                 } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1033           0 :                                         set_count_prop(v->sql->sa, rel, 0);
    1034             :                                 }
    1035       33021 :                         } else if (lv == 0) {
    1036        1204 :                                 set_count_prop(v->sql->sa, rel, (is_right(rel->op) || is_full(rel->op)) ? rv : 0);
    1037       32405 :                         } else if (rv == 0) {
    1038        1574 :                                 set_count_prop(v->sql->sa, rel, (is_left(rel->op) || is_full(rel->op)) ? lv : 0);
    1039       31325 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1040       18541 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX / lv)) ? BUN_MAX : (lv * rv)); /* overflow check */
    1041             :                         }
    1042             :                         break;
    1043             :                 }
    1044        2935 :                 case op_anti:
    1045        2935 :                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1046        2935 :                         break;
    1047      110552 :                 case op_semi:
    1048             :                 case op_select:
    1049             :                         /* TODO calculate cardinalities using selectivities */
    1050      110552 :                         if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1051        2755 :                                 set_count_prop(v->sql->sa, rel, 0);
    1052             :                         } else {
    1053      212307 :                                 if (!list_empty(rel->exps) && !is_single(rel)) {
    1054      104511 :                                         BUN cnt = get_rel_count(l), u = 1;
    1055      170387 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1056      111675 :                                                 sql_exp *e = n->data, *el = e->l, *er = e->r;
    1057             : 
    1058             :                                                 /* simple expressions first */
    1059      111675 :                                                 if (e->type == e_cmp && e->flag == cmp_equal && exp_is_atom(er)) {
    1060             :                                                         /* use selectivity */
    1061       61262 :                                                         prop *p;
    1062       61262 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES))) {
    1063       45800 :                                                                 u = (BUN) p->value.dval;
    1064       45800 :                                                                 break;
    1065             :                                                         }
    1066             :                                                 }
    1067             :                                         }
    1068             :                                         /* u is an *estimate*, so don't set count_prop to 0 unless cnt is 0 */
    1069      104512 :                                         set_count_prop(v->sql->sa, rel, cnt == 0 ? 0 : u == 0 || u > cnt ? 1 : cnt/u);
    1070             :                                 } else {
    1071        3285 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1072             :                                 }
    1073             :                         }
    1074             :                         break;
    1075      266657 :                 case op_project:
    1076      266657 :                         if (l) {
    1077      256322 :                                 if (need_distinct(rel)) {
    1078           0 :                                         set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->exps));
    1079             :                                 } else {
    1080      256322 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1081             :                                 }
    1082             :                         } else {
    1083       10335 :                                 BUN card = 1;
    1084             : 
    1085       10335 :                                 if (!list_empty(rel->exps)) {
    1086       31244 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1087       20909 :                                                 sql_exp *e = n->data;
    1088       20909 :                                                 card = MAX(card, trivial_project_exp_card(e));
    1089             :                                         }
    1090             :                                 }
    1091       10335 :                                 set_count_prop(v->sql->sa, rel, card);
    1092             :                         }
    1093             :                         break;
    1094       24431 :                 case op_groupby:
    1095       24431 :                         if (list_empty(rel->r)) {
    1096        7496 :                                 set_count_prop(v->sql->sa, rel, 1);
    1097             :                         } else {
    1098       16935 :                                 set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->r));
    1099             :                         }
    1100             :                         break;
    1101             :                 default:
    1102             :                         break;
    1103             :                 }
    1104             :                 break;
    1105             :         }
    1106       17014 :         case op_topn: {
    1107       17014 :                 BUN lv = get_rel_count(rel->l);
    1108             : 
    1109       17014 :                 if (lv != BUN_NONE) {
    1110       16991 :                         sql_exp *le = rel->exps->h->data, *oe = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
    1111          84 :                         if (oe && oe->l && exp_is_not_null(oe)) { /* no parameters */
    1112          84 :                                 BUN offset = (BUN) ((atom*)oe->l)->data.val.lval;
    1113          84 :                                 lv = offset >= lv ? 0 : lv - offset;
    1114             :                         }
    1115       16991 :                         if (le->l && exp_is_not_null(le)) {
    1116       16956 :                                 BUN limit = (BUN) ((atom*)le->l)->data.val.lval;
    1117       16956 :                                 lv = MIN(lv, limit);
    1118             :                         }
    1119       16991 :                         set_count_prop(v->sql->sa, rel, lv);
    1120             :                 }
    1121             :                 break;
    1122             :         }
    1123          22 :         case op_sample: {
    1124          22 :                 BUN lv = get_rel_count(rel->l);
    1125             : 
    1126          22 :                 if (lv != BUN_NONE) {
    1127           4 :                         sql_exp *se = rel->exps->h->data;
    1128           4 :                         sql_subtype *tp = exp_subtype(se);
    1129             : 
    1130           4 :                         if (se->l && tp->type->eclass == EC_NUM) { /* sample is a number of rows */
    1131           4 :                                 BUN sample = (BUN) ((atom*)se->l)->data.val.lval;
    1132           4 :                                 lv = MIN(lv, sample);
    1133           0 :                         } else if (se->l) { /* sample is a percentage of rows */
    1134           0 :                                 dbl percent = ((atom*)se->l)->data.val.dval;
    1135           0 :                                 assert(tp->type->eclass == EC_FLT);
    1136           0 :                                 lv = (BUN) ceil((dbl)lv * percent);
    1137             :                         }
    1138           4 :                         set_count_prop(v->sql->sa, rel, lv);
    1139             :                 }
    1140             :                 break;
    1141             :         }
    1142        6196 :         case op_table: {
    1143        6196 :                 sql_exp *op = rel->r;
    1144        6196 :                 if (rel->flag != TRIGGER_WRAPPER && op) {
    1145        5884 :                         sql_subfunc *f = op->f;
    1146        5884 :                         if (f->func->lang == FUNC_LANG_SQL) {
    1147          97 :                                 set_count_prop(v->sql->sa, rel, 1000 /* just some fallback value */);
    1148        5787 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "storage") == 0) {
    1149         837 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of catalog */);
    1150        4950 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "db_users") == 0) {
    1151           0 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of users */);
    1152        4950 :                         } else if (f->func->lang == FUNC_LANG_MAL && strncmp(f->func->base.name, "querylog", 8) == 0) {
    1153        1101 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of querylog */);
    1154        3849 :                         } else if (f->func->lang == FUNC_LANG_MAL &&
    1155        3684 :                                            (strcmp(f->func->base.name, "queue") == 0 ||
    1156        3416 :                                                 strcmp(f->func->base.name, "optimizers") == 0 ||
    1157        3100 :                                                 strcmp(f->func->base.name, "env") == 0 ||
    1158        2734 :                                                 strcmp(f->func->base.name, "keywords") == 0 ||
    1159        2734 :                                                 strcmp(f->func->base.name, "statistics") == 0 ||
    1160        2057 :                                                 strcmp(f->func->base.name, "rejects") == 0 ||
    1161        1799 :                                                 strcmp(f->func->base.name, "schemastorage") == 0 ||
    1162        1799 :                                                 strncmp(f->func->base.name, "storage", 7) == 0 ||
    1163        1799 :                                                 strcmp(f->func->base.name, "sessions") == 0) ) {
    1164        2179 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of queue */);
    1165             :                         }
    1166             :                         /* else {
    1167             :                                 printf("%%func needs stats : %s\n", f->func->base.name);
    1168             :                         } */
    1169             :                 }
    1170             :                 break;
    1171             :         }
    1172             :         /*These relations are less important for now
    1173             :           TODO later we can tune it */
    1174             :         case op_insert:
    1175             :         case op_update:
    1176             :         case op_delete:
    1177             :         case op_truncate:
    1178             :         case op_ddl:
    1179             :         default:
    1180             :                 break;
    1181             :         }
    1182             : 
    1183             :         return rel;
    1184             : }
    1185             : 
    1186             : static sql_rel *
    1187      552909 : rel_get_statistics(visitor *v, global_props *gp, sql_rel *rel)
    1188             : {
    1189             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
    1190      552909 :         uint8_t has_special_modify = (uint8_t) gp->has_special_modify;
    1191      552909 :         v->data = &has_special_modify;
    1192      552909 :         rel = rel_visitor_bottomup(v, rel, &rel_get_statistics_);
    1193      552909 :         v->data = gp;
    1194      552909 :         return rel;
    1195             : }
    1196             : 
    1197             : run_optimizer
    1198      729811 : bind_get_statistics(visitor *v, global_props *gp)
    1199             : {
    1200      729811 :         (void) v;
    1201      729811 :         return (gp->opt_level == 1 && !gp->cnt[op_insert]) ? rel_get_statistics : NULL;
    1202             : }
    1203             : 
    1204             : 
    1205             : static bool
    1206       96527 : point_select_on_unique_column(sql_rel *rel)
    1207             : {
    1208       96527 :         if (is_select(rel->op) && !list_empty(rel->exps)) {
    1209      133131 :                 for (node *n = rel->exps->h; n ; n = n->next) {
    1210       76721 :                         sql_exp *e = n->data, *el = e->l, *er = e->r, *found = NULL;
    1211             : 
    1212       76721 :                         if (is_compare(e->type) && e->flag == cmp_equal) {
    1213       33992 :                                 if (is_numeric_upcast(el))
    1214           0 :                                         el = el->l;
    1215       33992 :                                 if (is_numeric_upcast(er))
    1216           0 :                                         er = er->l;
    1217       33992 :                                 if (is_alias(el->type) && exp_is_atom(er) && (found = rel_find_exp(rel->l, el)) &&
    1218         761 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(found) || !has_nil(er)))
    1219             :                                         return true;
    1220       33231 :                                 if (is_alias(er->type) && exp_is_atom(el) && (found = rel_find_exp(rel->l, er)) &&
    1221           0 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(el) || !has_nil(found)))
    1222             :                                         return true;
    1223             :                         }
    1224             :                 }
    1225             :         }
    1226             :         return false;
    1227             : }
    1228             : 
    1229             : /*
    1230             :  * A point select on an unique column reduces the number of rows to 1. If the same select is under a
    1231             :  * join, the opposite side's select can be pushed above the join.
    1232             :  */
    1233             : static inline sql_rel *
    1234     1269837 : rel_push_select_up(visitor *v, sql_rel *rel)
    1235             : {
    1236     1269837 :         if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) || is_semi(rel->op)) && !is_single(rel)) {
    1237      258573 :                 sql_rel *l = rel->l, *r = rel->r;
    1238      258573 :                 bool can_pushup_left = is_select(l->op) && !rel_is_ref(l) && !is_single(l) && (is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op)),
    1239      258573 :                         can_pushup_right = is_select(r->op) && !rel_is_ref(r) && !is_single(r) && (is_innerjoin(rel->op) || is_right(rel->op));
    1240             : 
    1241      258573 :                 if (can_pushup_left || can_pushup_right) {
    1242       68067 :                         if (can_pushup_left)
    1243       46246 :                                 can_pushup_left = point_select_on_unique_column(r);
    1244       68067 :                         if (can_pushup_right)
    1245       50281 :                                 can_pushup_right = point_select_on_unique_column(l);
    1246             : 
    1247             :                         /* if both selects retrieve one row each, it's not worth it to push both up */
    1248       68067 :                         if (can_pushup_left && !can_pushup_right) {
    1249         695 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1250         695 :                                 nrel->l = l->l;
    1251         695 :                                 rel = rel_inplace_select(rel, nrel, l->exps);
    1252         695 :                                 assert(is_select(rel->op));
    1253         695 :                                 v->changes++;
    1254       67372 :                         } else if (!can_pushup_left && can_pushup_right) {
    1255          14 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1256          14 :                                 nrel->r = r->l;
    1257          14 :                                 rel = rel_inplace_select(rel, nrel, r->exps);
    1258          14 :                                 assert(is_select(rel->op));
    1259          14 :                                 v->changes++;
    1260             :                         }
    1261             :                 }
    1262             :         }
    1263     1269837 :         return rel;
    1264             : }
    1265             : 
    1266             : static int
    1267       97955 : sql_class_base_score(visitor *v, sql_column *c, sql_subtype *t, bool equality_based)
    1268             : {
    1269       97955 :         int de;
    1270             : 
    1271       97955 :         if (!t)
    1272             :                 return 0;
    1273       97955 :         switch (ATOMstorage(t->type->localtype)) {
    1274             :         case TYPE_bte:
    1275             :                 return 150 - 8;
    1276        1642 :         case TYPE_sht:
    1277        1642 :                 return 150 - 16;
    1278       37649 :         case TYPE_int:
    1279       37649 :                 return 150 - 32;
    1280         523 :         case TYPE_void:
    1281             :         case TYPE_lng:
    1282         523 :                 return 150 - 64;
    1283             :         case TYPE_uuid:
    1284             : #ifdef HAVE_HGE
    1285             :         case TYPE_hge:
    1286             : #endif
    1287             :                 return 150 - 128;
    1288           2 :         case TYPE_flt:
    1289           2 :                 return 75 - 24;
    1290             :         case TYPE_dbl:
    1291             :                 return 75 - 53;
    1292       34302 :         default:
    1293       34302 :                 if (equality_based && c && v->storage_based_opt && (de = mvc_is_duplicate_eliminated(v->sql, c)))
    1294        1656 :                         return 150 - de * 8;
    1295             :                 /* strings and blobs not duplicate eliminated don't get any points here */
    1296             :                 return 0;
    1297             :         }
    1298             : }
    1299             : 
    1300             : static int
    1301       34506 : score_se_base(visitor *v, sql_rel *rel, sql_exp *e)
    1302             : {
    1303       34506 :         int res = 0;
    1304       34506 :         sql_subtype *t = exp_subtype(e);
    1305       34506 :         sql_column *c = NULL;
    1306             : 
    1307       34506 :         if (e->type == e_convert) /* keep unsafes at the end (TODO improve) */
    1308             :                 return -1000;
    1309             :         /* can we find out if the underlying table is sorted */
    1310       33906 :         if ((c = exp_find_column(rel, e, -2)) && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1311       33906 :                 res += 600;
    1312             : 
    1313             :         /* prefer the shorter var types over the longer ones */
    1314       33906 :         res += sql_class_base_score(v, c, t, is_equality_or_inequality_exp(e->flag)); /* smaller the type, better */
    1315       33906 :         return res;
    1316             : }
    1317             : 
    1318             : static int
    1319       56536 : score_se(visitor *v, sql_rel *rel, sql_exp *e)
    1320             : {
    1321       56536 :         int score = 0;
    1322       56536 :         if (e->type == e_cmp && !is_complex_exp(e->flag)) {
    1323       34506 :                 sql_exp *l = e->l;
    1324             : 
    1325       34506 :                 while (l->type == e_cmp) { /* go through nested comparisons */
    1326           2 :                         sql_exp *ll;
    1327             : 
    1328           2 :                         if (l->flag == cmp_filter || l->flag == cmp_or)
    1329           0 :                                 ll = ((list*)l->l)->h->data;
    1330             :                         else
    1331           2 :                                 ll = l->l;
    1332           2 :                         if (ll->type != e_cmp)
    1333             :                                 break;
    1334             :                         l = ll;
    1335             :                 }
    1336       34506 :                 score += score_se_base(v, rel, l);
    1337             :         }
    1338       56536 :         score += exp_keyvalue(e);
    1339       56536 :         return score;
    1340             : }
    1341             : 
    1342             : static inline sql_rel *
    1343     1269837 : rel_select_order(visitor *v, sql_rel *rel)
    1344             : {
    1345     1269837 :         int *scores = NULL;
    1346     1269837 :         sql_exp **exps = NULL;
    1347             : 
    1348     1269837 :         if (is_select(rel->op) && list_length(rel->exps) > 1) {
    1349       26852 :                 node *n;
    1350       26852 :                 int i, nexps = list_length(rel->exps);
    1351       26852 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, nexps);
    1352       26852 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, nexps);
    1353             : 
    1354       83388 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next) {
    1355       56566 :                         exps[i] = n->data;
    1356       56566 :                         if (find_prop(exps[i]->p, PROP_HASHCOL))
    1357             :                                 return rel;
    1358       56536 :                         scores[i] = score_se(v, rel, n->data);
    1359             :                 }
    1360       26822 :                 GDKqsort(scores, exps, NULL, nexps, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1361             : 
    1362       83358 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next)
    1363       56536 :                         n->data = exps[i];
    1364             :         }
    1365             : 
    1366             :         return rel;
    1367             : }
    1368             : 
    1369             : /* Compute the efficiency of using this expression early in a group by list */
    1370             : static int
    1371       64049 : score_gbe(visitor *v, sql_rel *rel, sql_exp *e)
    1372             : {
    1373       64049 :         int res = 0;
    1374       64049 :         sql_subtype *t = exp_subtype(e);
    1375       64049 :         sql_column *c = exp_find_column(rel, e, -2);
    1376             : 
    1377       64049 :         if (e->card == CARD_ATOM) /* constants are trivial to group */
    1378          38 :                 res += 1000;
    1379             :         /* can we find out if the underlying table is sorted */
    1380       64049 :         if (is_unique(e) || find_prop(e->p, PROP_HASHCOL) || (c && v->storage_based_opt && mvc_is_unique(v->sql, c))) /* distinct columns */
    1381        3636 :                 res += 700;
    1382       42586 :         if (c && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1383        3729 :                 res += 500;
    1384       64049 :         if (find_prop(e->p, PROP_HASHIDX)) /* has hash index */
    1385           0 :                 res += 200;
    1386             : 
    1387             :         /* prefer the shorter var types over the longer ones */
    1388       64049 :         res += sql_class_base_score(v, c, t, true); /* smaller the type, better */
    1389       64049 :         return res;
    1390             : }
    1391             : 
    1392             : /* reorder group by expressions */
    1393             : static inline sql_rel *
    1394     1269838 : rel_groupby_order(visitor *v, sql_rel *rel)
    1395             : {
    1396     1269838 :         int *scores = NULL;
    1397     1269838 :         sql_exp **exps = NULL;
    1398             : 
    1399     1269838 :         if (is_groupby(rel->op) && list_length(rel->r) > 1) {
    1400       27017 :                 node *n;
    1401       27017 :                 list *gbe = rel->r;
    1402       27017 :                 int i, ngbe = list_length(gbe);
    1403       27017 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, ngbe);
    1404       27017 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, ngbe);
    1405             : 
    1406             :                 /* first sorting step, give priority for integers and sorted columns */
    1407       91066 :                 for (i = 0, n = gbe->h; n; i++, n = n->next) {
    1408       64049 :                         exps[i] = n->data;
    1409       64049 :                         scores[i] = score_gbe(v, rel, exps[i]);
    1410             :                 }
    1411       27017 :                 GDKqsort(scores, exps, NULL, ngbe, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1412             : 
    1413             :                 /* second sorting step, give priority to strings with lower number of digits */
    1414       54839 :                 for (i = ngbe - 1; i && !scores[i]; i--); /* find expressions with no score from the first round */
    1415       27017 :                 if (scores[i])
    1416       26021 :                         i++;
    1417       27017 :                 if (ngbe - i > 1) {
    1418       12154 :                         for (int j = i; j < ngbe; j++) {
    1419        9143 :                                 sql_subtype *t = exp_subtype(exps[j]);
    1420        9143 :                                 scores[j] = t ? t->digits : 0;
    1421             :                         }
    1422             :                         /* the less number of digits the better, order ascending */
    1423        3011 :                         GDKqsort(scores + i, exps + i, NULL, ngbe - i, sizeof(int), sizeof(void *), TYPE_int, false, true);
    1424             :                 }
    1425             : 
    1426       91066 :                 for (i = 0, n = gbe->h; n; i++, n = n->next)
    1427       64049 :                         n->data = exps[i];
    1428             :         }
    1429             : 
    1430     1269838 :         return rel;
    1431             : }
    1432             : 
    1433             : /* This optimization loop contains optimizations that can potentially use statistics */
    1434             : static sql_rel *
    1435     1269837 : rel_final_optimization_loop_(visitor *v, sql_rel *rel)
    1436             : {
    1437             :         /* Run rel_push_select_up only once at the end to avoid an infinite optimization loop */
    1438     1269837 :         rel = rel_push_select_up(v, rel);
    1439     1269837 :         rel = rel_select_order(v, rel);
    1440             : 
    1441             :         /* TODO? Maybe later add rel_simplify_count, rel_join2semijoin, rel_simplify_fk_joins,
    1442             :            rel_distinct_project2groupby, rel_simplify_predicates, rel_simplify_math,
    1443             :            rel_distinct_aggregate_on_unique_values */
    1444             : 
    1445     1269838 :         rel = rel_groupby_order(v, rel);
    1446     1269838 :         return rel;
    1447             : }
    1448             : 
    1449             : static sql_rel *
    1450       68274 : rel_final_optimization_loop(visitor *v, global_props *gp, sql_rel *rel)
    1451             : {
    1452       68274 :         (void) gp;
    1453       68274 :         return rel_visitor_bottomup(v, rel, &rel_final_optimization_loop_);
    1454             : }
    1455             : 
    1456             : run_optimizer
    1457      729811 : bind_final_optimization_loop(visitor *v, global_props *gp)
    1458             : {
    1459      729811 :         int flag = v->sql->sql_optimizer;
    1460             :         /* At the moment, this optimizer has dependency on 3 flags */
    1461      560923 :         return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_select]) &&
    1462      798086 :                 (flag & push_select_up) && (flag & optimize_select_and_joins_topdown) && (flag & optimize_projections) ? rel_final_optimization_loop : NULL;
    1463             : }

Generated by: LCOV version 1.14