LCOV - code coverage report
Current view: top level - sql/server - rel_statistics.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 834 885 94.2 %
Date: 2025-03-25 21:27:32 Functions: 26 26 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024, 2025 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include "rel_optimizer_private.h"
      15             : #include "rel_statistics.h"
      16             : #include "rel_basetable.h"
      17             : #include "rel_rewriter.h"
      18             : #include "sql_storage.h"
      19             : 
      20             : static sql_exp *
      21     3538870 : comparison_find_column(sql_exp *input, sql_exp *e)
      22             : {
      23     3538870 :         switch (input->type) {
      24      100727 :         case e_convert: {
      25      100727 :                 list *types = (list *)input->r;
      26      100727 :                 sql_class from = ((sql_subtype*)types->h->data)->type->eclass, to = ((sql_subtype*)types->h->next->data)->type->eclass;
      27      100727 :                 if (from == to)
      28      194673 :                         return comparison_find_column(input->l, e) ? input : NULL;
      29             :                 return NULL;
      30             :         }
      31     3112697 :         case e_column:
      32     3112697 :                 return exp_match(e, input) ? input : NULL;
      33             :         default:
      34             :                 return NULL;
      35             :         }
      36             : }
      37             : 
      38             : /* multi lo <= col <= hi, maybe still be false even if lo or hi are NULL, possibly similar for filter on multiple
      39             :  * columns */
      40             : #define comp_single_column(c) (!c->f)
      41             : 
      42             : static sql_exp *
      43     8835698 : rel_propagate_column_ref_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
      44             : {
      45     8917555 :         assert(e->type == e_column);
      46     8917555 :         if (rel) {
      47     8917474 :                 switch(rel->op) {
      48     4134447 :                 case op_left:
      49             :                 case op_right:
      50             :                 case op_full:
      51             :                 case op_join:
      52             :                 case op_select:
      53             :                 case op_anti:
      54             :                 case op_semi: {
      55     4134447 :                         bool found_without_semantics = false, found_left = false, found_right = false, still_unique = false;
      56             : 
      57     4134447 :                         if ((is_innerjoin(rel->op) || is_select(rel->op)) && list_length(rel->exps) == 1 && exp_is_false(rel->exps->h->data))
      58             :                                 return NULL; /* nothing will pass, skip */
      59             : 
      60             :                         /* propagate from the bottom first */
      61     4113809 :                         if (rel_propagate_column_ref_statistics(sql, rel->l, e))
      62             :                                 found_left = true;
      63     2599437 :                         if (!found_left && is_join(rel->op) && rel_propagate_column_ref_statistics(sql, rel->r, e))
      64     4113809 :                                 found_right = true;
      65             : 
      66     4113809 :                         if (!found_left && !found_right)
      67             :                                 return NULL;
      68     1812470 :                         if (!list_empty(rel->exps) && rel->op != op_anti) { /* if there's an or, the MIN and MAX get difficult to propagate */
      69     3565423 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
      70     1849380 :                                         sql_exp *comp = n->data, *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL, *fe = comp->f, *fne = NULL;
      71             : 
      72     1849380 :                                         if (comp->type == e_cmp) {
      73     1848512 :                                                 if (is_theta_exp(comp->flag) && ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) || (fe && (fne = comparison_find_column(fe, e))))) {
      74      127726 :                                                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
      75      127726 :                                                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
      76             : 
      77             :                                                         /* not semantics found or if explicitly filtering not null values from the column */
      78      127726 :                                                         found_without_semantics |= (!is_semantics(comp) && comp_single_column(comp)) || (comp->flag == cmp_equal && lne && is_anti(comp) && exp_is_null(re));
      79      127726 :                                                         still_unique |= comp->flag == cmp_equal && is_unique(le) && is_unique(re); /* unique if only equi-joins on unique columns are there */
      80      127726 :                                                         if (is_full(rel->op) || (is_left(rel->op) && found_left) || (is_right(rel->op) && found_right)) /* on outer joins, min and max cannot be propagated on some cases */
      81       22721 :                                                                 continue;
      82             :                                                         /* if (end2 >= start1 && start2 <= end1) then the 2 intervals are intersected */
      83      105005 :                                                         if (fe && lval_min && lval_max) { /* range case, the middle expression must intersect the other two */
      84        4618 :                                                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
      85        4618 :                                                                 int int1 = rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0,
      86        4618 :                                                                         int2 = fval_min && fval_max && atom_cmp(fval_max, lval_min) >= 0 && atom_cmp(fval_min, lval_max) <= 0,
      87        4618 :                                                                         symmetric = is_symmetric(comp);
      88             : 
      89        4618 :                                                                 if (is_anti(comp) || (!symmetric && fval_min && rval_max && atom_cmp(fval_min, rval_max) < 0)) /* for asymmetric case the re range must be after the fe range */
      90        1815 :                                                                         continue;
      91        2803 :                                                                 if (lne && int1 && int2) {
      92         103 :                                                                         if (symmetric) {
      93           0 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
      94           0 :                                                                                 atom *nmin = statistics_atom_min(sql, rval_min, fval_min), *nmax = statistics_atom_max(sql, rval_max, fval_max);
      95             :                                                                                 /* max is min from le and (max from re and fe max) */
      96           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, nmax, p2->value.pval) : nmax);
      97             :                                                                                 /* min is max from le and (min from re and fe min) */
      98           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, nmin, p1->value.pval) : nmin);
      99             :                                                                         } else {
     100         103 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
     101             :                                                                                 /* max is min from le and fe max */
     102         103 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, fval_max, p2->value.pval) : fval_max);
     103             :                                                                                 /* min is max from le and re min */
     104         103 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, rval_min, p1->value.pval) : rval_min);
     105             :                                                                         }
     106        2700 :                                                                 } else if (rne) {
     107         590 :                                                                         if (symmetric && int1 && int2) { /* min is max from le and (min from re and fe min) */
     108           0 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     109           0 :                                                                                 atom *nmin = p ? statistics_atom_min(sql, p->value.pval, fval_min) : fval_min;
     110           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, nmin, lval_min));
     111         590 :                                                                         } else if (int1) { /* min is max from le and re min */
     112         100 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     113         100 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     114             :                                                                         }
     115        2110 :                                                                 } else if (fne) {
     116         549 :                                                                         if (symmetric && int1 && int2) { /* max is min from le and (max from re and fe max) */
     117           0 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     118           0 :                                                                                 atom *nmax = p ? statistics_atom_max(sql, p->value.pval, rval_max) : rval_max;
     119           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, nmax, lval_max) : nmax);
     120         549 :                                                                         } else if (int2) { /* max is min from le and fe max */
     121         266 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     122         266 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     123             :                                                                         }
     124             :                                                                 }
     125      100387 :                                                         } else if (lval_min && lval_max && rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0) {
     126             :                                                                 /* both min and max must be set and the intervals must overlap */
     127       41977 :                                                                 switch (comp->flag) {
     128       28304 :                                                                 case cmp_equal: /* for equality reduce */
     129       28304 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_max(sql, lval_max, rval_max) : statistics_atom_min(sql, lval_max, rval_max));
     130       28304 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_min(sql, lval_min, rval_min) : statistics_atom_max(sql, lval_min, rval_min));
     131       28304 :                                                                         break;
     132        4357 :                                                                 case cmp_notequal: /* for inequality expand */
     133        4357 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_min(sql, lval_max, rval_max) : statistics_atom_max(sql, lval_max, rval_max));
     134        4357 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_max(sql, lval_min, rval_min) : statistics_atom_min(sql, lval_min, rval_min));
     135        4357 :                                                                         break;
     136        5310 :                                                                 case cmp_gt:
     137             :                                                                 case cmp_gte:
     138        9682 :                                                                         if (!is_anti(comp) && lne) { /* min is max from both min */
     139        4372 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     140        4372 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min, p->value.pval) : rval_min);
     141         938 :                                                                         } else if (!is_anti(comp)) { /* max is min from both max */
     142         938 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     143         938 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     144             :                                                                         }
     145             :                                                                         break;
     146        4006 :                                                                 case cmp_lt:
     147             :                                                                 case cmp_lte:
     148        7205 :                                                                         if (!is_anti(comp) && lne) { /* max is min from both max */
     149        3199 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     150        3199 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max, p->value.pval) : rval_max);
     151         807 :                                                                         } else if (!is_anti(comp)) { /* min is max from both min */
     152         807 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     153         807 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     154             :                                                                         }
     155             :                                                                         break;
     156             :                                                                 default: /* Maybe later I can do cmp_in and cmp_notin */
     157             :                                                                         break;
     158             :                                                                 }
     159             :                                                         }
     160             :                                                 }
     161             :                                         }
     162             :                                 }
     163             :                         }
     164     1812470 :                         if (is_full(rel->op) || (is_left(rel->op) && found_right) || (is_right(rel->op) && found_left))
     165       37037 :                                 set_has_nil(e);
     166     1812470 :                         if (!is_outerjoin(rel->op) && found_without_semantics) /* at an outer join, null values pass */
     167       94143 :                                 set_has_no_nil(e);
     168     1812470 :                         if (is_join(rel->op) && is_unique(e) && !still_unique)
     169      119978 :                                 set_not_unique(e);
     170     1812470 :                         return e;
     171             :                 }
     172     4681802 :                 case op_table:
     173             :                 case op_basetable:
     174             :                 case op_union:
     175             :                 case op_except:
     176             :                 case op_inter:
     177             :                 case op_munion:
     178             :                 case op_project:
     179             :                 case op_groupby: {
     180     4681802 :                         if (is_recursive(rel))
     181             :                                 return NULL;
     182     4681605 :                         sql_exp *found;
     183     4681605 :                         atom *fval;
     184     4681605 :                         prop *est;
     185     4681605 :                         if ((found = rel_find_exp(rel, e))) {
     186     2201331 :                                 if (rel->op != op_table) { /* At the moment don't propagate statistics for table relations */
     187     2156026 :                                         if ((fval = find_prop_and_get(found->p, PROP_MAX)))
     188     1137611 :                                                 set_minmax_property(sql, e, PROP_MAX, fval);
     189     2156024 :                                         if ((fval = find_prop_and_get(found->p, PROP_MIN)))
     190     1144655 :                                                 set_minmax_property(sql, e, PROP_MIN, fval);
     191     2156024 :                                         if (!has_nil(found))
     192     1400115 :                                                 set_has_no_nil(e);
     193     2156024 :                                         if (is_unique(found) || (need_distinct(rel) && list_length(rel->exps) == 1) ||
     194     1737700 :                                                 (is_groupby(rel->op) && (list_empty(rel->r) || (list_length(rel->r) == 1 && exps_find_exp(rel->r, e)))))
     195      429264 :                                                 set_unique(e);
     196             :                                         /* propagate unique estimation for known cases */
     197     2156025 :                                         if (is_groupby(rel->op) && list_empty(rel->r) && !find_prop(e->p, PROP_NUNIQUES)) { /* global aggregate case */
     198        7665 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     199        7664 :                                                 p->value.dval = 1;
     200     2148359 :                                         } else if (((is_basetable(rel->op) || is_except(rel->op) || is_inter(rel->op) || is_simple_project(rel->op) ||
     201       72037 :                                                                  (is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
     202     2087683 :                                                                 (est = find_prop(found->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES))) {
     203      195334 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     204      195334 :                                                 p->value.dval = est->value.dval;
     205             :                                         }
     206             :                                 }
     207     2201333 :                                 return e;
     208             :                         }
     209             :                         return NULL;
     210             :                 }
     211       81857 :                 case op_topn:
     212             :                 case op_sample:
     213       81857 :                         return rel_propagate_column_ref_statistics(sql, rel->l, e);
     214             :                 default:
     215             :                         break;
     216             :                 }
     217             :         }
     218             :         return NULL;
     219             : }
     220             : 
     221             : static atom *
     222     4723352 : atom_from_valptr( allocator *sa, sql_subtype *tpe, ValRecord *v)
     223             : {
     224     4723352 :         atom *a = SA_NEW(sa, atom);
     225             : 
     226     4723361 :         assert(!VALisnil(v));
     227     4723384 :         *a = (atom) {.tpe = *tpe,};
     228     4723384 :         SA_VALcopy(sa, &a->data, v);
     229     4723524 :         return a;
     230             : }
     231             : 
     232             : void
     233     4383620 : sql_column_get_statistics(mvc *sql, sql_column *c, sql_exp *e)
     234             : {
     235     4383620 :         bool nonil = false, unique = false;
     236     4383620 :         double unique_est = 0.0;
     237     4383620 :         ValRecord min, max;
     238     4383620 :         int ok = mvc_col_stats(sql, c, &nonil, &unique, &unique_est, &min, &max);
     239             : 
     240     4384487 :         if (has_nil(e) && nonil)
     241     2926808 :                 set_has_no_nil(e);
     242     4384487 :         if (!is_unique(e) && unique)
     243     1156174 :                 set_unique(e);
     244     4384487 :         if (unique_est != 0.0) {
     245     3092566 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     246     3092385 :                 p->value.dval = unique_est;
     247             :         }
     248     4384306 :         unsigned int digits = 0;
     249     4384306 :         sql_subtype *et = exp_subtype(e);
     250     4384549 :         if (et->type->eclass == EC_DEC || et->type->eclass == EC_NUM)
     251     2870291 :                 digits = et->digits;
     252     4384549 :         if ((ok & 2) == 2) {
     253     2359024 :                 if (!VALisnil(&max)) {
     254     2358938 :                         prop *p = e->p = prop_create(sql->sa, PROP_MAX, e->p);
     255     2358875 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &max);
     256     2358852 :                         if (digits) {
     257     1753262 :                                 unsigned int nd = atom_digits(p->value.pval);
     258     1753201 :                                 if (nd < digits)
     259             :                                         digits = nd;
     260     1753201 :                                 if (!digits)
     261             :                                         digits = 1;
     262             :                         }
     263             :                 }
     264     2358778 :                 VALclear(&max);
     265             :         }
     266     4384293 :         if ((ok & 1) == 1) {
     267     2364915 :                 if (!VALisnil(&min)) {
     268     2364924 :                         prop *p = e->p = prop_create(sql->sa, PROP_MIN, e->p);
     269     2364964 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &min);
     270     2365116 :                         if (digits) {
     271     1760522 :                                 unsigned int nd = atom_digits(p->value.pval);
     272     1760542 :                                 if (nd > digits) /* possibly set to low by max value */
     273             :                                         digits = nd;
     274             :                                 if (!digits)
     275             :                                         digits = 1;
     276             :                         }
     277             :                 }
     278     2365138 :                 VALclear(&min);
     279             :         }
     280     4384523 :         if (digits)
     281     2870206 :                 et->digits = digits;
     282     4384523 :         if (et->type->eclass == EC_DEC && et->digits <= et->scale)
     283         216 :                 et->digits = et->scale + 1;
     284     4384523 : }
     285             : 
     286             : static void
     287      958035 : rel_basetable_column_get_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
     288             : {
     289      958035 :         if (e->p)
     290             :                 return;
     291      308683 :         sql_column *c = NULL;
     292             : 
     293      308683 :         if ((c = rel_base_find_column(rel, e->nid))) {
     294      210336 :                 sql_column_get_statistics(sql, c, e);
     295             :         }
     296             : }
     297             : 
     298             : static bool
     299        8876 : rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, sql_exp *e, int i)
     300             : {
     301        8876 :         sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
     302        8876 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     303        8876 :                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     304        8876 :         prop *est;
     305             : 
     306             :         /* for the intersection, if both expressions don't overlap, it can be pruned */
     307        8876 :         if (is_inter(rel->op) && !has_nil(le) && !has_nil(re) &&
     308         511 :                 ((rval_max && lval_min && atom_cmp(rval_max, lval_min) < 0) || (rval_min && lval_max && atom_cmp(rval_min, lval_max) > 0)))
     309          26 :                 return true;
     310             : 
     311        8850 :         if (lval_max && rval_max) {
     312        2557 :                 if (is_union(rel->op))
     313           3 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     314        2554 :                 else if (is_inter(rel->op))
     315         399 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval_max, rval_max)); /* for intersect the new max will be the min of the two */
     316             :                 else /* except */
     317        2155 :                         set_minmax_property(sql, e, PROP_MAX, lval_max);
     318             :         }
     319        8850 :         if (lval_min && rval_min) {
     320        2557 :                 if (is_union(rel->op))
     321           3 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     322        2554 :                 else if (is_inter(rel->op))
     323         399 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval_min, rval_min)); /* for intersect the new min will be the max of the two */
     324             :                 else /* except */
     325        2155 :                         set_minmax_property(sql, e, PROP_MIN, lval_min);
     326             :         }
     327             : 
     328        8850 :         if (is_union(rel->op) || is_munion(rel->op)) {
     329        5986 :                 if (!has_nil(le) && !has_nil(re))
     330         879 :                         set_has_no_nil(e);
     331        5986 :                 if (need_distinct(rel) && list_length(rel->exps) == 1)
     332           0 :                         set_unique(e);
     333        2864 :         } else if (is_inter(rel->op)) {
     334         564 :                 if (!has_nil(le) || !has_nil(re))
     335         509 :                         set_has_no_nil(e);
     336         564 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     337         345 :                         set_unique(e);
     338             :         } else {
     339        2300 :                 assert(is_except(rel->op));
     340        2300 :                 if (!has_nil(le))
     341        2236 :                         set_has_no_nil(e);
     342        2300 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     343        2012 :                         set_unique(e);
     344             :         }
     345             :         /* propagate unique estimation for known cases */
     346        8850 :         if (!is_union(rel->op) && (est = find_prop(le->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES)) {
     347         207 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     348         207 :                 p->value.dval = est->value.dval;
     349             :         }
     350             :         return false;
     351             : }
     352             : 
     353             : 
     354             : static void
     355       64038 : rel_munion_get_statistics(mvc *sql, sql_rel *rel, list *rels, sql_exp *e, int i)
     356             : {
     357       64038 :         if (is_recursive(rel))
     358             :                 return ;
     359       64038 :         assert(is_munion(rel->op));
     360             : 
     361       64038 :         sql_rel *l = rels->h->data;
     362       64038 :         sql_exp *le = list_fetch(l->exps, i);
     363       64038 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX);
     364       64038 :         bool has_nonil = !has_nil(le);
     365             : 
     366      185274 :         for(node *n = rels->h->next; n; n = n->next) {
     367      121236 :                 sql_rel *r = n->data;
     368      121236 :                 sql_exp *re = list_fetch(r->exps, i);
     369      121236 :                 atom *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     370             : 
     371      121236 :                 if (lval_max && rval_max) {
     372       44461 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     373       44461 :                         lval_max = find_prop_and_get(e->p, PROP_MAX);
     374             :                 }
     375      121236 :                 if (lval_min && rval_min) {
     376       43912 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     377       43912 :                         lval_min = find_prop_and_get(e->p, PROP_MIN);
     378             :                 }
     379      121236 :                 has_nonil &= !has_nil(re);
     380             : 
     381             :         }
     382             : 
     383       64038 :         if (has_nonil)
     384       43934 :                 set_has_no_nil(e);
     385             : 
     386       64038 :         if (need_distinct(rel) && list_length(rel->exps) == 1)
     387        1597 :                 set_unique(e);
     388             : }
     389             : 
     390             : 
     391             : static sql_exp *
     392     3571061 : rel_propagate_statistics(visitor *v, sql_rel *rel, sql_exp *e, int depth)
     393             : {
     394     3571061 :         mvc *sql = v->sql;
     395     3571061 :         atom *lval;
     396             : 
     397     3571061 :         (void) depth;
     398     3571061 :         switch(e->type) {
     399     2209738 :         case e_column:
     400     2209738 :                 switch (rel->op) { /* set relations don't call rel_propagate_statistics */
     401      285520 :                 case op_join:
     402             :                 case op_left:
     403             :                 case op_right:
     404             :                 case op_full:
     405             :                 case op_semi:
     406             :                 case op_anti: {
     407      285520 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e);
     408      285520 :                         if (!found)
     409      143209 :                                 (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     410             :                         break;
     411             :                 }
     412     1924218 :                 case op_select:
     413             :                 case op_project:
     414             :                 case op_groupby: {
     415     1924218 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e); /* labels may be found on the same projection, ugh */
     416     1924223 :                         if (!found && is_simple_project(rel->op))
     417      134040 :                                 (void) rel_propagate_column_ref_statistics(sql, rel, e);
     418             :                         break;
     419             :                 }
     420           0 :                 case op_insert:
     421             :                 case op_update:
     422             :                 case op_delete:
     423           0 :                         (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     424           0 :                         break;
     425             :                 default:
     426             :                         break;
     427             :                 }
     428             :                 break;
     429      102590 :         case e_convert: {
     430      102590 :                 sql_subtype *to = exp_totype(e), *from = exp_fromtype(e);
     431      102590 :                 sql_exp *l = e->l;
     432      102590 :                 sql_class fr = from->type->eclass, too = to->type->eclass;
     433      102590 :                 prop *est;
     434             : 
     435      102590 :                 if (fr == too) {
     436       93407 :                         if ((lval = find_prop_and_get(l->p, PROP_MAX))) {
     437       60427 :                                 atom *res = atom_copy(sql->sa, lval);
     438       60427 :                                 if ((res = atom_cast(sql->sa, res, to)))
     439       60399 :                                         set_minmax_property(sql, e, PROP_MAX, res);
     440             :                         }
     441       93407 :                         if ((lval = find_prop_and_get(l->p, PROP_MIN))) {
     442       61065 :                                 atom *res = atom_copy(sql->sa, lval);
     443       61065 :                                 if ((res = atom_cast(sql->sa, res, to)))
     444       61038 :                                         set_minmax_property(sql, e, PROP_MIN, res);
     445             :                         }
     446             :                 }
     447             :                 /* propagating nuniques through conversions is tricky. I am adding just the general cases */
     448      102590 :                 if ((est = find_prop(l->p, PROP_NUNIQUES)) &&
     449       63913 :                         (((fr == EC_TIME || fr == EC_TIME_TZ || fr == EC_DATE) && (too == EC_TIMESTAMP || too == EC_TIMESTAMP_TZ)) ||
     450       63888 :                          ((fr == EC_NUM || fr == EC_BIT) && too == EC_NUM) || (fr == EC_DEC && too == EC_DEC) || (fr != EC_EXTERNAL && too == EC_STRING))) {
     451       60448 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     452       60447 :                         p->value.dval = est->value.dval;
     453             :                 }
     454      102589 :                 if (!has_nil(l))
     455       58177 :                         set_has_no_nil(e);
     456             :                 break;
     457             :         }
     458      380303 :         case e_aggr:
     459             :         case e_func: {
     460      380303 :                 BUN lv;
     461      380303 :                 sql_subfunc *f = e->f;
     462             : 
     463      380303 :                 if (!f->func->s) {
     464      353078 :                         int key = hash_key(f->func->base.name); /* Using hash lookup */
     465      353078 :                         sql_hash_e *he = sql_functions_lookup->buckets[key&(sql_functions_lookup->size-1)];
     466      353078 :                         lookup_function look = NULL;
     467             : 
     468      755753 :                         for (; he && !look; he = he->chain) {
     469      402675 :                                 struct function_properties* fp = (struct function_properties*) he->value;
     470             : 
     471      402675 :                                 if (!strcmp(f->func->base.name, fp->name))
     472      108758 :                                         look = fp->func;
     473             :                         }
     474      353078 :                         if (look)
     475      108758 :                                 look(sql, e);
     476             :                 }
     477             :                 /* for global aggregates with no semantics, if the left relation has values, then the output won't be NULL */
     478      380303 :                 if (!is_semantics(e) && e->l && !have_nil(e->l) &&
     479      109514 :                         (e->type != e_aggr || (is_groupby(rel->op) && list_length(rel->r)) || ((lv = get_rel_count(rel->l)) != BUN_NONE && lv > 0)))
     480      109177 :                         set_has_no_nil(e);
     481             :                 /* set properties for global aggregates */
     482      380303 :                 if (e->type == e_aggr && is_groupby(rel->op) && list_empty(rel->r)) {
     483        7970 :                         if (!find_prop(e->p, PROP_NUNIQUES)) {
     484        7970 :                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     485        7970 :                                 p->value.dval = 1;
     486             :                         }
     487        7970 :                         set_unique(e);
     488             :                 }
     489             :                 break;
     490             :         }
     491      594596 :         case e_atom:
     492      594596 :                 if (e->l) {
     493      576505 :                         atom *a = (atom*) e->l;
     494      576505 :                         if (!a->isnull) {
     495      507067 :                                 set_minmax_property(sql, e, PROP_MAX, a);
     496      507067 :                                 set_minmax_property(sql, e, PROP_MIN, a);
     497             :                         }
     498      576504 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     499      576506 :                         p->value.dval = 1;
     500       18091 :                 } else if (e->f) {
     501        4314 :                         list *vals = (list *) e->f;
     502        4314 :                         sql_exp *first = vals->h ? vals->h->data : NULL;
     503        4314 :                         atom *max = NULL, *min = NULL; /* all child values must have a valid min/max */
     504        4314 :                         int has_nil = 0;
     505             : 
     506        4314 :                         if (first) {
     507        4314 :                                 max = ((lval = find_prop_and_get(first->p, PROP_MAX))) ? lval : NULL;
     508        4314 :                                 min = ((lval = find_prop_and_get(first->p, PROP_MIN))) ? lval : NULL;
     509        4314 :                                 has_nil |= has_nil(first);
     510             :                         }
     511             : 
     512       18497 :                         for (node *n = vals->h ? vals->h->next : NULL ; n ; n = n->next) {
     513        9869 :                                 sql_exp *ee = n->data;
     514             : 
     515        9869 :                                 if (min && max) {
     516        9372 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MAX))) {
     517        9326 :                                                 max = atom_cmp(lval, max) > 0 ? lval : max;
     518             :                                         } else {
     519             :                                                 max = NULL;
     520             :                                         }
     521        9372 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MIN))) {
     522        9326 :                                                 min = atom_cmp(min, lval) > 0 ? lval : min;
     523             :                                         } else {
     524             :                                                 min = NULL;
     525             :                                         }
     526             :                                 }
     527        9869 :                                 has_nil |= has_nil(ee);
     528             :                         }
     529             : 
     530        4314 :                         if (!has_nil)
     531        3939 :                                 set_has_no_nil(e);
     532        4314 :                         if (min && max) {
     533        3895 :                                 set_minmax_property(sql, e, PROP_MAX, max);
     534        3895 :                                 set_minmax_property(sql, e, PROP_MIN, min);
     535             :                         }
     536        4314 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     537        4314 :                         p->value.dval = (dbl) list_length(vals);
     538             :                 } else {
     539       13777 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     540       13777 :                         p->value.dval = 1;
     541             :                 }
     542             :                 break;
     543      283834 :         case e_cmp:
     544             :                 /* TODO? propagating min/max/unique of booleans is not very worth it */
     545      283834 :                 if (e->flag == cmp_or || e->flag == cmp_filter) {
     546       15246 :                         if (!have_nil(e->l) && !have_nil(e->r))
     547       10188 :                                 set_has_no_nil(e);
     548      268588 :                 } else if (e->flag == cmp_in || e->flag == cmp_notin) {
     549       20212 :                         sql_exp *le = e->l;
     550       20212 :                         if (!has_nil(le) && !have_nil(e->r))
     551       16967 :                                 set_has_no_nil(e);
     552             :                 } else {
     553      248376 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     554      248376 :                         if (!has_nil(le) && !has_nil(re) && (!e->f || !has_nil(fe)))
     555      175241 :                                 set_has_no_nil(e);
     556             :                 }
     557             :                 break;
     558             :         case e_psm:
     559             :                 break;
     560             :         }
     561             : 
     562             : #ifndef NDEBUG
     563             :         {
     564             :                 /* min and max cannot be NULL and min must be <= than max, if it happens the inner relation must be empty! */
     565     3571066 :                 atom *min = find_prop_and_get(e->p, PROP_MIN), *max = find_prop_and_get(e->p, PROP_MAX);
     566             : 
     567     3571068 :                 (void) min;
     568     3571068 :                 (void) max;
     569     3571068 :                 assert(!min || !min->isnull);
     570     3571068 :                 assert(!max || !max->isnull);
     571     3571068 :                 assert(!min || !max || (min && max)/* atom_cmp(min, max) <= 0*/);
     572             :         }
     573             : #endif
     574     3571068 :         return e;
     575             : }
     576             : 
     577             : static list * /* Remove predicates always false from min/max values */
     578      144472 : rel_prune_predicates(visitor *v, sql_rel *rel)
     579             : {
     580      144472 :         if (rel->l) {
     581      144472 :                 sql_rel *l = rel->l;
     582      144472 :                 if (is_single(l))
     583        3372 :                         return rel->exps;
     584             :         }
     585      141100 :         if (!list_empty(rel->attr))
     586        3015 :                 return rel->exps;
     587      291033 :         for (node *n = rel->exps->h ; n ; n = n->next) {
     588      152948 :                 sql_exp *e = n->data;
     589             : 
     590      152948 :                 if (e->type == e_cmp && is_theta_exp(e->flag)) {
     591      125918 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     592      125918 :                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     593      125918 :                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     594      125918 :                         bool always_false = false, always_true = false;
     595             : 
     596      125918 :                         if (fe && !is_symmetric(e)) {
     597        3045 :                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
     598        3045 :                                 comp_type lower = range2lcompare(e->flag), higher = range2rcompare(e->flag);
     599        3644 :                                 int not_int1 = rval_min && lval_max && /* the middle and left intervals don't overlap */
     600        1654 :                                         (!is_anti(e) && (lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0)),
     601        4059 :                                         not_int2 = lval_min && fval_max && /* the middle and right intervals don't overlap */
     602        2482 :                                         (!is_anti(e) && (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0)),
     603        3045 :                                         not_int3 = rval_min && fval_max && /* the left interval is after the right one */
     604        1285 :                                         (!is_anti(e) && (atom_cmp(rval_min, fval_max) > 0));
     605             : 
     606        3045 :                                 always_false |= not_int1 || not_int2 || not_int3;
     607             :                                 /* for anti the middle must be before the left or after the right or the right after the left, for the other the middle must be always between the left and right intervals */
     608        4089 :                                 always_true |= !has_nil(le) && !has_nil(re) && !has_nil(fe) &&
     609        3941 :                                         lval_min && lval_max && rval_min && rval_max && fval_min && fval_max &&
     610         573 :                                         (is_anti(e) ? ((lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) || (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
     611         488 :                                          ((lower == cmp_gte ? atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0)));
     612             :                         } else if (!fe) {
     613      122857 :                                 if (!is_semantics(e)) /* trivial not null cmp null case */
     614      234248 :                                         always_false |= !is_anti(e) && ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)));
     615      122857 :                                 switch (e->flag) {
     616      109318 :                                 case cmp_equal:
     617      109318 :                                         if (lval_min && lval_max && rval_min && rval_max && (!is_semantics(e) || !has_nil(le) || !has_nil(re)))
     618      136810 :                                                 always_false |= (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     619      109318 :                                         if (is_semantics(e)) { /* prune *= NULL cases */
     620        5696 :                                                 always_false |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     621       11392 :                                                 always_true |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     622             :                                         }
     623             :                                         break;
     624        5347 :                                 case cmp_notequal:
     625        5347 :                                         if (lval_min && lval_max && rval_min && rval_max)
     626        7390 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     627        5347 :                                         if (is_semantics(e)) {
     628          37 :                                                 always_true |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     629          74 :                                                 always_false |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     630             :                                         }
     631             :                                         break;
     632        5487 :                                 case cmp_gt:
     633        5487 :                                         if (lval_max && rval_min)
     634        1834 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     635        5487 :                                         if (lval_min && rval_max)
     636        3668 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     637             :                                         break;
     638         121 :                                 case cmp_gte:
     639         121 :                                         if (lval_max && rval_min)
     640          51 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     641         121 :                                         if (lval_min && rval_max)
     642         102 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     643             :                                         break;
     644        2502 :                                 case cmp_lt:
     645        2502 :                                         if (lval_min && rval_max)
     646        1382 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     647        2502 :                                         if (lval_max && rval_min)
     648        2812 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     649             :                                         break;
     650          82 :                                 case cmp_lte:
     651          82 :                                         if (lval_min && rval_max)
     652          11 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     653          82 :                                         if (lval_max && rval_min)
     654          22 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     655             :                                         break;
     656             :                                 default: /* Maybe later I can do cmp_in and cmp_notin but don't forget to remove is_theta_exp check up there */
     657             :                                         break;
     658             :                                 }
     659             :                         }
     660      125918 :                         assert(!always_false || !always_true);
     661      125918 :                         if (always_false || always_true) {
     662        3708 :                                 sql_exp *ne = exp_atom_bool(v->sql->sa, always_true /* if always_true set then true else false */);
     663        3708 :                                 if (exp_name(e))
     664           0 :                                         exp_prop_alias(v->sql->sa, ne, e);
     665        3708 :                                 n->data = ne;
     666        3708 :                                 v->changes++;
     667             :                         }
     668             :                 }
     669             :         }
     670      138085 :         return rel->exps;
     671             : }
     672             : 
     673             : static sql_rel *
     674          14 : set_setop_side(visitor *v, sql_rel *rel, sql_rel *side)
     675             : {
     676          14 :         if (side == rel->r)
     677           0 :                 rel->r = NULL;
     678             :         else
     679          14 :                 rel->l = NULL;
     680          14 :         if (!is_simple_project(side->op) || rel_is_ref(side) || !list_empty(side->r)) {
     681           0 :                 side = rel_project(v->sql->sa, side, rel_projections(v->sql, side, NULL, 0, 1));
     682           0 :                 set_count_prop(v->sql->sa, side, get_rel_count(side->l));
     683           0 :                 side->exps = exps_exp_visitor_bottomup(v, side, side->exps, 0, &rel_propagate_statistics, false);
     684             :         }
     685          35 :         for (node *n = rel->exps->h, *m = side->exps->h ; n && m ; n = n->next, m = m->next) {
     686          21 :                 sql_exp *e1 = n->data, *e2 = m->data;
     687          21 :                 exp_prop_alias(v->sql->sa, e2, e1);
     688             :         }
     689          14 :         list_hash_clear(side->exps);
     690             : 
     691          14 :         if (need_distinct(rel))
     692           0 :                 set_distinct(side);
     693          14 :         side->p = prop_copy(v->sql->sa, rel->p);
     694          14 :         rel_destroy(rel);
     695          14 :         return side;
     696             : }
     697             : 
     698             : static BUN
     699       26619 : trivial_project_exp_card(sql_exp *e)
     700             : {
     701       26990 :         if (e->type == e_convert)
     702         371 :                 return trivial_project_exp_card(e->l);
     703       26619 :         return e->type == e_atom && e->f ? (BUN) list_length(e->f) : 1;
     704             : }
     705             : 
     706             : static BUN
     707       26223 : rel_calc_nuniques(mvc *sql, sql_rel *l, list *exps)
     708             : {
     709       26223 :         BUN lv = get_rel_count(l);
     710             : 
     711       26223 :         if (lv == 0)
     712             :                 return 0;
     713       23481 :         if (!list_empty(exps)) {
     714       23481 :                 BUN nuniques = 0;
     715             :                 /* compute the highest number of unique values */
     716       58709 :                 for (node *n = exps->h ; n && nuniques != BUN_NONE ; n = n->next) {
     717       35228 :                         sql_exp *e = n->data;
     718       35228 :                         sql_rel *bt = NULL;
     719       35228 :                         prop *p = NULL;
     720       35228 :                         BUN euniques = BUN_NONE;
     721       35228 :                         atom *min, *max, *sub = NULL;
     722       35228 :                         sql_subtype *tp = exp_subtype(e);
     723       35228 :                         sql_class ec = tp ? tp->type->eclass : EC_STRING; /* if 'e' has no type (eg parameter), use a non-number type to fail condition */
     724             : 
     725       35228 :                         if ((p = find_prop(e->p, PROP_NUNIQUES))) {
     726       25435 :                                 euniques = (BUN) p->value.dval;
     727        9793 :                         } else if (e->type == e_column && e->nid && rel_find_exp_and_corresponding_rel(l, e, false, &bt, NULL) && bt && (p = find_prop(bt->p, PROP_COUNT))) {
     728        7685 :                                 euniques = (BUN) p->value.lval;
     729             :                         }
     730             :                         /* use min to max range to compute number of possible values in the domain for number types */
     731       35228 :                         if ((EC_TEMP(ec)||ec==EC_NUM||ec==EC_MONTH||ec==EC_POS) &&
     732       24295 :                                 (min = find_prop_and_get(e->p, PROP_MIN)) && (max = find_prop_and_get(e->p, PROP_MAX))) {
     733             :                                 /* the range includes min and max, so the atom_inc call is needed */
     734             :                                 /* if 'euniques' has number of distinct values, compute min between both */
     735       19341 :                                 if ((sub = atom_sub(sql->sa, max, min)) && (sub = atom_inc(sql->sa, sub)) && (sub = atom_cast(sql->sa, sub, sql_bind_localtype("oid"))))
     736           0 :                                         euniques = MIN(euniques, (BUN) sub->data.val.oval);
     737             :                         }
     738       35228 :                         if (euniques != BUN_NONE)
     739       33120 :                                 nuniques = MAX(nuniques, euniques); /* the highest cardinality sets the estimation */
     740             :                         else
     741             :                                 nuniques = BUN_NONE;
     742             :                 }
     743       23481 :                 if (nuniques != BUN_NONE)
     744             :                         return nuniques;
     745             :         }
     746             :         return lv;
     747             : }
     748             : 
     749             : static sql_rel *
     750     1369029 : rel_get_statistics_(visitor *v, sql_rel *rel)
     751             : {
     752             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
     753     1369029 :         uint8_t has_special_modify = *(uint8_t*) v->data;
     754     1369029 :         bool can_be_pruned = !has_special_modify && v->storage_based_opt;
     755             : 
     756             :         /* Don't look at the same relation twice */
     757     1369029 :         if (are_statistics_gathered(rel->used))
     758             :                 return rel;
     759     1368930 :         rel->used |= statistics_gathered;
     760             : 
     761     1368930 :         switch (rel->op) {
     762      319473 :         case op_basetable: {
     763      319473 :                 sql_table *t = (sql_table *) rel->l;
     764      319473 :                 sqlstore *store = v->sql->session->tr->store;
     765             : 
     766      319473 :                 if (!list_empty(rel->exps)) {
     767     1277785 :                         for (node *n = rel->exps->h ; n ; n = n->next)
     768      958115 :                                 rel_basetable_column_get_statistics(v->sql, rel, n->data);
     769             :                 }
     770             :                 /* Set table row count. TODO? look for remote tables. Don't look at storage for declared tables, because it won't be cleaned */
     771      319674 :                 if (isTable(t) && t->s && !isDeclaredTable(t)) /* count active rows only */
     772      264778 :                         set_count_prop(v->sql->sa, rel, (BUN)store->storage_api.count_del(v->sql->session->tr, t, CNT_ACTIVE));
     773             :                 break;
     774             :         }
     775        2794 :         case op_union:
     776             :         case op_inter:
     777             :         case op_except: {
     778        2794 :                 bool empty_cross = false;
     779        2794 :                 int i = 0;
     780        2794 :                 sql_rel *l = rel->l, *pl = rel->l, *r = rel->r, *pr = rel->r;
     781             : 
     782        2794 :                 while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     783           0 :                         pl = pl->l;
     784        2794 :                 while (is_sample(pr->op) || is_topn(pr->op))
     785           0 :                         pr = pr->l;
     786             :                 /* if it's not a projection, then project and propagate statistics */
     787        2794 :                 if (!is_project(pl->op) && !is_base(pl->op)) {
     788           0 :                         pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     789           0 :                         set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     790           0 :                         pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     791             :                 }
     792        2794 :                 if (!is_project(pr->op) && !is_base(pr->op)) {
     793           0 :                         pr = rel_project(v->sql->sa, pr, rel_projections(v->sql, pr, NULL, 0, 1));
     794           0 :                         set_count_prop(v->sql->sa, pr, get_rel_count(pr->l));
     795           0 :                         pr->exps = exps_exp_visitor_bottomup(v, pr, pr->exps, 0, &rel_propagate_statistics, false);
     796             :                 }
     797             : 
     798       11670 :                 for (node *n = rel->exps->h ; n ; n = n->next) {
     799        8876 :                         empty_cross |= rel_setop_get_statistics(v->sql, rel, pl->exps, pr->exps, n->data, i);
     800        8876 :                         i++;
     801             :                 }
     802             : 
     803             :                 /* propagate row count */
     804        2794 :                 if (is_union(rel->op)) {
     805         277 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     806         277 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     807             : 
     808         277 :                         if (lv == 0 && rv == 0) { /* both sides empty */
     809           2 :                                 if (can_be_pruned)
     810             :                                         empty_cross = true;
     811             :                                 else
     812           2 :                                         set_count_prop(v->sql->sa, rel, 0);
     813         275 :                         } else if (can_be_pruned && lv == 0 && !rel_is_ref(rel)) { /* left side empty */
     814           0 :                                 rel = set_setop_side(v, rel, r);
     815           0 :                                 empty_cross = false; /* don't rewrite again */
     816         275 :                         } else if (can_be_pruned && rv == 0 && !rel_is_ref(rel)) { /* right side empty */
     817           0 :                                 rel = set_setop_side(v, rel, l);
     818           0 :                                 empty_cross = false; /* don't rewrite again */
     819         275 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
     820           7 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
     821             :                         }
     822        2517 :                 } else if (is_inter(rel->op) || is_except(rel->op)) {
     823        2517 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     824        2517 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     825             : 
     826        2517 :                         if (lv == 0) { /* left side empty */
     827          63 :                                 if (can_be_pruned)
     828             :                                         empty_cross = true;
     829             :                                 else
     830           5 :                                         set_count_prop(v->sql->sa, rel, 0);
     831        2454 :                         } else if (rv == 0) { /* right side empty */
     832          17 :                                 if (is_inter(rel->op)) {
     833           3 :                                         if (can_be_pruned)
     834             :                                                 empty_cross = true;
     835             :                                         else
     836           0 :                                                 set_count_prop(v->sql->sa, rel, 0);
     837          14 :                                 } else if (can_be_pruned && !rel_is_ref(rel)) {
     838          14 :                                         rel = set_setop_side(v, rel, l);
     839          14 :                                         empty_cross = false; /* don't rewrite again */
     840             :                                 } else {
     841           0 :                                         set_count_prop(v->sql->sa, rel, lv);
     842             :                                 }
     843             :                         } else {
     844        2437 :                                 set_count_prop(v->sql->sa, rel, lv);
     845             :                         }
     846             :                 }
     847        2794 :                 if (can_be_pruned && empty_cross) {
     848          81 :                         rel_destroy(rel->l);
     849          81 :                         rel->l = NULL;
     850          81 :                         rel_destroy(rel->r);
     851          81 :                         rel->r = NULL;
     852         246 :                         for (node *n = rel->exps->h ; n ; n = n->next) {
     853         165 :                                 sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     854         165 :                                 exp_prop_alias(v->sql->sa, a, e);
     855         165 :                                 n->data = a;
     856             :                         }
     857          81 :                         list_hash_clear(rel->exps);
     858          81 :                         sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     859          81 :                         set_count_prop(v->sql->sa, l, 1);
     860          81 :                         l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     861          81 :                         set_count_prop(v->sql->sa, l, 0);
     862          81 :                         rel->op = op_project;
     863          81 :                         rel->l = l;
     864          81 :                         rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     865          81 :                         set_count_prop(v->sql->sa, rel, 0);
     866          81 :                         set_nodistinct(rel); /* set relations may have distinct flag set */
     867          81 :                         v->changes++;
     868             :                 }
     869             :                 break;
     870             :         }
     871       13507 :         case op_munion: {
     872       13507 :                 list *l = rel->l, *nrels = sa_list(v->sql->sa);
     873       13507 :                 BUN cnt = 0;
     874       13507 :                 bool needs_pruning = false;
     875             : 
     876       13507 :                 if (is_recursive(rel))
     877             :                         break;
     878       51358 :                 for (node *n = l->h; n; n = n->next) {
     879       37922 :                         sql_rel *r = n->data, *pl = r;
     880             : 
     881       37922 :                         while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     882           0 :                                         pl = pl->l;
     883             :                         /* if it's not a projection, then project and propagate statistics */
     884       37922 :                         if (!is_project(pl->op) && !is_base(pl->op)) {
     885           0 :                                 pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     886           0 :                                 set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     887           0 :                                 pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     888             :                         }
     889       37922 :                         nrels = append(nrels, pl);
     890             :                         /* we need new munion statistics */
     891             :                         /* propagate row count */
     892       37922 :                         BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     893             :                         /* if PROP_COUNT does not exist we assume at least a row (see get_rel_count def) */
     894       37922 :                         if (rv == BUN_NONE) {
     895        1224 :                                 cnt++;
     896        1224 :                                 continue;
     897             :                         }
     898       36698 :                         if (!rv && can_be_pruned)
     899        6738 :                                 needs_pruning = true;
     900             :                         /* overflow check */
     901       36698 :                         if (rv > (BUN_MAX - cnt))
     902       37922 :                                 rv = BUN_MAX;
     903             :                         else
     904       36698 :                                 cnt += rv;
     905             :                 }
     906       13436 :                 int i = 0;
     907       77474 :                 for (node *n = rel->exps->h ; n ; n = n->next, i++)
     908       64038 :                         rel_munion_get_statistics(v->sql, rel, nrels, n->data, i);
     909             : 
     910       13436 :                 if (needs_pruning && !rel_is_ref(rel)) {
     911        4576 :                         v->changes++;
     912        4576 :                         list *nl = sa_list(l->sa);
     913             : 
     914       16871 :                         for (node *n = nrels->h; n; n = n->next) {
     915       12295 :                                 sql_rel *r = n->data;
     916       12295 :                                 BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     917             : 
     918       12295 :                                 if (!rv) { /* keep last for now */
     919        6267 :                                         rel_destroy(r);
     920        6267 :                                         continue;
     921             :                                 }
     922        6028 :                                 nl = append(nl, r);
     923             :                         }
     924        4576 :                         rel->l = nl;
     925        4576 :                         if (list_length(nl) == 1) {
     926        4240 :                                 sql_rel *l = rel->l = nl->h->data; /* ugh */
     927        4240 :                                 rel->r = NULL;
     928        4240 :                                 rel->op = op_project;
     929             : 
     930       20979 :                                 for (node *n = rel->exps->h, *m = l->exps->h ; n && m ; n = n->next, m = m->next) {
     931       16739 :                                         sql_exp *pe = n->data, *ie = m->data;
     932       16739 :                                         sql_exp *ne = exp_ref(v->sql, ie);
     933       16739 :                                         exp_prop_alias(v->sql->sa, ne, pe);
     934       16739 :                                         n->data = ne;
     935             :                                 }
     936        4240 :                                 list_hash_clear(rel->exps);
     937         336 :                         } else if (list_empty(nl)) {
     938             :                                 /* empty select (project [ nils ] ) */
     939         445 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     940         345 :                                         sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     941         345 :                                         exp_prop_alias(v->sql->sa, a, e);
     942         345 :                                         n->data = a;
     943             :                                 }
     944         100 :                                 list_hash_clear(rel->exps);
     945         100 :                                 sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     946         100 :                                 set_count_prop(v->sql->sa, l, 1);
     947         100 :                                 l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     948         100 :                                 set_count_prop(v->sql->sa, l, 0);
     949         100 :                                 rel->op = op_project;
     950         100 :                                 rel->r = NULL;
     951         100 :                                 rel->l = l;
     952         100 :                                 rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     953         100 :                                 set_count_prop(v->sql->sa, rel, 0);
     954         100 :                                 set_nodistinct(rel); /* set relations may have distinct flag set */
     955             :                         }
     956             :                 } else {
     957        8860 :                         set_count_prop(v->sql->sa, rel, cnt);
     958             :                 }
     959             :                 break;
     960             :         }
     961      549025 :         case op_join:
     962             :         case op_left:
     963             :         case op_right:
     964             :         case op_full:
     965             :         case op_semi:
     966             :         case op_anti:
     967             :         case op_select:
     968             :         case op_project:
     969             :         case op_groupby: {
     970      549025 :                 if (is_groupby(rel->op) && !list_empty(rel->r))
     971       16842 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     972      549023 :                 rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, &rel_propagate_statistics, false);
     973      549007 :                 if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r))
     974       41964 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     975             :                 /* The following optimizations can only be applied after propagating the statistics to rel->exps */
     976      549010 :                 if (can_be_pruned && (is_join(rel->op) || is_select(rel->op)) && !list_empty(rel->exps)) {
     977      144472 :                         int changes = v->changes;
     978      144472 :                         rel->exps = rel_prune_predicates(v, rel);
     979      144472 :                         if (v->changes > changes)
     980        3675 :                                 rel = rewrite_simplify(v, 0, v->value_based_opt, rel);
     981             :                 }
     982             : 
     983             :                 /* propagate row count */
     984      549010 :                 sql_rel *l = rel->l, *r = rel->r;
     985      549010 :                 switch (rel->op) {
     986      137465 :                 case op_join:
     987             :                 case op_left:
     988             :                 case op_right:
     989             :                 case op_full: {
     990      137465 :                         BUN lv = get_rel_count(l), rv = get_rel_count(r), uniques_estimate = BUN_MAX, join_idx_estimate = BUN_MAX;
     991             : 
     992      137465 :                         if (!list_empty(rel->exps) && !is_single(rel)) {
     993      251415 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     994      128189 :                                         sql_exp *e = n->data, *el = e->l, *er = e->r;
     995             : 
     996      128189 :                                         if (find_prop(e->p, PROP_JOINIDX)) {
     997         670 :                                                 join_idx_estimate = lv>rv?lv:rv;
     998      127519 :                                         } else if (e->type == e_cmp && e->flag == cmp_equal) {
     999             :                                                 /* if one of the sides is unique, the cardinality will be that exact number, but look for nulls */
    1000      123637 :                                                 if (!is_semantics(e) || !has_nil(el) || !has_nil(er)) {
    1001      123461 :                                                         BUN lu = 0, ru = 0;
    1002      123461 :                                                         prop *p = NULL;
    1003      123461 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES)))
    1004       91543 :                                                                 lu = (BUN) p->value.dval;
    1005      123461 :                                                         if ((p = find_prop(er->p, PROP_NUNIQUES)))
    1006      106297 :                                                                 ru = (BUN) p->value.dval;
    1007      123461 :                                                         if (is_unique(el) || lu > lv) {
    1008       74787 :                                                                 BUN ncount = (is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv;
    1009       74787 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1010       48674 :                                                         } else if (is_unique(er) || ru > rv) {
    1011       30619 :                                                                 BUN ncount = (is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv;
    1012       30619 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1013             :                                                         }
    1014             :                                                 }
    1015             :                                         }
    1016             :                                 }
    1017             :                         }
    1018      137465 :                         if (is_single(rel)) {
    1019        4806 :                                 set_count_prop(v->sql->sa, rel, lv);
    1020      132659 :                         } else if (join_idx_estimate != BUN_MAX) {
    1021         668 :                                 set_count_prop(v->sql->sa, rel, join_idx_estimate);
    1022      131991 :                         } else if (uniques_estimate != BUN_MAX) {
    1023       98787 :                                 set_count_prop(v->sql->sa, rel, uniques_estimate);
    1024       33204 :                         } else if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1025             :                                 /* corner cases for outer joins */
    1026         123 :                                 if (is_left(rel->op)) {
    1027         111 :                                         set_count_prop(v->sql->sa, rel, lv);
    1028          12 :                                 } else if (is_right(rel->op)) {
    1029           3 :                                         set_count_prop(v->sql->sa, rel, rv);
    1030           9 :                                 } else if (is_full(rel->op) && lv != BUN_NONE && rv != BUN_NONE) {
    1031           9 :                                         set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
    1032           0 :                                 } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1033           0 :                                         set_count_prop(v->sql->sa, rel, 0);
    1034             :                                 }
    1035       33081 :                         } else if (lv == 0) {
    1036        1206 :                                 set_count_prop(v->sql->sa, rel, (is_right(rel->op) || is_full(rel->op)) ? rv : 0);
    1037       32464 :                         } else if (rv == 0) {
    1038        1576 :                                 set_count_prop(v->sql->sa, rel, (is_left(rel->op) || is_full(rel->op)) ? lv : 0);
    1039       31382 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1040       18544 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX / lv)) ? BUN_MAX : (lv * rv)); /* overflow check */
    1041             :                         }
    1042             :                         break;
    1043             :                 }
    1044        2941 :                 case op_anti:
    1045        2941 :                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1046        2941 :                         break;
    1047      113712 :                 case op_semi:
    1048             :                 case op_select:
    1049             :                         /* TODO calculate cardinalities using selectivities */
    1050      113712 :                         if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1051        5160 :                                 set_count_prop(v->sql->sa, rel, 0);
    1052             :                         } else {
    1053      213812 :                                 if (!list_empty(rel->exps) && !is_single(rel)) {
    1054      105260 :                                         BUN cnt = get_rel_count(l), u = 1;
    1055      171338 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1056      112434 :                                                 sql_exp *e = n->data, *el = e->l, *er = e->r;
    1057             : 
    1058             :                                                 /* simple expressions first */
    1059      112434 :                                                 if (e->type == e_cmp && e->flag == cmp_equal && exp_is_atom(er)) {
    1060             :                                                         /* use selectivity */
    1061       61863 :                                                         prop *p;
    1062       61863 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES))) {
    1063       46356 :                                                                 u = (BUN) p->value.dval;
    1064       46356 :                                                                 break;
    1065             :                                                         }
    1066             :                                                 }
    1067             :                                         }
    1068             :                                         /* u is an *estimate*, so don't set count_prop to 0 unless cnt is 0 */
    1069      105260 :                                         set_count_prop(v->sql->sa, rel, cnt == 0 ? 0 : u == 0 || u > cnt ? 1 : cnt/u);
    1070             :                                 } else {
    1071        3292 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1072             :                                 }
    1073             :                         }
    1074             :                         break;
    1075      270121 :                 case op_project:
    1076      270121 :                         if (l) {
    1077      257398 :                                 if (need_distinct(rel)) {
    1078         114 :                                         set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->exps));
    1079             :                                 } else {
    1080      257284 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1081             :                                 }
    1082             :                         } else {
    1083       12723 :                                 BUN card = 1;
    1084             : 
    1085       12723 :                                 if (!list_empty(rel->exps)) {
    1086       37990 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1087       25267 :                                                 sql_exp *e = n->data;
    1088       25267 :                                                 card = MAX(card, trivial_project_exp_card(e));
    1089             :                                         }
    1090             :                                 }
    1091       12723 :                                 set_count_prop(v->sql->sa, rel, card);
    1092             :                         }
    1093             :                         break;
    1094       24359 :                 case op_groupby:
    1095       24359 :                         if (list_empty(rel->r)) {
    1096        7517 :                                 set_count_prop(v->sql->sa, rel, 1);
    1097             :                         } else {
    1098       16842 :                                 set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->r));
    1099             :                         }
    1100             :                         break;
    1101             :                 default:
    1102             :                         break;
    1103             :                 }
    1104             :                 break;
    1105             :         }
    1106       16653 :         case op_topn: {
    1107       16653 :                 BUN lv = get_rel_count(rel->l);
    1108             : 
    1109       16653 :                 if (lv != BUN_NONE) {
    1110       16630 :                         sql_exp *le = rel->exps->h->data, *oe = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
    1111          84 :                         if (oe && oe->l && exp_is_not_null(oe)) { /* no parameters */
    1112          84 :                                 BUN offset = (BUN) ((atom*)oe->l)->data.val.lval;
    1113          84 :                                 lv = offset >= lv ? 0 : lv - offset;
    1114             :                         }
    1115       16630 :                         if (le->l && exp_is_not_null(le)) {
    1116       16595 :                                 BUN limit = (BUN) ((atom*)le->l)->data.val.lval;
    1117       16595 :                                 lv = MIN(lv, limit);
    1118             :                         }
    1119       16630 :                         set_count_prop(v->sql->sa, rel, lv);
    1120             :                 }
    1121             :                 break;
    1122             :         }
    1123          22 :         case op_sample: {
    1124          22 :                 BUN lv = get_rel_count(rel->l);
    1125             : 
    1126          22 :                 if (lv != BUN_NONE) {
    1127           4 :                         sql_exp *se = rel->exps->h->data;
    1128           4 :                         sql_subtype *tp = exp_subtype(se);
    1129             : 
    1130           4 :                         if (se->l && tp->type->eclass == EC_NUM) { /* sample is a number of rows */
    1131           4 :                                 BUN sample = (BUN) ((atom*)se->l)->data.val.lval;
    1132           4 :                                 lv = MIN(lv, sample);
    1133           0 :                         } else if (se->l) { /* sample is a percentage of rows */
    1134           0 :                                 dbl percent = ((atom*)se->l)->data.val.dval;
    1135           0 :                                 assert(tp->type->eclass == EC_FLT);
    1136           0 :                                 lv = (BUN) ceil((dbl)lv * percent);
    1137             :                         }
    1138           4 :                         set_count_prop(v->sql->sa, rel, lv);
    1139             :                 }
    1140             :                 break;
    1141             :         }
    1142        6225 :         case op_table: {
    1143        6225 :                 sql_exp *op = rel->r;
    1144        6225 :                 if (rel->flag != TRIGGER_WRAPPER && op) {
    1145        5913 :                         sql_subfunc *f = op->f;
    1146        5913 :                         if (f->func->lang == FUNC_LANG_SQL) {
    1147          97 :                                 set_count_prop(v->sql->sa, rel, 1000 /* just some fallback value */);
    1148        5816 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "storage") == 0) {
    1149         839 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of catalog */);
    1150        4977 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "db_users") == 0) {
    1151           0 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of users */);
    1152        4977 :                         } else if (f->func->lang == FUNC_LANG_MAL && strncmp(f->func->base.name, "querylog", 8) == 0) {
    1153        1105 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of querylog */);
    1154        3872 :                         } else if (f->func->lang == FUNC_LANG_MAL &&
    1155        3697 :                                            (strcmp(f->func->base.name, "queue") == 0 ||
    1156        3428 :                                                 strcmp(f->func->base.name, "optimizers") == 0 ||
    1157        3111 :                                                 strcmp(f->func->base.name, "env") == 0 ||
    1158        2744 :                                                 strcmp(f->func->base.name, "keywords") == 0 ||
    1159        2744 :                                                 strcmp(f->func->base.name, "statistics") == 0 ||
    1160        2065 :                                                 strcmp(f->func->base.name, "rejects") == 0 ||
    1161        1806 :                                                 strcmp(f->func->base.name, "schemastorage") == 0 ||
    1162        1806 :                                                 strncmp(f->func->base.name, "storage", 7) == 0 ||
    1163        1806 :                                                 strcmp(f->func->base.name, "sessions") == 0) ) {
    1164        2186 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of queue */);
    1165             :                         }
    1166             :                         /* else {
    1167             :                                 printf("%%func needs stats : %s\n", f->func->base.name);
    1168             :                         } */
    1169             :                 }
    1170             :                 break;
    1171             :         }
    1172             :         /*These relations are less important for now
    1173             :           TODO later we can tune it */
    1174             :         case op_insert:
    1175             :         case op_update:
    1176             :         case op_delete:
    1177             :         case op_truncate:
    1178             :         case op_ddl:
    1179             :         default:
    1180             :                 break;
    1181             :         }
    1182             : 
    1183             :         return rel;
    1184             : }
    1185             : 
    1186             : static sql_rel *
    1187      557072 : rel_get_statistics(visitor *v, global_props *gp, sql_rel *rel)
    1188             : {
    1189             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
    1190      557072 :         uint8_t has_special_modify = (uint8_t) gp->has_special_modify;
    1191      557072 :         v->data = &has_special_modify;
    1192      557072 :         rel = rel_visitor_bottomup(v, rel, &rel_get_statistics_);
    1193      557504 :         v->data = gp;
    1194      557504 :         return rel;
    1195             : }
    1196             : 
    1197             : run_optimizer
    1198      734184 : bind_get_statistics(visitor *v, global_props *gp)
    1199             : {
    1200      734184 :         (void) v;
    1201      734184 :         return (gp->opt_level == 1 && !gp->cnt[op_insert]) ? rel_get_statistics : NULL;
    1202             : }
    1203             : 
    1204             : 
    1205             : static bool
    1206       45468 : point_select_on_unique_column(sql_rel *rel)
    1207             : {
    1208       45468 :         if (is_select(rel->op) && !list_empty(rel->exps)) {
    1209       38569 :                 for (node *n = rel->exps->h; n ; n = n->next) {
    1210       20067 :                         sql_exp *e = n->data, *el = e->l, *er = e->r, *found = NULL;
    1211             : 
    1212       20067 :                         if (is_compare(e->type) && e->flag == cmp_equal) {
    1213       15239 :                                 if (is_numeric_upcast(el))
    1214           0 :                                         el = el->l;
    1215       15239 :                                 if (is_numeric_upcast(er))
    1216           0 :                                         er = er->l;
    1217       15239 :                                 if (is_alias(el->type) && exp_is_atom(er) && (found = rel_find_exp(rel->l, el)) &&
    1218         764 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(found) || !has_nil(er)))
    1219             :                                         return true;
    1220       14475 :                                 if (is_alias(er->type) && exp_is_atom(el) && (found = rel_find_exp(rel->l, er)) &&
    1221           0 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(el) || !has_nil(found)))
    1222             :                                         return true;
    1223             :                         }
    1224             :                 }
    1225             :         }
    1226             :         return false;
    1227             : }
    1228             : 
    1229             : /*
    1230             :  * A point select on an unique column reduces the number of rows to 1. If the same select is under a
    1231             :  * join, the opposite side's select can be pushed above the join.
    1232             :  */
    1233             : static inline sql_rel *
    1234      698658 : rel_push_select_up(visitor *v, sql_rel *rel)
    1235             : {
    1236      698658 :         if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) || is_semi(rel->op)) && !is_single(rel)) {
    1237      123001 :                 sql_rel *l = rel->l, *r = rel->r;
    1238      123001 :                 bool can_pushup_left = is_select(l->op) && !rel_is_ref(l) && !is_single(l) && (is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op)),
    1239      123001 :                         can_pushup_right = is_select(r->op) && !rel_is_ref(r) && !is_single(r) && (is_innerjoin(rel->op) || is_right(rel->op));
    1240             : 
    1241      123001 :                 if (can_pushup_left || can_pushup_right) {
    1242       35961 :                         if (can_pushup_left)
    1243       20713 :                                 can_pushup_left = point_select_on_unique_column(r);
    1244       35961 :                         if (can_pushup_right)
    1245       24755 :                                 can_pushup_right = point_select_on_unique_column(l);
    1246             : 
    1247             :                         /* if both selects retrieve one row each, it's not worth it to push both up */
    1248       35961 :                         if (can_pushup_left && !can_pushup_right) {
    1249         698 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1250         698 :                                 nrel->l = l->l;
    1251         698 :                                 rel = rel_inplace_select(rel, nrel, l->exps);
    1252         698 :                                 assert(is_select(rel->op));
    1253         698 :                                 v->changes++;
    1254       35263 :                         } else if (!can_pushup_left && can_pushup_right) {
    1255          14 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1256          14 :                                 nrel->r = r->l;
    1257          14 :                                 rel = rel_inplace_select(rel, nrel, r->exps);
    1258          14 :                                 assert(is_select(rel->op));
    1259          14 :                                 v->changes++;
    1260             :                         }
    1261             :                 }
    1262             :         }
    1263      698658 :         return rel;
    1264             : }
    1265             : 
    1266             : static int
    1267       39115 : sql_class_base_score(visitor *v, sql_column *c, sql_subtype *t, bool equality_based)
    1268             : {
    1269       39115 :         int de;
    1270             : 
    1271       39115 :         if (!t)
    1272             :                 return 0;
    1273       39115 :         switch (ATOMstorage(t->type->localtype)) {
    1274             :         case TYPE_bte:
    1275             :                 return 150 - 8;
    1276        1607 :         case TYPE_sht:
    1277        1607 :                 return 150 - 16;
    1278       16484 :         case TYPE_int:
    1279       16484 :                 return 150 - 32;
    1280         471 :         case TYPE_void:
    1281             :         case TYPE_lng:
    1282         471 :                 return 150 - 64;
    1283             :         case TYPE_uuid:
    1284             : #ifdef HAVE_HGE
    1285             :         case TYPE_hge:
    1286             : #endif
    1287             :                 return 150 - 128;
    1288           2 :         case TYPE_flt:
    1289           2 :                 return 75 - 24;
    1290             :         case TYPE_dbl:
    1291             :                 return 75 - 53;
    1292       16220 :         default:
    1293       16220 :                 if (equality_based && c && v->storage_based_opt && (de = mvc_is_duplicate_eliminated(v->sql, c)))
    1294        1495 :                         return 150 - de * 8;
    1295             :                 /* strings and blobs not duplicate eliminated don't get any points here */
    1296             :                 return 0;
    1297             :         }
    1298             : }
    1299             : 
    1300             : static int
    1301       15137 : score_se_base(visitor *v, sql_rel *rel, sql_exp *e)
    1302             : {
    1303       15137 :         int res = 0;
    1304       15137 :         sql_subtype *t = exp_subtype(e);
    1305       15137 :         sql_column *c = NULL;
    1306             : 
    1307       15137 :         if (e->type == e_convert) /* keep unsafes at the end (TODO improve) */
    1308             :                 return -1000;
    1309             :         /* can we find out if the underlying table is sorted */
    1310       14729 :         if ((c = exp_find_column(rel, e, -2)) && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1311       14729 :                 res += 600;
    1312             : 
    1313             :         /* prefer the shorter var types over the longer ones */
    1314       14729 :         res += sql_class_base_score(v, c, t, is_equality_or_inequality_exp(e->flag)); /* smaller the type, better */
    1315       14729 :         return res;
    1316             : }
    1317             : 
    1318             : static int
    1319       18526 : score_se(visitor *v, sql_rel *rel, sql_exp *e)
    1320             : {
    1321       18526 :         int score = 0;
    1322       18526 :         if (e->type == e_cmp && !is_complex_exp(e->flag)) {
    1323       15137 :                 sql_exp *l = e->l;
    1324             : 
    1325       15137 :                 while (l->type == e_cmp) { /* go through nested comparisons */
    1326           2 :                         sql_exp *ll;
    1327             : 
    1328           2 :                         if (l->flag == cmp_filter || l->flag == cmp_or)
    1329           0 :                                 ll = ((list*)l->l)->h->data;
    1330             :                         else
    1331           2 :                                 ll = l->l;
    1332           2 :                         if (ll->type != e_cmp)
    1333             :                                 break;
    1334             :                         l = ll;
    1335             :                 }
    1336       15137 :                 score += score_se_base(v, rel, l);
    1337             :         }
    1338       18526 :         score += exp_keyvalue(e);
    1339       18526 :         return score;
    1340             : }
    1341             : 
    1342             : static inline sql_rel *
    1343      698659 : rel_select_order(visitor *v, sql_rel *rel)
    1344             : {
    1345      698659 :         int *scores = NULL;
    1346      698659 :         sql_exp **exps = NULL;
    1347             : 
    1348      698659 :         if (is_select(rel->op) && list_length(rel->exps) > 1) {
    1349        7920 :                 node *n;
    1350        7920 :                 int i, nexps = list_length(rel->exps);
    1351        7920 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, nexps);
    1352        7920 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, nexps);
    1353             : 
    1354       26446 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next) {
    1355       18555 :                         exps[i] = n->data;
    1356       18555 :                         if (find_prop(exps[i]->p, PROP_HASHCOL))
    1357             :                                 return rel;
    1358       18526 :                         scores[i] = score_se(v, rel, n->data);
    1359             :                 }
    1360        7891 :                 GDKqsort(scores, exps, NULL, nexps, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1361             : 
    1362       26417 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next)
    1363       18526 :                         n->data = exps[i];
    1364             :         }
    1365             : 
    1366             :         return rel;
    1367             : }
    1368             : 
    1369             : /* Compute the efficiency of using this expression early in a group by list */
    1370             : static int
    1371       24386 : score_gbe(visitor *v, sql_rel *rel, sql_exp *e)
    1372             : {
    1373       24386 :         int res = 0;
    1374       24386 :         sql_subtype *t = exp_subtype(e);
    1375       24386 :         sql_column *c = exp_find_column(rel, e, -2);
    1376             : 
    1377       24386 :         if (e->card == CARD_ATOM) /* constants are trivial to group */
    1378          38 :                 res += 1000;
    1379             :         /* can we find out if the underlying table is sorted */
    1380       24386 :         if (is_unique(e) || find_prop(e->p, PROP_HASHCOL) || (c && v->storage_based_opt && mvc_is_unique(v->sql, c))) /* distinct columns */
    1381        2466 :                 res += 700;
    1382       21132 :         if (c && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1383        2484 :                 res += 500;
    1384       24386 :         if (find_prop(e->p, PROP_HASHIDX)) /* has hash index */
    1385           0 :                 res += 200;
    1386             : 
    1387             :         /* prefer the shorter var types over the longer ones */
    1388       24386 :         res += sql_class_base_score(v, c, t, true); /* smaller the type, better */
    1389       24386 :         return res;
    1390             : }
    1391             : 
    1392             : /* reorder group by expressions */
    1393             : static inline sql_rel *
    1394      698659 : rel_groupby_order(visitor *v, sql_rel *rel)
    1395             : {
    1396      698659 :         int *scores = NULL;
    1397      698659 :         sql_exp **exps = NULL;
    1398             : 
    1399      698659 :         if (is_groupby(rel->op) && list_length(rel->r) > 1) {
    1400        7998 :                 node *n;
    1401        7998 :                 list *gbe = rel->r;
    1402        7998 :                 int i, ngbe = list_length(gbe);
    1403        7998 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, ngbe);
    1404        7998 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, ngbe);
    1405             : 
    1406             :                 /* first sorting step, give priority for integers and sorted columns */
    1407       32384 :                 for (i = 0, n = gbe->h; n; i++, n = n->next) {
    1408       24386 :                         exps[i] = n->data;
    1409       24386 :                         scores[i] = score_gbe(v, rel, exps[i]);
    1410             :                 }
    1411        7998 :                 GDKqsort(scores, exps, NULL, ngbe, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1412             : 
    1413             :                 /* second sorting step, give priority to strings with lower number of digits */
    1414       17496 :                 for (i = ngbe - 1; i && !scores[i]; i--); /* find expressions with no score from the first round */
    1415        7998 :                 if (scores[i])
    1416        7002 :                         i++;
    1417        7998 :                 if (ngbe - i > 1) {
    1418       12055 :                         for (int j = i; j < ngbe; j++) {
    1419        9065 :                                 sql_subtype *t = exp_subtype(exps[j]);
    1420        9065 :                                 scores[j] = t ? t->digits : 0;
    1421             :                         }
    1422             :                         /* the less number of digits the better, order ascending */
    1423        2990 :                         GDKqsort(scores + i, exps + i, NULL, ngbe - i, sizeof(int), sizeof(void *), TYPE_int, false, true);
    1424             :                 }
    1425             : 
    1426       32384 :                 for (i = 0, n = gbe->h; n; i++, n = n->next)
    1427       24386 :                         n->data = exps[i];
    1428             :         }
    1429             : 
    1430      698660 :         return rel;
    1431             : }
    1432             : 
    1433             : /* This optimization loop contains optimizations that can potentially use statistics */
    1434             : static sql_rel *
    1435      698659 : rel_final_optimization_loop_(visitor *v, sql_rel *rel)
    1436             : {
    1437             :         /* Run rel_push_select_up only once at the end to avoid an infinite optimization loop */
    1438      698659 :         rel = rel_push_select_up(v, rel);
    1439      698658 :         rel = rel_select_order(v, rel);
    1440             : 
    1441             :         /* TODO? Maybe later add rel_simplify_count, rel_join2semijoin, rel_simplify_fk_joins,
    1442             :            rel_distinct_project2groupby, rel_simplify_predicates, rel_simplify_math,
    1443             :            rel_distinct_aggregate_on_unique_values */
    1444             : 
    1445      698659 :         rel = rel_groupby_order(v, rel);
    1446      698659 :         return rel;
    1447             : }
    1448             : 
    1449             : static sql_rel *
    1450       70855 : rel_final_optimization_loop(visitor *v, global_props *gp, sql_rel *rel)
    1451             : {
    1452       70855 :         (void) gp;
    1453       70855 :         return rel_visitor_bottomup(v, rel, &rel_final_optimization_loop_);
    1454             : }
    1455             : 
    1456             : run_optimizer
    1457      734656 : bind_final_optimization_loop(visitor *v, global_props *gp)
    1458             : {
    1459      734656 :         int flag = v->sql->sql_optimizer;
    1460             :         /* At the moment, this optimizer has dependency on 3 flags */
    1461      565428 :         return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_select]) &&
    1462      805514 :                 (flag & push_select_up) && (flag & optimize_select_and_joins_topdown) && (flag & optimize_projections) ? rel_final_optimization_loop : NULL;
    1463             : }

Generated by: LCOV version 1.14