LCOV - code coverage report
Current view: top level - sql/server - rel_statistics.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 834 885 94.2 %
Date: 2025-03-24 23:16:36 Functions: 26 26 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024, 2025 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include "rel_optimizer_private.h"
      15             : #include "rel_statistics.h"
      16             : #include "rel_basetable.h"
      17             : #include "rel_rewriter.h"
      18             : #include "sql_storage.h"
      19             : 
      20             : static sql_exp *
      21     3487630 : comparison_find_column(sql_exp *input, sql_exp *e)
      22             : {
      23     3487630 :         switch (input->type) {
      24      100105 :         case e_convert: {
      25      100105 :                 list *types = (list *)input->r;
      26      100105 :                 sql_class from = ((sql_subtype*)types->h->data)->type->eclass, to = ((sql_subtype*)types->h->next->data)->type->eclass;
      27      100105 :                 if (from == to)
      28      193445 :                         return comparison_find_column(input->l, e) ? input : NULL;
      29             :                 return NULL;
      30             :         }
      31     3067442 :         case e_column:
      32     3067442 :                 return exp_match(e, input) ? input : NULL;
      33             :         default:
      34             :                 return NULL;
      35             :         }
      36             : }
      37             : 
      38             : /* multi lo <= col <= hi, maybe still be false even if lo or hi are NULL, possibly similar for filter on multiple
      39             :  * columns */
      40             : #define comp_single_column(c) (!c->f)
      41             : 
      42             : static sql_exp *
      43     8815337 : rel_propagate_column_ref_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
      44             : {
      45     8898616 :         assert(e->type == e_column);
      46     8898616 :         if (rel) {
      47     8898535 :                 switch(rel->op) {
      48     4114898 :                 case op_left:
      49             :                 case op_right:
      50             :                 case op_full:
      51             :                 case op_join:
      52             :                 case op_select:
      53             :                 case op_anti:
      54             :                 case op_semi: {
      55     4114898 :                         bool found_without_semantics = false, found_left = false, found_right = false, still_unique = false;
      56             : 
      57     4114898 :                         if ((is_innerjoin(rel->op) || is_select(rel->op)) && list_length(rel->exps) == 1 && exp_is_false(rel->exps->h->data))
      58             :                                 return NULL; /* nothing will pass, skip */
      59             : 
      60             :                         /* propagate from the bottom first */
      61     4094446 :                         if (rel_propagate_column_ref_statistics(sql, rel->l, e))
      62             :                                 found_left = true;
      63     2601530 :                         if (!found_left && is_join(rel->op) && rel_propagate_column_ref_statistics(sql, rel->r, e))
      64     4094446 :                                 found_right = true;
      65             : 
      66     4094446 :                         if (!found_left && !found_right)
      67             :                                 return NULL;
      68     1790823 :                         if (!list_empty(rel->exps) && rel->op != op_anti) { /* if there's an or, the MIN and MAX get difficult to propagate */
      69     3515776 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
      70     1820293 :                                         sql_exp *comp = n->data, *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL, *fe = comp->f, *fne = NULL;
      71             : 
      72     1820293 :                                         if (comp->type == e_cmp) {
      73     1819289 :                                                 if (is_theta_exp(comp->flag) && ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) || (fe && (fne = comparison_find_column(fe, e))))) {
      74      124772 :                                                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
      75      124772 :                                                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
      76             : 
      77             :                                                         /* not semantics found or if explicitly filtering not null values from the column */
      78      124772 :                                                         found_without_semantics |= (!is_semantics(comp) && comp_single_column(comp)) || (comp->flag == cmp_equal && lne && is_anti(comp) && exp_is_null(re));
      79      124772 :                                                         still_unique |= comp->flag == cmp_equal && is_unique(le) && is_unique(re); /* unique if only equi-joins on unique columns are there */
      80      124772 :                                                         if (is_full(rel->op) || (is_left(rel->op) && found_left) || (is_right(rel->op) && found_right)) /* on outer joins, min and max cannot be propagated on some cases */
      81       19870 :                                                                 continue;
      82             :                                                         /* if (end2 >= start1 && start2 <= end1) then the 2 intervals are intersected */
      83      104902 :                                                         if (fe && lval_min && lval_max) { /* range case, the middle expression must intersect the other two */
      84        4618 :                                                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
      85        4618 :                                                                 int int1 = rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0,
      86        4618 :                                                                         int2 = fval_min && fval_max && atom_cmp(fval_max, lval_min) >= 0 && atom_cmp(fval_min, lval_max) <= 0,
      87        4618 :                                                                         symmetric = is_symmetric(comp);
      88             : 
      89        4618 :                                                                 if (is_anti(comp) || (!symmetric && fval_min && rval_max && atom_cmp(fval_min, rval_max) < 0)) /* for asymmetric case the re range must be after the fe range */
      90        1815 :                                                                         continue;
      91        2803 :                                                                 if (lne && int1 && int2) {
      92         103 :                                                                         if (symmetric) {
      93           0 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
      94           0 :                                                                                 atom *nmin = statistics_atom_min(sql, rval_min, fval_min), *nmax = statistics_atom_max(sql, rval_max, fval_max);
      95             :                                                                                 /* max is min from le and (max from re and fe max) */
      96           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, nmax, p2->value.pval) : nmax);
      97             :                                                                                 /* min is max from le and (min from re and fe min) */
      98           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, nmin, p1->value.pval) : nmin);
      99             :                                                                         } else {
     100         103 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
     101             :                                                                                 /* max is min from le and fe max */
     102         103 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, fval_max, p2->value.pval) : fval_max);
     103             :                                                                                 /* min is max from le and re min */
     104         103 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, rval_min, p1->value.pval) : rval_min);
     105             :                                                                         }
     106        2700 :                                                                 } else if (rne) {
     107         590 :                                                                         if (symmetric && int1 && int2) { /* min is max from le and (min from re and fe min) */
     108           0 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     109           0 :                                                                                 atom *nmin = p ? statistics_atom_min(sql, p->value.pval, fval_min) : fval_min;
     110           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, nmin, lval_min));
     111         590 :                                                                         } else if (int1) { /* min is max from le and re min */
     112         100 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     113         100 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     114             :                                                                         }
     115        2110 :                                                                 } else if (fne) {
     116         549 :                                                                         if (symmetric && int1 && int2) { /* max is min from le and (max from re and fe max) */
     117           0 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     118           0 :                                                                                 atom *nmax = p ? statistics_atom_max(sql, p->value.pval, rval_max) : rval_max;
     119           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, nmax, lval_max) : nmax);
     120         549 :                                                                         } else if (int2) { /* max is min from le and fe max */
     121         266 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     122         266 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     123             :                                                                         }
     124             :                                                                 }
     125      100284 :                                                         } else if (lval_min && lval_max && rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0) {
     126             :                                                                 /* both min and max must be set and the intervals must overlap */
     127       41888 :                                                                 switch (comp->flag) {
     128       28227 :                                                                 case cmp_equal: /* for equality reduce */
     129       28227 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_max(sql, lval_max, rval_max) : statistics_atom_min(sql, lval_max, rval_max));
     130       28227 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_min(sql, lval_min, rval_min) : statistics_atom_max(sql, lval_min, rval_min));
     131       28227 :                                                                         break;
     132        4345 :                                                                 case cmp_notequal: /* for inequality expand */
     133        4345 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_min(sql, lval_max, rval_max) : statistics_atom_max(sql, lval_max, rval_max));
     134        4345 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_max(sql, lval_min, rval_min) : statistics_atom_min(sql, lval_min, rval_min));
     135        4345 :                                                                         break;
     136        5310 :                                                                 case cmp_gt:
     137             :                                                                 case cmp_gte:
     138        9682 :                                                                         if (!is_anti(comp) && lne) { /* min is max from both min */
     139        4372 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     140        4372 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min, p->value.pval) : rval_min);
     141         938 :                                                                         } else if (!is_anti(comp)) { /* max is min from both max */
     142         938 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     143         938 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     144             :                                                                         }
     145             :                                                                         break;
     146        4006 :                                                                 case cmp_lt:
     147             :                                                                 case cmp_lte:
     148        7205 :                                                                         if (!is_anti(comp) && lne) { /* max is min from both max */
     149        3199 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     150        3199 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max, p->value.pval) : rval_max);
     151         807 :                                                                         } else if (!is_anti(comp)) { /* min is max from both min */
     152         807 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     153         807 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     154             :                                                                         }
     155             :                                                                         break;
     156             :                                                                 default: /* Maybe later I can do cmp_in and cmp_notin */
     157             :                                                                         break;
     158             :                                                                 }
     159             :                                                         }
     160             :                                                 }
     161             :                                         }
     162             :                                 }
     163             :                         }
     164     1790823 :                         if (is_full(rel->op) || (is_left(rel->op) && found_right) || (is_right(rel->op) && found_left))
     165       35775 :                                 set_has_nil(e);
     166     1790823 :                         if (!is_outerjoin(rel->op) && found_without_semantics) /* at an outer join, null values pass */
     167       94044 :                                 set_has_no_nil(e);
     168     1790823 :                         if (is_join(rel->op) && is_unique(e) && !still_unique)
     169      119329 :                                 set_not_unique(e);
     170     1790823 :                         return e;
     171             :                 }
     172     4680990 :                 case op_table:
     173             :                 case op_basetable:
     174             :                 case op_union:
     175             :                 case op_except:
     176             :                 case op_inter:
     177             :                 case op_munion:
     178             :                 case op_project:
     179             :                 case op_groupby: {
     180     4680990 :                         if (is_recursive(rel))
     181             :                                 return NULL;
     182     4680793 :                         sql_exp *found;
     183     4680793 :                         atom *fval;
     184     4680793 :                         prop *est;
     185     4680793 :                         if ((found = rel_find_exp(rel, e))) {
     186     2203253 :                                 if (rel->op != op_table) { /* At the moment don't propagate statistics for table relations */
     187     2157915 :                                         if ((fval = find_prop_and_get(found->p, PROP_MAX)))
     188     1140360 :                                                 set_minmax_property(sql, e, PROP_MAX, fval);
     189     2157906 :                                         if ((fval = find_prop_and_get(found->p, PROP_MIN)))
     190     1147428 :                                                 set_minmax_property(sql, e, PROP_MIN, fval);
     191     2157905 :                                         if (!has_nil(found))
     192     1396558 :                                                 set_has_no_nil(e);
     193     2157905 :                                         if (is_unique(found) || (need_distinct(rel) && list_length(rel->exps) == 1) ||
     194     1741368 :                                                 (is_groupby(rel->op) && (list_empty(rel->r) || (list_length(rel->r) == 1 && exps_find_exp(rel->r, e)))))
     195      427733 :                                                 set_unique(e);
     196             :                                         /* propagate unique estimation for known cases */
     197     2157905 :                                         if (is_groupby(rel->op) && list_empty(rel->r) && !find_prop(e->p, PROP_NUNIQUES)) { /* global aggregate case */
     198        7645 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     199        7645 :                                                 p->value.dval = 1;
     200     2150261 :                                         } else if (((is_basetable(rel->op) || is_except(rel->op) || is_inter(rel->op) || is_simple_project(rel->op) ||
     201       71914 :                                                                  (is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
     202     2090977 :                                                                 (est = find_prop(found->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES))) {
     203      195233 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     204      195233 :                                                 p->value.dval = est->value.dval;
     205             :                                         }
     206             :                                 }
     207     2203241 :                                 return e;
     208             :                         }
     209             :                         return NULL;
     210             :                 }
     211       83279 :                 case op_topn:
     212             :                 case op_sample:
     213       83279 :                         return rel_propagate_column_ref_statistics(sql, rel->l, e);
     214             :                 default:
     215             :                         break;
     216             :                 }
     217             :         }
     218             :         return NULL;
     219             : }
     220             : 
     221             : static atom *
     222     4657905 : atom_from_valptr( allocator *sa, sql_subtype *tpe, ValRecord *v)
     223             : {
     224     4657905 :         atom *a = SA_NEW(sa, atom);
     225             : 
     226     4657921 :         assert(!VALisnil(v));
     227     4657940 :         *a = (atom) {.tpe = *tpe,};
     228     4657940 :         SA_VALcopy(sa, &a->data, v);
     229     4658105 :         return a;
     230             : }
     231             : 
     232             : void
     233     4290013 : sql_column_get_statistics(mvc *sql, sql_column *c, sql_exp *e)
     234             : {
     235     4290013 :         bool nonil = false, unique = false;
     236     4290013 :         double unique_est = 0.0;
     237     4290013 :         ValRecord min, max;
     238     4290013 :         int ok = mvc_col_stats(sql, c, &nonil, &unique, &unique_est, &min, &max);
     239             : 
     240     4291398 :         if (has_nil(e) && nonil)
     241     2837097 :                 set_has_no_nil(e);
     242     4291398 :         if (!is_unique(e) && unique)
     243     1133738 :                 set_unique(e);
     244     4291398 :         if (unique_est != 0.0) {
     245     3023219 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     246     3023052 :                 p->value.dval = unique_est;
     247             :         }
     248     4291231 :         unsigned int digits = 0;
     249     4291231 :         sql_subtype *et = exp_subtype(e);
     250     4291380 :         if (et->type->eclass == EC_DEC || et->type->eclass == EC_NUM)
     251     2783440 :                 digits = et->digits;
     252     4291380 :         if ((ok & 2) == 2) {
     253     2326266 :                 if (!VALisnil(&max)) {
     254     2326188 :                         prop *p = e->p = prop_create(sql->sa, PROP_MAX, e->p);
     255     2326137 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &max);
     256     2326118 :                         if (digits) {
     257     1723117 :                                 unsigned int nd = atom_digits(p->value.pval);
     258     1723071 :                                 if (nd < digits)
     259             :                                         digits = nd;
     260     1723071 :                                 if (!digits)
     261             :                                         digits = 1;
     262             :                         }
     263             :                 }
     264     2326055 :                 VALclear(&max);
     265             :         }
     266     4291157 :         if ((ok & 1) == 1) {
     267     2332257 :                 if (!VALisnil(&min)) {
     268     2332270 :                         prop *p = e->p = prop_create(sql->sa, PROP_MIN, e->p);
     269     2332291 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &min);
     270     2332397 :                         if (digits) {
     271     1730406 :                                 unsigned int nd = atom_digits(p->value.pval);
     272     1730433 :                                 if (nd > digits) /* possibly set to low by max value */
     273             :                                         digits = nd;
     274             :                                 if (!digits)
     275             :                                         digits = 1;
     276             :                         }
     277             :                 }
     278     2332422 :                 VALclear(&min);
     279             :         }
     280     4291339 :         if (digits)
     281     2783420 :                 et->digits = digits;
     282     4291339 :         if (et->type->eclass == EC_DEC && et->digits <= et->scale)
     283         216 :                 et->digits = et->scale + 1;
     284     4291339 : }
     285             : 
     286             : static void
     287      953148 : rel_basetable_column_get_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
     288             : {
     289      953148 :         if (e->p)
     290             :                 return;
     291      306726 :         sql_column *c = NULL;
     292             : 
     293      306726 :         if ((c = rel_base_find_column(rel, e->nid))) {
     294      208600 :                 sql_column_get_statistics(sql, c, e);
     295             :         }
     296             : }
     297             : 
     298             : static bool
     299        8876 : rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, sql_exp *e, int i)
     300             : {
     301        8876 :         sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
     302        8876 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     303        8876 :                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     304        8876 :         prop *est;
     305             : 
     306             :         /* for the intersection, if both expressions don't overlap, it can be pruned */
     307        8876 :         if (is_inter(rel->op) && !has_nil(le) && !has_nil(re) &&
     308         511 :                 ((rval_max && lval_min && atom_cmp(rval_max, lval_min) < 0) || (rval_min && lval_max && atom_cmp(rval_min, lval_max) > 0)))
     309          26 :                 return true;
     310             : 
     311        8850 :         if (lval_max && rval_max) {
     312        2557 :                 if (is_union(rel->op))
     313           3 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     314        2554 :                 else if (is_inter(rel->op))
     315         399 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval_max, rval_max)); /* for intersect the new max will be the min of the two */
     316             :                 else /* except */
     317        2155 :                         set_minmax_property(sql, e, PROP_MAX, lval_max);
     318             :         }
     319        8850 :         if (lval_min && rval_min) {
     320        2557 :                 if (is_union(rel->op))
     321           3 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     322        2554 :                 else if (is_inter(rel->op))
     323         399 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval_min, rval_min)); /* for intersect the new min will be the max of the two */
     324             :                 else /* except */
     325        2155 :                         set_minmax_property(sql, e, PROP_MIN, lval_min);
     326             :         }
     327             : 
     328        8850 :         if (is_union(rel->op) || is_munion(rel->op)) {
     329        5986 :                 if (!has_nil(le) && !has_nil(re))
     330         879 :                         set_has_no_nil(e);
     331        5986 :                 if (need_distinct(rel) && list_length(rel->exps) == 1)
     332           0 :                         set_unique(e);
     333        2864 :         } else if (is_inter(rel->op)) {
     334         564 :                 if (!has_nil(le) || !has_nil(re))
     335         509 :                         set_has_no_nil(e);
     336         564 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     337         345 :                         set_unique(e);
     338             :         } else {
     339        2300 :                 assert(is_except(rel->op));
     340        2300 :                 if (!has_nil(le))
     341        2236 :                         set_has_no_nil(e);
     342        2300 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     343        2012 :                         set_unique(e);
     344             :         }
     345             :         /* propagate unique estimation for known cases */
     346        8850 :         if (!is_union(rel->op) && (est = find_prop(le->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES)) {
     347         207 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     348         207 :                 p->value.dval = est->value.dval;
     349             :         }
     350             :         return false;
     351             : }
     352             : 
     353             : 
     354             : static void
     355       62837 : rel_munion_get_statistics(mvc *sql, sql_rel *rel, list *rels, sql_exp *e, int i)
     356             : {
     357       62837 :         if (is_recursive(rel))
     358             :                 return ;
     359       62837 :         assert(is_munion(rel->op));
     360             : 
     361       62837 :         sql_rel *l = rels->h->data;
     362       62837 :         sql_exp *le = list_fetch(l->exps, i);
     363       62837 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX);
     364       62837 :         bool has_nonil = !has_nil(le);
     365             : 
     366      182734 :         for(node *n = rels->h->next; n; n = n->next) {
     367      119897 :                 sql_rel *r = n->data;
     368      119897 :                 sql_exp *re = list_fetch(r->exps, i);
     369      119897 :                 atom *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     370             : 
     371      119897 :                 if (lval_max && rval_max) {
     372       44177 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     373       44177 :                         lval_max = find_prop_and_get(e->p, PROP_MAX);
     374             :                 }
     375      119897 :                 if (lval_min && rval_min) {
     376       43628 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     377       43628 :                         lval_min = find_prop_and_get(e->p, PROP_MIN);
     378             :                 }
     379      119897 :                 has_nonil &= !has_nil(re);
     380             : 
     381             :         }
     382             : 
     383       62837 :         if (has_nonil)
     384       42790 :                 set_has_no_nil(e);
     385             : 
     386       62837 :         if (need_distinct(rel) && list_length(rel->exps) == 1)
     387        1597 :                 set_unique(e);
     388             : }
     389             : 
     390             : 
     391             : static sql_exp *
     392     3566059 : rel_propagate_statistics(visitor *v, sql_rel *rel, sql_exp *e, int depth)
     393             : {
     394     3566059 :         mvc *sql = v->sql;
     395     3566059 :         atom *lval;
     396             : 
     397     3566059 :         (void) depth;
     398     3566059 :         switch(e->type) {
     399     2211641 :         case e_column:
     400     2211641 :                 switch (rel->op) { /* set relations don't call rel_propagate_statistics */
     401      284686 :                 case op_join:
     402             :                 case op_left:
     403             :                 case op_right:
     404             :                 case op_full:
     405             :                 case op_semi:
     406             :                 case op_anti: {
     407      284686 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e);
     408      284686 :                         if (!found)
     409      142791 :                                 (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     410             :                         break;
     411             :                 }
     412     1926955 :                 case op_select:
     413             :                 case op_project:
     414             :                 case op_groupby: {
     415     1926955 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e); /* labels may be found on the same projection, ugh */
     416     1926949 :                         if (!found && is_simple_project(rel->op))
     417      133515 :                                 (void) rel_propagate_column_ref_statistics(sql, rel, e);
     418             :                         break;
     419             :                 }
     420           0 :                 case op_insert:
     421             :                 case op_update:
     422             :                 case op_delete:
     423           0 :                         (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     424           0 :                         break;
     425             :                 default:
     426             :                         break;
     427             :                 }
     428             :                 break;
     429      101863 :         case e_convert: {
     430      101863 :                 sql_subtype *to = exp_totype(e), *from = exp_fromtype(e);
     431      101863 :                 sql_exp *l = e->l;
     432      101863 :                 sql_class fr = from->type->eclass, too = to->type->eclass;
     433      101863 :                 prop *est;
     434             : 
     435      101863 :                 if (fr == too) {
     436       92701 :                         if ((lval = find_prop_and_get(l->p, PROP_MAX))) {
     437       60263 :                                 atom *res = atom_copy(sql->sa, lval);
     438       60263 :                                 if ((res = atom_cast(sql->sa, res, to)))
     439       60240 :                                         set_minmax_property(sql, e, PROP_MAX, res);
     440             :                         }
     441       92701 :                         if ((lval = find_prop_and_get(l->p, PROP_MIN))) {
     442       60901 :                                 atom *res = atom_copy(sql->sa, lval);
     443       60901 :                                 if ((res = atom_cast(sql->sa, res, to)))
     444       60878 :                                         set_minmax_property(sql, e, PROP_MIN, res);
     445             :                         }
     446             :                 }
     447             :                 /* propagating nuniques through conversions is tricky. I am adding just the general cases */
     448      101863 :                 if ((est = find_prop(l->p, PROP_NUNIQUES)) &&
     449       63324 :                         (((fr == EC_TIME || fr == EC_TIME_TZ || fr == EC_DATE) && (too == EC_TIMESTAMP || too == EC_TIMESTAMP_TZ)) ||
     450       63299 :                          ((fr == EC_NUM || fr == EC_BIT) && too == EC_NUM) || (fr == EC_DEC && too == EC_DEC) || (fr != EC_EXTERNAL && too == EC_STRING))) {
     451       59865 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     452       59865 :                         p->value.dval = est->value.dval;
     453             :                 }
     454      101863 :                 if (!has_nil(l))
     455       57955 :                         set_has_no_nil(e);
     456             :                 break;
     457             :         }
     458      379254 :         case e_aggr:
     459             :         case e_func: {
     460      379254 :                 BUN lv;
     461      379254 :                 sql_subfunc *f = e->f;
     462             : 
     463      379254 :                 if (!f->func->s) {
     464      352139 :                         int key = hash_key(f->func->base.name); /* Using hash lookup */
     465      352139 :                         sql_hash_e *he = sql_functions_lookup->buckets[key&(sql_functions_lookup->size-1)];
     466      352139 :                         lookup_function look = NULL;
     467             : 
     468      753756 :                         for (; he && !look; he = he->chain) {
     469      401617 :                                 struct function_properties* fp = (struct function_properties*) he->value;
     470             : 
     471      401617 :                                 if (!strcmp(f->func->base.name, fp->name))
     472      108565 :                                         look = fp->func;
     473             :                         }
     474      352139 :                         if (look)
     475      108565 :                                 look(sql, e);
     476             :                 }
     477             :                 /* for global aggregates with no semantics, if the left relation has values, then the output won't be NULL */
     478      379254 :                 if (!is_semantics(e) && e->l && !have_nil(e->l) &&
     479      109263 :                         (e->type != e_aggr || (is_groupby(rel->op) && list_length(rel->r)) || ((lv = get_rel_count(rel->l)) != BUN_NONE && lv > 0)))
     480      108927 :                         set_has_no_nil(e);
     481             :                 /* set properties for global aggregates */
     482      379254 :                 if (e->type == e_aggr && is_groupby(rel->op) && list_empty(rel->r)) {
     483        7949 :                         if (!find_prop(e->p, PROP_NUNIQUES)) {
     484        7949 :                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     485        7949 :                                 p->value.dval = 1;
     486             :                         }
     487        7949 :                         set_unique(e);
     488             :                 }
     489             :                 break;
     490             :         }
     491      591095 :         case e_atom:
     492      591095 :                 if (e->l) {
     493      573066 :                         atom *a = (atom*) e->l;
     494      573066 :                         if (!a->isnull) {
     495      503993 :                                 set_minmax_property(sql, e, PROP_MAX, a);
     496      503993 :                                 set_minmax_property(sql, e, PROP_MIN, a);
     497             :                         }
     498      573066 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     499      573066 :                         p->value.dval = 1;
     500       18029 :                 } else if (e->f) {
     501        4302 :                         list *vals = (list *) e->f;
     502        4302 :                         sql_exp *first = vals->h ? vals->h->data : NULL;
     503        4302 :                         atom *max = NULL, *min = NULL; /* all child values must have a valid min/max */
     504        4302 :                         int has_nil = 0;
     505             : 
     506        4302 :                         if (first) {
     507        4302 :                                 max = ((lval = find_prop_and_get(first->p, PROP_MAX))) ? lval : NULL;
     508        4302 :                                 min = ((lval = find_prop_and_get(first->p, PROP_MIN))) ? lval : NULL;
     509        4302 :                                 has_nil |= has_nil(first);
     510             :                         }
     511             : 
     512       18436 :                         for (node *n = vals->h ? vals->h->next : NULL ; n ; n = n->next) {
     513        9832 :                                 sql_exp *ee = n->data;
     514             : 
     515        9832 :                                 if (min && max) {
     516        9339 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MAX))) {
     517        9293 :                                                 max = atom_cmp(lval, max) > 0 ? lval : max;
     518             :                                         } else {
     519             :                                                 max = NULL;
     520             :                                         }
     521        9339 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MIN))) {
     522        9293 :                                                 min = atom_cmp(min, lval) > 0 ? lval : min;
     523             :                                         } else {
     524             :                                                 min = NULL;
     525             :                                         }
     526             :                                 }
     527        9832 :                                 has_nil |= has_nil(ee);
     528             :                         }
     529             : 
     530        4302 :                         if (!has_nil)
     531        3928 :                                 set_has_no_nil(e);
     532        4302 :                         if (min && max) {
     533        3886 :                                 set_minmax_property(sql, e, PROP_MAX, max);
     534        3886 :                                 set_minmax_property(sql, e, PROP_MIN, min);
     535             :                         }
     536        4302 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     537        4302 :                         p->value.dval = (dbl) list_length(vals);
     538             :                 } else {
     539       13727 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     540       13727 :                         p->value.dval = 1;
     541             :                 }
     542             :                 break;
     543      282206 :         case e_cmp:
     544             :                 /* TODO? propagating min/max/unique of booleans is not very worth it */
     545      282206 :                 if (e->flag == cmp_or || e->flag == cmp_filter) {
     546       14765 :                         if (!have_nil(e->l) && !have_nil(e->r))
     547       10114 :                                 set_has_no_nil(e);
     548      267441 :                 } else if (e->flag == cmp_in || e->flag == cmp_notin) {
     549       20162 :                         sql_exp *le = e->l;
     550       20162 :                         if (!has_nil(le) && !have_nil(e->r))
     551       16927 :                                 set_has_no_nil(e);
     552             :                 } else {
     553      247279 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     554      247279 :                         if (!has_nil(le) && !has_nil(re) && (!e->f || !has_nil(fe)))
     555      174380 :                                 set_has_no_nil(e);
     556             :                 }
     557             :                 break;
     558             :         case e_psm:
     559             :                 break;
     560             :         }
     561             : 
     562             : #ifndef NDEBUG
     563             :         {
     564             :                 /* min and max cannot be NULL and min must be <= than max, if it happens the inner relation must be empty! */
     565     3566053 :                 atom *min = find_prop_and_get(e->p, PROP_MIN), *max = find_prop_and_get(e->p, PROP_MAX);
     566             : 
     567     3566055 :                 (void) min;
     568     3566055 :                 (void) max;
     569     3566055 :                 assert(!min || !min->isnull);
     570     3566055 :                 assert(!max || !max->isnull);
     571     3566055 :                 assert(!min || !max || (min && max)/* atom_cmp(min, max) <= 0*/);
     572             :         }
     573             : #endif
     574     3566055 :         return e;
     575             : }
     576             : 
     577             : static list * /* Remove predicates always false from min/max values */
     578      143712 : rel_prune_predicates(visitor *v, sql_rel *rel)
     579             : {
     580      143712 :         if (rel->l) {
     581      143712 :                 sql_rel *l = rel->l;
     582      143712 :                 if (is_single(l))
     583        3372 :                         return rel->exps;
     584             :         }
     585      140340 :         if (!list_empty(rel->attr))
     586        3003 :                 return rel->exps;
     587      289085 :         for (node *n = rel->exps->h ; n ; n = n->next) {
     588      151748 :                 sql_exp *e = n->data;
     589             : 
     590      151748 :                 if (e->type == e_cmp && is_theta_exp(e->flag)) {
     591      125224 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     592      125224 :                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     593      125224 :                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     594      125224 :                         bool always_false = false, always_true = false;
     595             : 
     596      125224 :                         if (fe && !is_symmetric(e)) {
     597        3045 :                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
     598        3045 :                                 comp_type lower = range2lcompare(e->flag), higher = range2rcompare(e->flag);
     599        3644 :                                 int not_int1 = rval_min && lval_max && /* the middle and left intervals don't overlap */
     600        1654 :                                         (!is_anti(e) && (lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0)),
     601        4059 :                                         not_int2 = lval_min && fval_max && /* the middle and right intervals don't overlap */
     602        2482 :                                         (!is_anti(e) && (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0)),
     603        3045 :                                         not_int3 = rval_min && fval_max && /* the left interval is after the right one */
     604        1285 :                                         (!is_anti(e) && (atom_cmp(rval_min, fval_max) > 0));
     605             : 
     606        3045 :                                 always_false |= not_int1 || not_int2 || not_int3;
     607             :                                 /* for anti the middle must be before the left or after the right or the right after the left, for the other the middle must be always between the left and right intervals */
     608        4089 :                                 always_true |= !has_nil(le) && !has_nil(re) && !has_nil(fe) &&
     609        3941 :                                         lval_min && lval_max && rval_min && rval_max && fval_min && fval_max &&
     610         573 :                                         (is_anti(e) ? ((lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) || (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
     611         488 :                                          ((lower == cmp_gte ? atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0)));
     612             :                         } else if (!fe) {
     613      122163 :                                 if (!is_semantics(e)) /* trivial not null cmp null case */
     614      232882 :                                         always_false |= !is_anti(e) && ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)));
     615      122163 :                                 switch (e->flag) {
     616      108637 :                                 case cmp_equal:
     617      108637 :                                         if (lval_min && lval_max && rval_min && rval_max && (!is_semantics(e) || !has_nil(le) || !has_nil(re)))
     618      135956 :                                                 always_false |= (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     619      108637 :                                         if (is_semantics(e)) { /* prune *= NULL cases */
     620        5685 :                                                 always_false |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     621       11370 :                                                 always_true |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     622             :                                         }
     623             :                                         break;
     624        5334 :                                 case cmp_notequal:
     625        5334 :                                         if (lval_min && lval_max && rval_min && rval_max)
     626        7364 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     627        5334 :                                         if (is_semantics(e)) {
     628          37 :                                                 always_true |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     629          74 :                                                 always_false |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     630             :                                         }
     631             :                                         break;
     632        5487 :                                 case cmp_gt:
     633        5487 :                                         if (lval_max && rval_min)
     634        1834 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     635        5487 :                                         if (lval_min && rval_max)
     636        3668 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     637             :                                         break;
     638         121 :                                 case cmp_gte:
     639         121 :                                         if (lval_max && rval_min)
     640          51 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     641         121 :                                         if (lval_min && rval_max)
     642         102 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     643             :                                         break;
     644        2502 :                                 case cmp_lt:
     645        2502 :                                         if (lval_min && rval_max)
     646        1382 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     647        2502 :                                         if (lval_max && rval_min)
     648        2812 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     649             :                                         break;
     650          82 :                                 case cmp_lte:
     651          82 :                                         if (lval_min && rval_max)
     652          11 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     653          82 :                                         if (lval_max && rval_min)
     654          22 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     655             :                                         break;
     656             :                                 default: /* Maybe later I can do cmp_in and cmp_notin but don't forget to remove is_theta_exp check up there */
     657             :                                         break;
     658             :                                 }
     659             :                         }
     660      125224 :                         assert(!always_false || !always_true);
     661      125224 :                         if (always_false || always_true) {
     662        3698 :                                 sql_exp *ne = exp_atom_bool(v->sql->sa, always_true /* if always_true set then true else false */);
     663        3698 :                                 if (exp_name(e))
     664           0 :                                         exp_prop_alias(v->sql->sa, ne, e);
     665        3698 :                                 n->data = ne;
     666        3698 :                                 v->changes++;
     667             :                         }
     668             :                 }
     669             :         }
     670      137337 :         return rel->exps;
     671             : }
     672             : 
     673             : static sql_rel *
     674          14 : set_setop_side(visitor *v, sql_rel *rel, sql_rel *side)
     675             : {
     676          14 :         if (side == rel->r)
     677           0 :                 rel->r = NULL;
     678             :         else
     679          14 :                 rel->l = NULL;
     680          14 :         if (!is_simple_project(side->op) || rel_is_ref(side) || !list_empty(side->r)) {
     681           0 :                 side = rel_project(v->sql->sa, side, rel_projections(v->sql, side, NULL, 0, 1));
     682           0 :                 set_count_prop(v->sql->sa, side, get_rel_count(side->l));
     683           0 :                 side->exps = exps_exp_visitor_bottomup(v, side, side->exps, 0, &rel_propagate_statistics, false);
     684             :         }
     685          35 :         for (node *n = rel->exps->h, *m = side->exps->h ; n && m ; n = n->next, m = m->next) {
     686          21 :                 sql_exp *e1 = n->data, *e2 = m->data;
     687          21 :                 exp_prop_alias(v->sql->sa, e2, e1);
     688             :         }
     689          14 :         list_hash_clear(side->exps);
     690             : 
     691          14 :         if (need_distinct(rel))
     692           0 :                 set_distinct(side);
     693          14 :         side->p = prop_copy(v->sql->sa, rel->p);
     694          14 :         rel_destroy(rel);
     695          14 :         return side;
     696             : }
     697             : 
     698             : static BUN
     699       26424 : trivial_project_exp_card(sql_exp *e)
     700             : {
     701       26799 :         if (e->type == e_convert)
     702         375 :                 return trivial_project_exp_card(e->l);
     703       26424 :         return e->type == e_atom && e->f ? (BUN) list_length(e->f) : 1;
     704             : }
     705             : 
     706             : static BUN
     707       26198 : rel_calc_nuniques(mvc *sql, sql_rel *l, list *exps)
     708             : {
     709       26198 :         BUN lv = get_rel_count(l);
     710             : 
     711       26199 :         if (lv == 0)
     712             :                 return 0;
     713       23461 :         if (!list_empty(exps)) {
     714       23461 :                 BUN nuniques = 0;
     715             :                 /* compute the highest number of unique values */
     716       58643 :                 for (node *n = exps->h ; n && nuniques != BUN_NONE ; n = n->next) {
     717       35181 :                         sql_exp *e = n->data;
     718       35181 :                         sql_rel *bt = NULL;
     719       35181 :                         prop *p = NULL;
     720       35181 :                         BUN euniques = BUN_NONE;
     721       35181 :                         atom *min, *max, *sub = NULL;
     722       35181 :                         sql_subtype *tp = exp_subtype(e);
     723       35181 :                         sql_class ec = tp ? tp->type->eclass : EC_STRING; /* if 'e' has no type (eg parameter), use a non-number type to fail condition */
     724             : 
     725       35181 :                         if ((p = find_prop(e->p, PROP_NUNIQUES))) {
     726       25416 :                                 euniques = (BUN) p->value.dval;
     727        9765 :                         } else if (e->type == e_column && e->nid && rel_find_exp_and_corresponding_rel(l, e, false, &bt, NULL) && bt && (p = find_prop(bt->p, PROP_COUNT))) {
     728        7665 :                                 euniques = (BUN) p->value.lval;
     729             :                         }
     730             :                         /* use min to max range to compute number of possible values in the domain for number types */
     731       35181 :                         if ((EC_TEMP(ec)||ec==EC_NUM||ec==EC_MONTH||ec==EC_POS) &&
     732       24291 :                                 (min = find_prop_and_get(e->p, PROP_MIN)) && (max = find_prop_and_get(e->p, PROP_MAX))) {
     733             :                                 /* the range includes min and max, so the atom_inc call is needed */
     734             :                                 /* if 'euniques' has number of distinct values, compute min between both */
     735       19346 :                                 if ((sub = atom_sub(sql->sa, max, min)) && (sub = atom_inc(sql->sa, sub)) && (sub = atom_cast(sql->sa, sub, sql_bind_localtype("oid"))))
     736           0 :                                         euniques = MIN(euniques, (BUN) sub->data.val.oval);
     737             :                         }
     738       35182 :                         if (euniques != BUN_NONE)
     739       33082 :                                 nuniques = MAX(nuniques, euniques); /* the highest cardinality sets the estimation */
     740             :                         else
     741             :                                 nuniques = BUN_NONE;
     742             :                 }
     743       23462 :                 if (nuniques != BUN_NONE)
     744             :                         return nuniques;
     745             :         }
     746             :         return lv;
     747             : }
     748             : 
     749             : static sql_rel *
     750     1366516 : rel_get_statistics_(visitor *v, sql_rel *rel)
     751             : {
     752             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
     753     1366516 :         uint8_t has_special_modify = *(uint8_t*) v->data;
     754     1366516 :         bool can_be_pruned = !has_special_modify && v->storage_based_opt;
     755             : 
     756             :         /* Don't look at the same relation twice */
     757     1366516 :         if (are_statistics_gathered(rel->used))
     758             :                 return rel;
     759     1366417 :         rel->used |= statistics_gathered;
     760             : 
     761     1366417 :         switch (rel->op) {
     762      318940 :         case op_basetable: {
     763      318940 :                 sql_table *t = (sql_table *) rel->l;
     764      318940 :                 sqlstore *store = v->sql->session->tr->store;
     765             : 
     766      318940 :                 if (!list_empty(rel->exps)) {
     767     1272418 :                         for (node *n = rel->exps->h ; n ; n = n->next)
     768      953274 :                                 rel_basetable_column_get_statistics(v->sql, rel, n->data);
     769             :                 }
     770             :                 /* Set table row count. TODO? look for remote tables. Don't look at storage for declared tables, because it won't be cleaned */
     771      319147 :                 if (isTable(t) && t->s && !isDeclaredTable(t)) /* count active rows only */
     772      264463 :                         set_count_prop(v->sql->sa, rel, (BUN)store->storage_api.count_col(v->sql->session->tr, ol_first_node(t->columns)->data, 10));
     773             :                 break;
     774             :         }
     775        2794 :         case op_union:
     776             :         case op_inter:
     777             :         case op_except: {
     778        2794 :                 bool empty_cross = false;
     779        2794 :                 int i = 0;
     780        2794 :                 sql_rel *l = rel->l, *pl = rel->l, *r = rel->r, *pr = rel->r;
     781             : 
     782        2794 :                 while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     783           0 :                         pl = pl->l;
     784        2794 :                 while (is_sample(pr->op) || is_topn(pr->op))
     785           0 :                         pr = pr->l;
     786             :                 /* if it's not a projection, then project and propagate statistics */
     787        2794 :                 if (!is_project(pl->op) && !is_base(pl->op)) {
     788           0 :                         pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     789           0 :                         set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     790           0 :                         pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     791             :                 }
     792        2794 :                 if (!is_project(pr->op) && !is_base(pr->op)) {
     793           0 :                         pr = rel_project(v->sql->sa, pr, rel_projections(v->sql, pr, NULL, 0, 1));
     794           0 :                         set_count_prop(v->sql->sa, pr, get_rel_count(pr->l));
     795           0 :                         pr->exps = exps_exp_visitor_bottomup(v, pr, pr->exps, 0, &rel_propagate_statistics, false);
     796             :                 }
     797             : 
     798       11670 :                 for (node *n = rel->exps->h ; n ; n = n->next) {
     799        8876 :                         empty_cross |= rel_setop_get_statistics(v->sql, rel, pl->exps, pr->exps, n->data, i);
     800        8876 :                         i++;
     801             :                 }
     802             : 
     803             :                 /* propagate row count */
     804        2794 :                 if (is_union(rel->op)) {
     805         277 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     806         277 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     807             : 
     808         277 :                         if (lv == 0 && rv == 0) { /* both sides empty */
     809           2 :                                 if (can_be_pruned)
     810             :                                         empty_cross = true;
     811             :                                 else
     812           2 :                                         set_count_prop(v->sql->sa, rel, 0);
     813         275 :                         } else if (can_be_pruned && lv == 0 && !rel_is_ref(rel)) { /* left side empty */
     814           0 :                                 rel = set_setop_side(v, rel, r);
     815           0 :                                 empty_cross = false; /* don't rewrite again */
     816         275 :                         } else if (can_be_pruned && rv == 0 && !rel_is_ref(rel)) { /* right side empty */
     817           0 :                                 rel = set_setop_side(v, rel, l);
     818           0 :                                 empty_cross = false; /* don't rewrite again */
     819         275 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
     820           7 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
     821             :                         }
     822        2517 :                 } else if (is_inter(rel->op) || is_except(rel->op)) {
     823        2517 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     824        2517 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     825             : 
     826        2517 :                         if (lv == 0) { /* left side empty */
     827          63 :                                 if (can_be_pruned)
     828             :                                         empty_cross = true;
     829             :                                 else
     830           5 :                                         set_count_prop(v->sql->sa, rel, 0);
     831        2454 :                         } else if (rv == 0) { /* right side empty */
     832          17 :                                 if (is_inter(rel->op)) {
     833           3 :                                         if (can_be_pruned)
     834             :                                                 empty_cross = true;
     835             :                                         else
     836           0 :                                                 set_count_prop(v->sql->sa, rel, 0);
     837          14 :                                 } else if (can_be_pruned && !rel_is_ref(rel)) {
     838          14 :                                         rel = set_setop_side(v, rel, l);
     839          14 :                                         empty_cross = false; /* don't rewrite again */
     840             :                                 } else {
     841           0 :                                         set_count_prop(v->sql->sa, rel, lv);
     842             :                                 }
     843             :                         } else {
     844        2437 :                                 set_count_prop(v->sql->sa, rel, lv);
     845             :                         }
     846             :                 }
     847        2794 :                 if (can_be_pruned && empty_cross) {
     848          81 :                         rel_destroy(rel->l);
     849          81 :                         rel->l = NULL;
     850          81 :                         rel_destroy(rel->r);
     851          81 :                         rel->r = NULL;
     852         246 :                         for (node *n = rel->exps->h ; n ; n = n->next) {
     853         165 :                                 sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     854         165 :                                 exp_prop_alias(v->sql->sa, a, e);
     855         165 :                                 n->data = a;
     856             :                         }
     857          81 :                         list_hash_clear(rel->exps);
     858          81 :                         sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     859          81 :                         set_count_prop(v->sql->sa, l, 1);
     860          81 :                         l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     861          81 :                         set_count_prop(v->sql->sa, l, 0);
     862          81 :                         rel->op = op_project;
     863          81 :                         rel->l = l;
     864          81 :                         rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     865          81 :                         set_count_prop(v->sql->sa, rel, 0);
     866          81 :                         set_nodistinct(rel); /* set relations may have distinct flag set */
     867          81 :                         v->changes++;
     868             :                 }
     869             :                 break;
     870             :         }
     871       13473 :         case op_munion: {
     872       13473 :                 list *l = rel->l, *nrels = sa_list(v->sql->sa);
     873       13473 :                 BUN cnt = 0;
     874       13473 :                 bool needs_pruning = false;
     875             : 
     876       13473 :                 if (is_recursive(rel))
     877             :                         break;
     878       51231 :                 for (node *n = l->h; n; n = n->next) {
     879       37829 :                         sql_rel *r = n->data, *pl = r;
     880             : 
     881       37829 :                         while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     882           0 :                                         pl = pl->l;
     883             :                         /* if it's not a projection, then project and propagate statistics */
     884       37829 :                         if (!is_project(pl->op) && !is_base(pl->op)) {
     885           0 :                                 pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     886           0 :                                 set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     887           0 :                                 pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     888             :                         }
     889       37829 :                         nrels = append(nrels, pl);
     890             :                         /* we need new munion statistics */
     891             :                         /* propagate row count */
     892       37829 :                         BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     893             :                         /* if PROP_COUNT does not exist we assume at least a row (see get_rel_count def) */
     894       37829 :                         if (rv == BUN_NONE) {
     895        1220 :                                 cnt++;
     896        1220 :                                 continue;
     897             :                         }
     898       36609 :                         if (!rv && can_be_pruned)
     899        6724 :                                 needs_pruning = true;
     900             :                         /* overflow check */
     901       36609 :                         if (rv > (BUN_MAX - cnt))
     902       37829 :                                 rv = BUN_MAX;
     903             :                         else
     904       36609 :                                 cnt += rv;
     905             :                 }
     906       13402 :                 int i = 0;
     907       76239 :                 for (node *n = rel->exps->h ; n ; n = n->next, i++)
     908       62837 :                         rel_munion_get_statistics(v->sql, rel, nrels, n->data, i);
     909             : 
     910       13402 :                 if (needs_pruning && !rel_is_ref(rel)) {
     911        4563 :                         v->changes++;
     912        4563 :                         list *nl = sa_list(l->sa);
     913             : 
     914       16832 :                         for (node *n = nrels->h; n; n = n->next) {
     915       12269 :                                 sql_rel *r = n->data;
     916       12269 :                                 BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     917             : 
     918       12269 :                                 if (!rv) { /* keep last for now */
     919        6254 :                                         rel_destroy(r);
     920        6254 :                                         continue;
     921             :                                 }
     922        6015 :                                 nl = append(nl, r);
     923             :                         }
     924        4563 :                         rel->l = nl;
     925        4563 :                         if (list_length(nl) == 1) {
     926        4227 :                                 sql_rel *l = rel->l = nl->h->data; /* ugh */
     927        4227 :                                 rel->r = NULL;
     928        4227 :                                 rel->op = op_project;
     929             : 
     930       20767 :                                 for (node *n = rel->exps->h, *m = l->exps->h ; n && m ; n = n->next, m = m->next) {
     931       16540 :                                         sql_exp *pe = n->data, *ie = m->data;
     932       16540 :                                         sql_exp *ne = exp_ref(v->sql, ie);
     933       16540 :                                         exp_prop_alias(v->sql->sa, ne, pe);
     934       16540 :                                         n->data = ne;
     935             :                                 }
     936        4227 :                                 list_hash_clear(rel->exps);
     937         336 :                         } else if (list_empty(nl)) {
     938             :                                 /* empty select (project [ nils ] ) */
     939         441 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     940         341 :                                         sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     941         341 :                                         exp_prop_alias(v->sql->sa, a, e);
     942         341 :                                         n->data = a;
     943             :                                 }
     944         100 :                                 list_hash_clear(rel->exps);
     945         100 :                                 sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     946         100 :                                 set_count_prop(v->sql->sa, l, 1);
     947         100 :                                 l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     948         100 :                                 set_count_prop(v->sql->sa, l, 0);
     949         100 :                                 rel->op = op_project;
     950         100 :                                 rel->r = NULL;
     951         100 :                                 rel->l = l;
     952         100 :                                 rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     953         100 :                                 set_count_prop(v->sql->sa, rel, 0);
     954         100 :                                 set_nodistinct(rel); /* set relations may have distinct flag set */
     955             :                         }
     956             :                 } else {
     957        8839 :                         set_count_prop(v->sql->sa, rel, cnt);
     958             :                 }
     959             :                 break;
     960             :         }
     961      549135 :         case op_join:
     962             :         case op_left:
     963             :         case op_right:
     964             :         case op_full:
     965             :         case op_semi:
     966             :         case op_anti:
     967             :         case op_select:
     968             :         case op_project:
     969             :         case op_groupby: {
     970      549135 :                 if (is_groupby(rel->op) && !list_empty(rel->r))
     971       16821 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     972      549134 :                 rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, &rel_propagate_statistics, false);
     973      549120 :                 if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r))
     974       41871 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     975             :                 /* The following optimizations can only be applied after propagating the statistics to rel->exps */
     976      549124 :                 if (can_be_pruned && (is_join(rel->op) || is_select(rel->op)) && !list_empty(rel->exps)) {
     977      143712 :                         int changes = v->changes;
     978      143712 :                         rel->exps = rel_prune_predicates(v, rel);
     979      143712 :                         if (v->changes > changes)
     980        3665 :                                 rel = rewrite_simplify(v, 0, v->value_based_opt, rel);
     981             :                 }
     982             : 
     983             :                 /* propagate row count */
     984      549124 :                 sql_rel *l = rel->l, *r = rel->r;
     985      549124 :                 switch (rel->op) {
     986      137081 :                 case op_join:
     987             :                 case op_left:
     988             :                 case op_right:
     989             :                 case op_full: {
     990      137081 :                         BUN lv = get_rel_count(l), rv = get_rel_count(r), uniques_estimate = BUN_MAX, join_idx_estimate = BUN_MAX;
     991             : 
     992      137081 :                         if (!list_empty(rel->exps) && !is_single(rel)) {
     993      250665 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     994      127811 :                                         sql_exp *e = n->data, *el = e->l, *er = e->r;
     995             : 
     996      127811 :                                         if (find_prop(e->p, PROP_JOINIDX)) {
     997         670 :                                                 join_idx_estimate = lv>rv?lv:rv;
     998      127141 :                                         } else if (e->type == e_cmp && e->flag == cmp_equal) {
     999             :                                                 /* if one of the sides is unique, the cardinality will be that exact number, but look for nulls */
    1000      123256 :                                                 if (!is_semantics(e) || !has_nil(el) || !has_nil(er)) {
    1001      123079 :                                                         BUN lu = 0, ru = 0;
    1002      123079 :                                                         prop *p = NULL;
    1003      123079 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES)))
    1004       91307 :                                                                 lu = (BUN) p->value.dval;
    1005      123079 :                                                         if ((p = find_prop(er->p, PROP_NUNIQUES)))
    1006      105977 :                                                                 ru = (BUN) p->value.dval;
    1007      123079 :                                                         if (is_unique(el) || lu > lv) {
    1008       74988 :                                                                 BUN ncount = (is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv;
    1009       74988 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1010       48091 :                                                         } else if (is_unique(er) || ru > rv) {
    1011       30062 :                                                                 BUN ncount = (is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv;
    1012       30062 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1013             :                                                         }
    1014             :                                                 }
    1015             :                                         }
    1016             :                                 }
    1017             :                         }
    1018      137081 :                         if (is_single(rel)) {
    1019        4801 :                                 set_count_prop(v->sql->sa, rel, lv);
    1020      132280 :                         } else if (join_idx_estimate != BUN_MAX) {
    1021         668 :                                 set_count_prop(v->sql->sa, rel, join_idx_estimate);
    1022      131612 :                         } else if (uniques_estimate != BUN_MAX) {
    1023       98496 :                                 set_count_prop(v->sql->sa, rel, uniques_estimate);
    1024       33116 :                         } else if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1025             :                                 /* corner cases for outer joins */
    1026         126 :                                 if (is_left(rel->op)) {
    1027         114 :                                         set_count_prop(v->sql->sa, rel, lv);
    1028          12 :                                 } else if (is_right(rel->op)) {
    1029           3 :                                         set_count_prop(v->sql->sa, rel, rv);
    1030           9 :                                 } else if (is_full(rel->op) && lv != BUN_NONE && rv != BUN_NONE) {
    1031           9 :                                         set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
    1032           0 :                                 } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1033           0 :                                         set_count_prop(v->sql->sa, rel, 0);
    1034             :                                 }
    1035       32990 :                         } else if (lv == 0) {
    1036        1204 :                                 set_count_prop(v->sql->sa, rel, (is_right(rel->op) || is_full(rel->op)) ? rv : 0);
    1037       32374 :                         } else if (rv == 0) {
    1038        1574 :                                 set_count_prop(v->sql->sa, rel, (is_left(rel->op) || is_full(rel->op)) ? lv : 0);
    1039       31294 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1040       18511 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX / lv)) ? BUN_MAX : (lv * rv)); /* overflow check */
    1041             :                         }
    1042             :                         break;
    1043             :                 }
    1044        2935 :                 case op_anti:
    1045        2935 :                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1046        2935 :                         break;
    1047      112955 :                 case op_semi:
    1048             :                 case op_select:
    1049             :                         /* TODO calculate cardinalities using selectivities */
    1050      112955 :                         if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1051        5158 :                                 set_count_prop(v->sql->sa, rel, 0);
    1052             :                         } else {
    1053      212309 :                                 if (!list_empty(rel->exps) && !is_single(rel)) {
    1054      104512 :                                         BUN cnt = get_rel_count(l), u = 1;
    1055      170388 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1056      111676 :                                                 sql_exp *e = n->data, *el = e->l, *er = e->r;
    1057             : 
    1058             :                                                 /* simple expressions first */
    1059      111676 :                                                 if (e->type == e_cmp && e->flag == cmp_equal && exp_is_atom(er)) {
    1060             :                                                         /* use selectivity */
    1061       61262 :                                                         prop *p;
    1062       61262 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES))) {
    1063       45800 :                                                                 u = (BUN) p->value.dval;
    1064       45800 :                                                                 break;
    1065             :                                                         }
    1066             :                                                 }
    1067             :                                         }
    1068             :                                         /* u is an *estimate*, so don't set count_prop to 0 unless cnt is 0 */
    1069      104512 :                                         set_count_prop(v->sql->sa, rel, cnt == 0 ? 0 : u == 0 || u > cnt ? 1 : cnt/u);
    1070             :                                 } else {
    1071        3285 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1072             :                                 }
    1073             :                         }
    1074             :                         break;
    1075      271425 :                 case op_project:
    1076      271425 :                         if (l) {
    1077      258732 :                                 if (need_distinct(rel)) {
    1078         114 :                                         set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->exps));
    1079             :                                 } else {
    1080      258618 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1081             :                                 }
    1082             :                         } else {
    1083       12693 :                                 BUN card = 1;
    1084             : 
    1085       12693 :                                 if (!list_empty(rel->exps)) {
    1086       37768 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1087       25075 :                                                 sql_exp *e = n->data;
    1088       25075 :                                                 card = MAX(card, trivial_project_exp_card(e));
    1089             :                                         }
    1090             :                                 }
    1091       12693 :                                 set_count_prop(v->sql->sa, rel, card);
    1092             :                         }
    1093             :                         break;
    1094       24317 :                 case op_groupby:
    1095       24317 :                         if (list_empty(rel->r)) {
    1096        7496 :                                 set_count_prop(v->sql->sa, rel, 1);
    1097             :                         } else {
    1098       16821 :                                 set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->r));
    1099             :                         }
    1100             :                         break;
    1101             :                 default:
    1102             :                         break;
    1103             :                 }
    1104             :                 break;
    1105             :         }
    1106       16933 :         case op_topn: {
    1107       16933 :                 BUN lv = get_rel_count(rel->l);
    1108             : 
    1109       16933 :                 if (lv != BUN_NONE) {
    1110       16910 :                         sql_exp *le = rel->exps->h->data, *oe = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
    1111          84 :                         if (oe && oe->l && exp_is_not_null(oe)) { /* no parameters */
    1112          84 :                                 BUN offset = (BUN) ((atom*)oe->l)->data.val.lval;
    1113          84 :                                 lv = offset >= lv ? 0 : lv - offset;
    1114             :                         }
    1115       16910 :                         if (le->l && exp_is_not_null(le)) {
    1116       16875 :                                 BUN limit = (BUN) ((atom*)le->l)->data.val.lval;
    1117       16875 :                                 lv = MIN(lv, limit);
    1118             :                         }
    1119       16910 :                         set_count_prop(v->sql->sa, rel, lv);
    1120             :                 }
    1121             :                 break;
    1122             :         }
    1123          22 :         case op_sample: {
    1124          22 :                 BUN lv = get_rel_count(rel->l);
    1125             : 
    1126          22 :                 if (lv != BUN_NONE) {
    1127           4 :                         sql_exp *se = rel->exps->h->data;
    1128           4 :                         sql_subtype *tp = exp_subtype(se);
    1129             : 
    1130           4 :                         if (se->l && tp->type->eclass == EC_NUM) { /* sample is a number of rows */
    1131           4 :                                 BUN sample = (BUN) ((atom*)se->l)->data.val.lval;
    1132           4 :                                 lv = MIN(lv, sample);
    1133           0 :                         } else if (se->l) { /* sample is a percentage of rows */
    1134           0 :                                 dbl percent = ((atom*)se->l)->data.val.dval;
    1135           0 :                                 assert(tp->type->eclass == EC_FLT);
    1136           0 :                                 lv = (BUN) ceil((dbl)lv * percent);
    1137             :                         }
    1138           4 :                         set_count_prop(v->sql->sa, rel, lv);
    1139             :                 }
    1140             :                 break;
    1141             :         }
    1142        6196 :         case op_table: {
    1143        6196 :                 sql_exp *op = rel->r;
    1144        6196 :                 if (rel->flag != TRIGGER_WRAPPER && op) {
    1145        5884 :                         sql_subfunc *f = op->f;
    1146        5884 :                         if (f->func->lang == FUNC_LANG_SQL) {
    1147          97 :                                 set_count_prop(v->sql->sa, rel, 1000 /* just some fallback value */);
    1148        5787 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "storage") == 0) {
    1149         837 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of catalog */);
    1150        4950 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "db_users") == 0) {
    1151           0 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of users */);
    1152        4950 :                         } else if (f->func->lang == FUNC_LANG_MAL && strncmp(f->func->base.name, "querylog", 8) == 0) {
    1153        1101 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of querylog */);
    1154        3849 :                         } else if (f->func->lang == FUNC_LANG_MAL &&
    1155        3684 :                                            (strcmp(f->func->base.name, "queue") == 0 ||
    1156        3416 :                                                 strcmp(f->func->base.name, "optimizers") == 0 ||
    1157        3100 :                                                 strcmp(f->func->base.name, "env") == 0 ||
    1158        2734 :                                                 strcmp(f->func->base.name, "keywords") == 0 ||
    1159        2734 :                                                 strcmp(f->func->base.name, "statistics") == 0 ||
    1160        2057 :                                                 strcmp(f->func->base.name, "rejects") == 0 ||
    1161        1799 :                                                 strcmp(f->func->base.name, "schemastorage") == 0 ||
    1162        1799 :                                                 strncmp(f->func->base.name, "storage", 7) == 0 ||
    1163        1799 :                                                 strcmp(f->func->base.name, "sessions") == 0) ) {
    1164        2179 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of queue */);
    1165             :                         }
    1166             :                         /* else {
    1167             :                                 printf("%%func needs stats : %s\n", f->func->base.name);
    1168             :                         } */
    1169             :                 }
    1170             :                 break;
    1171             :         }
    1172             :         /*These relations are less important for now
    1173             :           TODO later we can tune it */
    1174             :         case op_insert:
    1175             :         case op_update:
    1176             :         case op_delete:
    1177             :         case op_truncate:
    1178             :         case op_ddl:
    1179             :         default:
    1180             :                 break;
    1181             :         }
    1182             : 
    1183             :         return rel;
    1184             : }
    1185             : 
    1186             : static sql_rel *
    1187      554812 : rel_get_statistics(visitor *v, global_props *gp, sql_rel *rel)
    1188             : {
    1189             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
    1190      554812 :         uint8_t has_special_modify = (uint8_t) gp->has_special_modify;
    1191      554812 :         v->data = &has_special_modify;
    1192      554812 :         rel = rel_visitor_bottomup(v, rel, &rel_get_statistics_);
    1193      555189 :         v->data = gp;
    1194      555189 :         return rel;
    1195             : }
    1196             : 
    1197             : run_optimizer
    1198      731194 : bind_get_statistics(visitor *v, global_props *gp)
    1199             : {
    1200      731194 :         (void) v;
    1201      731194 :         return (gp->opt_level == 1 && !gp->cnt[op_insert]) ? rel_get_statistics : NULL;
    1202             : }
    1203             : 
    1204             : 
    1205             : static bool
    1206       44894 : point_select_on_unique_column(sql_rel *rel)
    1207             : {
    1208       44894 :         if (is_select(rel->op) && !list_empty(rel->exps)) {
    1209       38356 :                 for (node *n = rel->exps->h; n ; n = n->next) {
    1210       19958 :                         sql_exp *e = n->data, *el = e->l, *er = e->r, *found = NULL;
    1211             : 
    1212       19958 :                         if (is_compare(e->type) && e->flag == cmp_equal) {
    1213       15205 :                                 if (is_numeric_upcast(el))
    1214           0 :                                         el = el->l;
    1215       15205 :                                 if (is_numeric_upcast(er))
    1216           0 :                                         er = er->l;
    1217       15205 :                                 if (is_alias(el->type) && exp_is_atom(er) && (found = rel_find_exp(rel->l, el)) &&
    1218         761 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(found) || !has_nil(er)))
    1219             :                                         return true;
    1220       14444 :                                 if (is_alias(er->type) && exp_is_atom(el) && (found = rel_find_exp(rel->l, er)) &&
    1221           0 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(el) || !has_nil(found)))
    1222             :                                         return true;
    1223             :                         }
    1224             :                 }
    1225             :         }
    1226             :         return false;
    1227             : }
    1228             : 
    1229             : /*
    1230             :  * A point select on an unique column reduces the number of rows to 1. If the same select is under a
    1231             :  * join, the opposite side's select can be pushed above the join.
    1232             :  */
    1233             : static inline sql_rel *
    1234      697659 : rel_push_select_up(visitor *v, sql_rel *rel)
    1235             : {
    1236      697659 :         if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) || is_semi(rel->op)) && !is_single(rel)) {
    1237      122618 :                 sql_rel *l = rel->l, *r = rel->r;
    1238      122618 :                 bool can_pushup_left = is_select(l->op) && !rel_is_ref(l) && !is_single(l) && (is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op)),
    1239      122618 :                         can_pushup_right = is_select(r->op) && !rel_is_ref(r) && !is_single(r) && (is_innerjoin(rel->op) || is_right(rel->op));
    1240             : 
    1241      122618 :                 if (can_pushup_left || can_pushup_right) {
    1242       35440 :                         if (can_pushup_left)
    1243       20626 :                                 can_pushup_left = point_select_on_unique_column(r);
    1244       35440 :                         if (can_pushup_right)
    1245       24268 :                                 can_pushup_right = point_select_on_unique_column(l);
    1246             : 
    1247             :                         /* if both selects retrieve one row each, it's not worth it to push both up */
    1248       35440 :                         if (can_pushup_left && !can_pushup_right) {
    1249         695 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1250         695 :                                 nrel->l = l->l;
    1251         695 :                                 rel = rel_inplace_select(rel, nrel, l->exps);
    1252         695 :                                 assert(is_select(rel->op));
    1253         695 :                                 v->changes++;
    1254       34745 :                         } else if (!can_pushup_left && can_pushup_right) {
    1255          14 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1256          14 :                                 nrel->r = r->l;
    1257          14 :                                 rel = rel_inplace_select(rel, nrel, r->exps);
    1258          14 :                                 assert(is_select(rel->op));
    1259          14 :                                 v->changes++;
    1260             :                         }
    1261             :                 }
    1262             :         }
    1263      697659 :         return rel;
    1264             : }
    1265             : 
    1266             : static int
    1267       38565 : sql_class_base_score(visitor *v, sql_column *c, sql_subtype *t, bool equality_based)
    1268             : {
    1269       38565 :         int de;
    1270             : 
    1271       38565 :         if (!t)
    1272             :                 return 0;
    1273       38565 :         switch (ATOMstorage(t->type->localtype)) {
    1274             :         case TYPE_bte:
    1275             :                 return 150 - 8;
    1276        1606 :         case TYPE_sht:
    1277        1606 :                 return 150 - 16;
    1278       16442 :         case TYPE_int:
    1279       16442 :                 return 150 - 32;
    1280         469 :         case TYPE_void:
    1281             :         case TYPE_lng:
    1282         469 :                 return 150 - 64;
    1283             :         case TYPE_uuid:
    1284             : #ifdef HAVE_HGE
    1285             :         case TYPE_hge:
    1286             : #endif
    1287             :                 return 150 - 128;
    1288           2 :         case TYPE_flt:
    1289           2 :                 return 75 - 24;
    1290             :         case TYPE_dbl:
    1291             :                 return 75 - 53;
    1292       15726 :         default:
    1293       15726 :                 if (equality_based && c && v->storage_based_opt && (de = mvc_is_duplicate_eliminated(v->sql, c)))
    1294        1491 :                         return 150 - de * 8;
    1295             :                 /* strings and blobs not duplicate eliminated don't get any points here */
    1296             :                 return 0;
    1297             :         }
    1298             : }
    1299             : 
    1300             : static int
    1301       14651 : score_se_base(visitor *v, sql_rel *rel, sql_exp *e)
    1302             : {
    1303       14651 :         int res = 0;
    1304       14651 :         sql_subtype *t = exp_subtype(e);
    1305       14651 :         sql_column *c = NULL;
    1306             : 
    1307       14651 :         if (e->type == e_convert) /* keep unsafes at the end (TODO improve) */
    1308             :                 return -1000;
    1309             :         /* can we find out if the underlying table is sorted */
    1310       14243 :         if ((c = exp_find_column(rel, e, -2)) && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1311       14243 :                 res += 600;
    1312             : 
    1313             :         /* prefer the shorter var types over the longer ones */
    1314       14243 :         res += sql_class_base_score(v, c, t, is_equality_or_inequality_exp(e->flag)); /* smaller the type, better */
    1315       14243 :         return res;
    1316             : }
    1317             : 
    1318             : static int
    1319       17635 : score_se(visitor *v, sql_rel *rel, sql_exp *e)
    1320             : {
    1321       17635 :         int score = 0;
    1322       17635 :         if (e->type == e_cmp && !is_complex_exp(e->flag)) {
    1323       14651 :                 sql_exp *l = e->l;
    1324             : 
    1325       14651 :                 while (l->type == e_cmp) { /* go through nested comparisons */
    1326           2 :                         sql_exp *ll;
    1327             : 
    1328           2 :                         if (l->flag == cmp_filter || l->flag == cmp_or)
    1329           0 :                                 ll = ((list*)l->l)->h->data;
    1330             :                         else
    1331           2 :                                 ll = l->l;
    1332           2 :                         if (ll->type != e_cmp)
    1333             :                                 break;
    1334             :                         l = ll;
    1335             :                 }
    1336       14651 :                 score += score_se_base(v, rel, l);
    1337             :         }
    1338       17635 :         score += exp_keyvalue(e);
    1339       17635 :         return score;
    1340             : }
    1341             : 
    1342             : static inline sql_rel *
    1343      697659 : rel_select_order(visitor *v, sql_rel *rel)
    1344             : {
    1345      697659 :         int *scores = NULL;
    1346      697659 :         sql_exp **exps = NULL;
    1347             : 
    1348      697659 :         if (is_select(rel->op) && list_length(rel->exps) > 1) {
    1349        7476 :                 node *n;
    1350        7476 :                 int i, nexps = list_length(rel->exps);
    1351        7476 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, nexps);
    1352        7476 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, nexps);
    1353             : 
    1354       25111 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next) {
    1355       17664 :                         exps[i] = n->data;
    1356       17664 :                         if (find_prop(exps[i]->p, PROP_HASHCOL))
    1357             :                                 return rel;
    1358       17635 :                         scores[i] = score_se(v, rel, n->data);
    1359             :                 }
    1360        7447 :                 GDKqsort(scores, exps, NULL, nexps, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1361             : 
    1362       25082 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next)
    1363       17635 :                         n->data = exps[i];
    1364             :         }
    1365             : 
    1366             :         return rel;
    1367             : }
    1368             : 
    1369             : /* Compute the efficiency of using this expression early in a group by list */
    1370             : static int
    1371       24322 : score_gbe(visitor *v, sql_rel *rel, sql_exp *e)
    1372             : {
    1373       24322 :         int res = 0;
    1374       24322 :         sql_subtype *t = exp_subtype(e);
    1375       24322 :         sql_column *c = exp_find_column(rel, e, -2);
    1376             : 
    1377       24322 :         if (e->card == CARD_ATOM) /* constants are trivial to group */
    1378          38 :                 res += 1000;
    1379             :         /* can we find out if the underlying table is sorted */
    1380       24322 :         if (is_unique(e) || find_prop(e->p, PROP_HASHCOL) || (c && v->storage_based_opt && mvc_is_unique(v->sql, c))) /* distinct columns */
    1381        2476 :                 res += 700;
    1382       21084 :         if (c && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1383        2491 :                 res += 500;
    1384       24322 :         if (find_prop(e->p, PROP_HASHIDX)) /* has hash index */
    1385           0 :                 res += 200;
    1386             : 
    1387             :         /* prefer the shorter var types over the longer ones */
    1388       24322 :         res += sql_class_base_score(v, c, t, true); /* smaller the type, better */
    1389       24322 :         return res;
    1390             : }
    1391             : 
    1392             : /* reorder group by expressions */
    1393             : static inline sql_rel *
    1394      697659 : rel_groupby_order(visitor *v, sql_rel *rel)
    1395             : {
    1396      697659 :         int *scores = NULL;
    1397      697659 :         sql_exp **exps = NULL;
    1398             : 
    1399      697659 :         if (is_groupby(rel->op) && list_length(rel->r) > 1) {
    1400        7983 :                 node *n;
    1401        7983 :                 list *gbe = rel->r;
    1402        7983 :                 int i, ngbe = list_length(gbe);
    1403        7983 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, ngbe);
    1404        7983 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, ngbe);
    1405             : 
    1406             :                 /* first sorting step, give priority for integers and sorted columns */
    1407       32305 :                 for (i = 0, n = gbe->h; n; i++, n = n->next) {
    1408       24322 :                         exps[i] = n->data;
    1409       24322 :                         scores[i] = score_gbe(v, rel, exps[i]);
    1410             :                 }
    1411        7983 :                 GDKqsort(scores, exps, NULL, ngbe, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1412             : 
    1413             :                 /* second sorting step, give priority to strings with lower number of digits */
    1414       17439 :                 for (i = ngbe - 1; i && !scores[i]; i--); /* find expressions with no score from the first round */
    1415        7983 :                 if (scores[i])
    1416        6991 :                         i++;
    1417        7983 :                 if (ngbe - i > 1) {
    1418       12002 :                         for (int j = i; j < ngbe; j++) {
    1419        9025 :                                 sql_subtype *t = exp_subtype(exps[j]);
    1420        9025 :                                 scores[j] = t ? t->digits : 0;
    1421             :                         }
    1422             :                         /* the less number of digits the better, order ascending */
    1423        2977 :                         GDKqsort(scores + i, exps + i, NULL, ngbe - i, sizeof(int), sizeof(void *), TYPE_int, false, true);
    1424             :                 }
    1425             : 
    1426       32305 :                 for (i = 0, n = gbe->h; n; i++, n = n->next)
    1427       24322 :                         n->data = exps[i];
    1428             :         }
    1429             : 
    1430      697659 :         return rel;
    1431             : }
    1432             : 
    1433             : /* This optimization loop contains optimizations that can potentially use statistics */
    1434             : static sql_rel *
    1435      697659 : rel_final_optimization_loop_(visitor *v, sql_rel *rel)
    1436             : {
    1437             :         /* Run rel_push_select_up only once at the end to avoid an infinite optimization loop */
    1438      697659 :         rel = rel_push_select_up(v, rel);
    1439      697659 :         rel = rel_select_order(v, rel);
    1440             : 
    1441             :         /* TODO? Maybe later add rel_simplify_count, rel_join2semijoin, rel_simplify_fk_joins,
    1442             :            rel_distinct_project2groupby, rel_simplify_predicates, rel_simplify_math,
    1443             :            rel_distinct_aggregate_on_unique_values */
    1444             : 
    1445      697659 :         rel = rel_groupby_order(v, rel);
    1446      697659 :         return rel;
    1447             : }
    1448             : 
    1449             : static sql_rel *
    1450       70587 : rel_final_optimization_loop(visitor *v, global_props *gp, sql_rel *rel)
    1451             : {
    1452       70587 :         (void) gp;
    1453       70587 :         return rel_visitor_bottomup(v, rel, &rel_final_optimization_loop_);
    1454             : }
    1455             : 
    1456             : run_optimizer
    1457      731637 : bind_final_optimization_loop(visitor *v, global_props *gp)
    1458             : {
    1459      731637 :         int flag = v->sql->sql_optimizer;
    1460             :         /* At the moment, this optimizer has dependency on 3 flags */
    1461      563110 :         return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_select]) &&
    1462      802225 :                 (flag & push_select_up) && (flag & optimize_select_and_joins_topdown) && (flag & optimize_projections) ? rel_final_optimization_loop : NULL;
    1463             : }

Generated by: LCOV version 1.14