LCOV - code coverage report
Current view: top level - sql/server - rel_statistics.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 834 885 94.2 %
Date: 2025-03-24 21:28:01 Functions: 26 26 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024, 2025 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include "rel_optimizer_private.h"
      15             : #include "rel_statistics.h"
      16             : #include "rel_basetable.h"
      17             : #include "rel_rewriter.h"
      18             : #include "sql_storage.h"
      19             : 
      20             : static sql_exp *
      21     3533012 : comparison_find_column(sql_exp *input, sql_exp *e)
      22             : {
      23     3533012 :         switch (input->type) {
      24      100435 :         case e_convert: {
      25      100435 :                 list *types = (list *)input->r;
      26      100435 :                 sql_class from = ((sql_subtype*)types->h->data)->type->eclass, to = ((sql_subtype*)types->h->next->data)->type->eclass;
      27      100435 :                 if (from == to)
      28      194115 :                         return comparison_find_column(input->l, e) ? input : NULL;
      29             :                 return NULL;
      30             :         }
      31     3107670 :         case e_column:
      32     3107670 :                 return exp_match(e, input) ? input : NULL;
      33             :         default:
      34             :                 return NULL;
      35             :         }
      36             : }
      37             : 
      38             : /* multi lo <= col <= hi, maybe still be false even if lo or hi are NULL, possibly similar for filter on multiple
      39             :  * columns */
      40             : #define comp_single_column(c) (!c->f)
      41             : 
      42             : static sql_exp *
      43     8824892 : rel_propagate_column_ref_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
      44             : {
      45     8908668 :         assert(e->type == e_column);
      46     8908668 :         if (rel) {
      47     8908587 :                 switch(rel->op) {
      48     4129330 :                 case op_left:
      49             :                 case op_right:
      50             :                 case op_full:
      51             :                 case op_join:
      52             :                 case op_select:
      53             :                 case op_anti:
      54             :                 case op_semi: {
      55     4129330 :                         bool found_without_semantics = false, found_left = false, found_right = false, still_unique = false;
      56             : 
      57     4129330 :                         if ((is_innerjoin(rel->op) || is_select(rel->op)) && list_length(rel->exps) == 1 && exp_is_false(rel->exps->h->data))
      58             :                                 return NULL; /* nothing will pass, skip */
      59             : 
      60             :                         /* propagate from the bottom first */
      61     4108695 :                         if (rel_propagate_column_ref_statistics(sql, rel->l, e))
      62             :                                 found_left = true;
      63     2596480 :                         if (!found_left && is_join(rel->op) && rel_propagate_column_ref_statistics(sql, rel->r, e))
      64     4108695 :                                 found_right = true;
      65             : 
      66     4108695 :                         if (!found_left && !found_right)
      67             :                                 return NULL;
      68     1809610 :                         if (!list_empty(rel->exps) && rel->op != op_anti) { /* if there's an or, the MIN and MAX get difficult to propagate */
      69     3559647 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
      70     1846387 :                                         sql_exp *comp = n->data, *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL, *fe = comp->f, *fne = NULL;
      71             : 
      72     1846387 :                                         if (comp->type == e_cmp) {
      73     1845519 :                                                 if (is_theta_exp(comp->flag) && ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) || (fe && (fne = comparison_find_column(fe, e))))) {
      74      127457 :                                                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
      75      127457 :                                                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
      76             : 
      77             :                                                         /* not semantics found or if explicitly filtering not null values from the column */
      78      127457 :                                                         found_without_semantics |= (!is_semantics(comp) && comp_single_column(comp)) || (comp->flag == cmp_equal && lne && is_anti(comp) && exp_is_null(re));
      79      127457 :                                                         still_unique |= comp->flag == cmp_equal && is_unique(le) && is_unique(re); /* unique if only equi-joins on unique columns are there */
      80      127457 :                                                         if (is_full(rel->op) || (is_left(rel->op) && found_left) || (is_right(rel->op) && found_right)) /* on outer joins, min and max cannot be propagated on some cases */
      81       22655 :                                                                 continue;
      82             :                                                         /* if (end2 >= start1 && start2 <= end1) then the 2 intervals are intersected */
      83      104802 :                                                         if (fe && lval_min && lval_max) { /* range case, the middle expression must intersect the other two */
      84        4618 :                                                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
      85        4618 :                                                                 int int1 = rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0,
      86        4618 :                                                                         int2 = fval_min && fval_max && atom_cmp(fval_max, lval_min) >= 0 && atom_cmp(fval_min, lval_max) <= 0,
      87        4618 :                                                                         symmetric = is_symmetric(comp);
      88             : 
      89        4618 :                                                                 if (is_anti(comp) || (!symmetric && fval_min && rval_max && atom_cmp(fval_min, rval_max) < 0)) /* for asymmetric case the re range must be after the fe range */
      90        1815 :                                                                         continue;
      91        2803 :                                                                 if (lne && int1 && int2) {
      92         103 :                                                                         if (symmetric) {
      93           0 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
      94           0 :                                                                                 atom *nmin = statistics_atom_min(sql, rval_min, fval_min), *nmax = statistics_atom_max(sql, rval_max, fval_max);
      95             :                                                                                 /* max is min from le and (max from re and fe max) */
      96           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, nmax, p2->value.pval) : nmax);
      97             :                                                                                 /* min is max from le and (min from re and fe min) */
      98           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, nmin, p1->value.pval) : nmin);
      99             :                                                                         } else {
     100         103 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
     101             :                                                                                 /* max is min from le and fe max */
     102         103 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, fval_max, p2->value.pval) : fval_max);
     103             :                                                                                 /* min is max from le and re min */
     104         103 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, rval_min, p1->value.pval) : rval_min);
     105             :                                                                         }
     106        2700 :                                                                 } else if (rne) {
     107         590 :                                                                         if (symmetric && int1 && int2) { /* min is max from le and (min from re and fe min) */
     108           0 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     109           0 :                                                                                 atom *nmin = p ? statistics_atom_min(sql, p->value.pval, fval_min) : fval_min;
     110           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, nmin, lval_min));
     111         590 :                                                                         } else if (int1) { /* min is max from le and re min */
     112         100 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     113         100 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     114             :                                                                         }
     115        2110 :                                                                 } else if (fne) {
     116         549 :                                                                         if (symmetric && int1 && int2) { /* max is min from le and (max from re and fe max) */
     117           0 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     118           0 :                                                                                 atom *nmax = p ? statistics_atom_max(sql, p->value.pval, rval_max) : rval_max;
     119           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, nmax, lval_max) : nmax);
     120         549 :                                                                         } else if (int2) { /* max is min from le and fe max */
     121         266 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     122         266 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     123             :                                                                         }
     124             :                                                                 }
     125      100184 :                                                         } else if (lval_min && lval_max && rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0) {
     126             :                                                                 /* both min and max must be set and the intervals must overlap */
     127       41896 :                                                                 switch (comp->flag) {
     128       28235 :                                                                 case cmp_equal: /* for equality reduce */
     129       28235 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_max(sql, lval_max, rval_max) : statistics_atom_min(sql, lval_max, rval_max));
     130       28235 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_min(sql, lval_min, rval_min) : statistics_atom_max(sql, lval_min, rval_min));
     131       28235 :                                                                         break;
     132        4345 :                                                                 case cmp_notequal: /* for inequality expand */
     133        4345 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_min(sql, lval_max, rval_max) : statistics_atom_max(sql, lval_max, rval_max));
     134        4345 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_max(sql, lval_min, rval_min) : statistics_atom_min(sql, lval_min, rval_min));
     135        4345 :                                                                         break;
     136        5310 :                                                                 case cmp_gt:
     137             :                                                                 case cmp_gte:
     138        9682 :                                                                         if (!is_anti(comp) && lne) { /* min is max from both min */
     139        4372 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     140        4372 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min, p->value.pval) : rval_min);
     141         938 :                                                                         } else if (!is_anti(comp)) { /* max is min from both max */
     142         938 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     143         938 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     144             :                                                                         }
     145             :                                                                         break;
     146        4006 :                                                                 case cmp_lt:
     147             :                                                                 case cmp_lte:
     148        7205 :                                                                         if (!is_anti(comp) && lne) { /* max is min from both max */
     149        3199 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     150        3199 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max, p->value.pval) : rval_max);
     151         807 :                                                                         } else if (!is_anti(comp)) { /* min is max from both min */
     152         807 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     153         807 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     154             :                                                                         }
     155             :                                                                         break;
     156             :                                                                 default: /* Maybe later I can do cmp_in and cmp_notin */
     157             :                                                                         break;
     158             :                                                                 }
     159             :                                                         }
     160             :                                                 }
     161             :                                         }
     162             :                                 }
     163             :                         }
     164     1809610 :                         if (is_full(rel->op) || (is_left(rel->op) && found_right) || (is_right(rel->op) && found_left))
     165       36924 :                                 set_has_nil(e);
     166     1809610 :                         if (!is_outerjoin(rel->op) && found_without_semantics) /* at an outer join, null values pass */
     167       93944 :                                 set_has_no_nil(e);
     168     1809610 :                         if (is_join(rel->op) && is_unique(e) && !still_unique)
     169      119744 :                                 set_not_unique(e);
     170     1809610 :                         return e;
     171             :                 }
     172     4676113 :                 case op_table:
     173             :                 case op_basetable:
     174             :                 case op_union:
     175             :                 case op_except:
     176             :                 case op_inter:
     177             :                 case op_munion:
     178             :                 case op_project:
     179             :                 case op_groupby: {
     180     4676113 :                         if (is_recursive(rel))
     181             :                                 return NULL;
     182     4675916 :                         sql_exp *found;
     183     4675916 :                         atom *fval;
     184     4675916 :                         prop *est;
     185     4675916 :                         if ((found = rel_find_exp(rel, e))) {
     186     2198515 :                                 if (rel->op != op_table) { /* At the moment don't propagate statistics for table relations */
     187     2153357 :                                         if ((fval = find_prop_and_get(found->p, PROP_MAX)))
     188     1137417 :                                                 set_minmax_property(sql, e, PROP_MAX, fval);
     189     2153349 :                                         if ((fval = find_prop_and_get(found->p, PROP_MIN)))
     190     1144459 :                                                 set_minmax_property(sql, e, PROP_MIN, fval);
     191     2153361 :                                         if (!has_nil(found))
     192     1397146 :                                                 set_has_no_nil(e);
     193     2153361 :                                         if (is_unique(found) || (need_distinct(rel) && list_length(rel->exps) == 1) ||
     194     1735810 :                                                 (is_groupby(rel->op) && (list_empty(rel->r) || (list_length(rel->r) == 1 && exps_find_exp(rel->r, e)))))
     195      428473 :                                                 set_unique(e);
     196             :                                         /* propagate unique estimation for known cases */
     197     2153359 :                                         if (is_groupby(rel->op) && list_empty(rel->r) && !find_prop(e->p, PROP_NUNIQUES)) { /* global aggregate case */
     198        7647 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     199        7647 :                                                 p->value.dval = 1;
     200     2145712 :                                         } else if (((is_basetable(rel->op) || is_except(rel->op) || is_inter(rel->op) || is_simple_project(rel->op) ||
     201       71851 :                                                                  (is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
     202     2085194 :                                                                 (est = find_prop(found->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES))) {
     203      194851 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     204      194851 :                                                 p->value.dval = est->value.dval;
     205             :                                         }
     206             :                                 }
     207     2198518 :                                 return e;
     208             :                         }
     209             :                         return NULL;
     210             :                 }
     211       83776 :                 case op_topn:
     212             :                 case op_sample:
     213       83776 :                         return rel_propagate_column_ref_statistics(sql, rel->l, e);
     214             :                 default:
     215             :                         break;
     216             :                 }
     217             :         }
     218             :         return NULL;
     219             : }
     220             : 
     221             : static atom *
     222     4716978 : atom_from_valptr( allocator *sa, sql_subtype *tpe, ValRecord *v)
     223             : {
     224     4716978 :         atom *a = SA_NEW(sa, atom);
     225             : 
     226     4717023 :         assert(!VALisnil(v));
     227     4716969 :         *a = (atom) {.tpe = *tpe,};
     228     4716969 :         SA_VALcopy(sa, &a->data, v);
     229     4717155 :         return a;
     230             : }
     231             : 
     232             : void
     233     4375773 : sql_column_get_statistics(mvc *sql, sql_column *c, sql_exp *e)
     234             : {
     235     4375773 :         bool nonil = false, unique = false;
     236     4375773 :         double unique_est = 0.0;
     237     4375773 :         ValRecord min, max;
     238     4375773 :         int ok = mvc_col_stats(sql, c, &nonil, &unique, &unique_est, &min, &max);
     239             : 
     240     4376531 :         if (has_nil(e) && nonil)
     241     2917459 :                 set_has_no_nil(e);
     242     4376531 :         if (!is_unique(e) && unique)
     243     1153143 :                 set_unique(e);
     244     4376531 :         if (unique_est != 0.0) {
     245     3088478 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     246     3088303 :                 p->value.dval = unique_est;
     247             :         }
     248     4376356 :         unsigned int digits = 0;
     249     4376356 :         sql_subtype *et = exp_subtype(e);
     250     4376599 :         if (et->type->eclass == EC_DEC || et->type->eclass == EC_NUM)
     251     2866396 :                 digits = et->digits;
     252     4376599 :         if ((ok & 2) == 2) {
     253     2355801 :                 if (!VALisnil(&max)) {
     254     2355746 :                         prop *p = e->p = prop_create(sql->sa, PROP_MAX, e->p);
     255     2355695 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &max);
     256     2355673 :                         if (digits) {
     257     1751668 :                                 unsigned int nd = atom_digits(p->value.pval);
     258     1751640 :                                 if (nd < digits)
     259             :                                         digits = nd;
     260     1751640 :                                 if (!digits)
     261             :                                         digits = 1;
     262             :                         }
     263             :                 }
     264     2355641 :                 VALclear(&max);
     265             :         }
     266     4376429 :         if ((ok & 1) == 1) {
     267     2361772 :                 if (!VALisnil(&min)) {
     268     2361785 :                         prop *p = e->p = prop_create(sql->sa, PROP_MIN, e->p);
     269     2361819 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &min);
     270     2361887 :                         if (digits) {
     271     1758892 :                                 unsigned int nd = atom_digits(p->value.pval);
     272     1758915 :                                 if (nd > digits) /* possibly set to low by max value */
     273             :                                         digits = nd;
     274             :                                 if (!digits)
     275             :                                         digits = 1;
     276             :                         }
     277             :                 }
     278     2361914 :                 VALclear(&min);
     279             :         }
     280     4376582 :         if (digits)
     281     2866370 :                 et->digits = digits;
     282     4376582 :         if (et->type->eclass == EC_DEC && et->digits <= et->scale)
     283         216 :                 et->digits = et->scale + 1;
     284     4376582 : }
     285             : 
     286             : static void
     287      958375 : rel_basetable_column_get_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
     288             : {
     289      958375 :         if (e->p)
     290             :                 return;
     291      307956 :         sql_column *c = NULL;
     292             : 
     293      307956 :         if ((c = rel_base_find_column(rel, e->nid))) {
     294      209788 :                 sql_column_get_statistics(sql, c, e);
     295             :         }
     296             : }
     297             : 
     298             : static bool
     299        8876 : rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, sql_exp *e, int i)
     300             : {
     301        8876 :         sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
     302        8876 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     303        8876 :                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     304        8876 :         prop *est;
     305             : 
     306             :         /* for the intersection, if both expressions don't overlap, it can be pruned */
     307        8876 :         if (is_inter(rel->op) && !has_nil(le) && !has_nil(re) &&
     308         511 :                 ((rval_max && lval_min && atom_cmp(rval_max, lval_min) < 0) || (rval_min && lval_max && atom_cmp(rval_min, lval_max) > 0)))
     309          26 :                 return true;
     310             : 
     311        8850 :         if (lval_max && rval_max) {
     312        2557 :                 if (is_union(rel->op))
     313           3 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     314        2554 :                 else if (is_inter(rel->op))
     315         399 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval_max, rval_max)); /* for intersect the new max will be the min of the two */
     316             :                 else /* except */
     317        2155 :                         set_minmax_property(sql, e, PROP_MAX, lval_max);
     318             :         }
     319        8850 :         if (lval_min && rval_min) {
     320        2557 :                 if (is_union(rel->op))
     321           3 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     322        2554 :                 else if (is_inter(rel->op))
     323         399 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval_min, rval_min)); /* for intersect the new min will be the max of the two */
     324             :                 else /* except */
     325        2155 :                         set_minmax_property(sql, e, PROP_MIN, lval_min);
     326             :         }
     327             : 
     328        8850 :         if (is_union(rel->op) || is_munion(rel->op)) {
     329        5986 :                 if (!has_nil(le) && !has_nil(re))
     330         879 :                         set_has_no_nil(e);
     331        5986 :                 if (need_distinct(rel) && list_length(rel->exps) == 1)
     332           0 :                         set_unique(e);
     333        2864 :         } else if (is_inter(rel->op)) {
     334         564 :                 if (!has_nil(le) || !has_nil(re))
     335         509 :                         set_has_no_nil(e);
     336         564 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     337         345 :                         set_unique(e);
     338             :         } else {
     339        2300 :                 assert(is_except(rel->op));
     340        2300 :                 if (!has_nil(le))
     341        2236 :                         set_has_no_nil(e);
     342        2300 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     343        2012 :                         set_unique(e);
     344             :         }
     345             :         /* propagate unique estimation for known cases */
     346        8850 :         if (!is_union(rel->op) && (est = find_prop(le->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES)) {
     347         207 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     348         207 :                 p->value.dval = est->value.dval;
     349             :         }
     350             :         return false;
     351             : }
     352             : 
     353             : 
     354             : static void
     355       63909 : rel_munion_get_statistics(mvc *sql, sql_rel *rel, list *rels, sql_exp *e, int i)
     356             : {
     357       63909 :         if (is_recursive(rel))
     358             :                 return ;
     359       63909 :         assert(is_munion(rel->op));
     360             : 
     361       63909 :         sql_rel *l = rels->h->data;
     362       63909 :         sql_exp *le = list_fetch(l->exps, i);
     363       63909 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX);
     364       63909 :         bool has_nonil = !has_nil(le);
     365             : 
     366      184878 :         for(node *n = rels->h->next; n; n = n->next) {
     367      120969 :                 sql_rel *r = n->data;
     368      120969 :                 sql_exp *re = list_fetch(r->exps, i);
     369      120969 :                 atom *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     370             : 
     371      120969 :                 if (lval_max && rval_max) {
     372       44365 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     373       44365 :                         lval_max = find_prop_and_get(e->p, PROP_MAX);
     374             :                 }
     375      120969 :                 if (lval_min && rval_min) {
     376       43816 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     377       43816 :                         lval_min = find_prop_and_get(e->p, PROP_MIN);
     378             :                 }
     379      120969 :                 has_nonil &= !has_nil(re);
     380             : 
     381             :         }
     382             : 
     383       63909 :         if (has_nonil)
     384       43847 :                 set_has_no_nil(e);
     385             : 
     386       63909 :         if (need_distinct(rel) && list_length(rel->exps) == 1)
     387        1597 :                 set_unique(e);
     388             : }
     389             : 
     390             : 
     391             : static sql_exp *
     392     3565573 : rel_propagate_statistics(visitor *v, sql_rel *rel, sql_exp *e, int depth)
     393             : {
     394     3565573 :         mvc *sql = v->sql;
     395     3565573 :         atom *lval;
     396             : 
     397     3565573 :         (void) depth;
     398     3565573 :         switch(e->type) {
     399     2206924 :         case e_column:
     400     2206924 :                 switch (rel->op) { /* set relations don't call rel_propagate_statistics */
     401      284908 :                 case op_join:
     402             :                 case op_left:
     403             :                 case op_right:
     404             :                 case op_full:
     405             :                 case op_semi:
     406             :                 case op_anti: {
     407      284908 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e);
     408      284908 :                         if (!found)
     409      142902 :                                 (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     410             :                         break;
     411             :                 }
     412     1922016 :                 case op_select:
     413             :                 case op_project:
     414             :                 case op_groupby: {
     415     1922016 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e); /* labels may be found on the same projection, ugh */
     416     1922010 :                         if (!found && is_simple_project(rel->op))
     417      133713 :                                 (void) rel_propagate_column_ref_statistics(sql, rel, e);
     418             :                         break;
     419             :                 }
     420           0 :                 case op_insert:
     421             :                 case op_update:
     422             :                 case op_delete:
     423           0 :                         (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     424           0 :                         break;
     425             :                 default:
     426             :                         break;
     427             :                 }
     428             :                 break;
     429      102365 :         case e_convert: {
     430      102365 :                 sql_subtype *to = exp_totype(e), *from = exp_fromtype(e);
     431      102365 :                 sql_exp *l = e->l;
     432      102365 :                 sql_class fr = from->type->eclass, too = to->type->eclass;
     433      102365 :                 prop *est;
     434             : 
     435      102365 :                 if (fr == too) {
     436       93198 :                         if ((lval = find_prop_and_get(l->p, PROP_MAX))) {
     437       60303 :                                 atom *res = atom_copy(sql->sa, lval);
     438       60303 :                                 if ((res = atom_cast(sql->sa, res, to)))
     439       60280 :                                         set_minmax_property(sql, e, PROP_MAX, res);
     440             :                         }
     441       93198 :                         if ((lval = find_prop_and_get(l->p, PROP_MIN))) {
     442       60941 :                                 atom *res = atom_copy(sql->sa, lval);
     443       60941 :                                 if ((res = atom_cast(sql->sa, res, to)))
     444       60918 :                                         set_minmax_property(sql, e, PROP_MIN, res);
     445             :                         }
     446             :                 }
     447             :                 /* propagating nuniques through conversions is tricky. I am adding just the general cases */
     448      102365 :                 if ((est = find_prop(l->p, PROP_NUNIQUES)) &&
     449       63788 :                         (((fr == EC_TIME || fr == EC_TIME_TZ || fr == EC_DATE) && (too == EC_TIMESTAMP || too == EC_TIMESTAMP_TZ)) ||
     450       63763 :                          ((fr == EC_NUM || fr == EC_BIT) && too == EC_NUM) || (fr == EC_DEC && too == EC_DEC) || (fr != EC_EXTERNAL && too == EC_STRING))) {
     451       60329 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     452       60329 :                         p->value.dval = est->value.dval;
     453             :                 }
     454      102365 :                 if (!has_nil(l))
     455       58044 :                         set_has_no_nil(e);
     456             :                 break;
     457             :         }
     458      379391 :         case e_aggr:
     459             :         case e_func: {
     460      379391 :                 BUN lv;
     461      379391 :                 sql_subfunc *f = e->f;
     462             : 
     463      379391 :                 if (!f->func->s) {
     464      352272 :                         int key = hash_key(f->func->base.name); /* Using hash lookup */
     465      352272 :                         sql_hash_e *he = sql_functions_lookup->buckets[key&(sql_functions_lookup->size-1)];
     466      352272 :                         lookup_function look = NULL;
     467             : 
     468      754052 :                         for (; he && !look; he = he->chain) {
     469      401780 :                                 struct function_properties* fp = (struct function_properties*) he->value;
     470             : 
     471      401780 :                                 if (!strcmp(f->func->base.name, fp->name))
     472      108607 :                                         look = fp->func;
     473             :                         }
     474      352272 :                         if (look)
     475      108607 :                                 look(sql, e);
     476             :                 }
     477             :                 /* for global aggregates with no semantics, if the left relation has values, then the output won't be NULL */
     478      379391 :                 if (!is_semantics(e) && e->l && !have_nil(e->l) &&
     479      109333 :                         (e->type != e_aggr || (is_groupby(rel->op) && list_length(rel->r)) || ((lv = get_rel_count(rel->l)) != BUN_NONE && lv > 0)))
     480      108997 :                         set_has_no_nil(e);
     481             :                 /* set properties for global aggregates */
     482      379391 :                 if (e->type == e_aggr && is_groupby(rel->op) && list_empty(rel->r)) {
     483        7950 :                         if (!find_prop(e->p, PROP_NUNIQUES)) {
     484        7950 :                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     485        7950 :                                 p->value.dval = 1;
     486             :                         }
     487        7950 :                         set_unique(e);
     488             :                 }
     489             :                 break;
     490             :         }
     491      593499 :         case e_atom:
     492      593499 :                 if (e->l) {
     493      575461 :                         atom *a = (atom*) e->l;
     494      575461 :                         if (!a->isnull) {
     495      506221 :                                 set_minmax_property(sql, e, PROP_MAX, a);
     496      506221 :                                 set_minmax_property(sql, e, PROP_MIN, a);
     497             :                         }
     498      575461 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     499      575461 :                         p->value.dval = 1;
     500       18038 :                 } else if (e->f) {
     501        4311 :                         list *vals = (list *) e->f;
     502        4311 :                         sql_exp *first = vals->h ? vals->h->data : NULL;
     503        4311 :                         atom *max = NULL, *min = NULL; /* all child values must have a valid min/max */
     504        4311 :                         int has_nil = 0;
     505             : 
     506        4311 :                         if (first) {
     507        4311 :                                 max = ((lval = find_prop_and_get(first->p, PROP_MAX))) ? lval : NULL;
     508        4311 :                                 min = ((lval = find_prop_and_get(first->p, PROP_MIN))) ? lval : NULL;
     509        4311 :                                 has_nil |= has_nil(first);
     510             :                         }
     511             : 
     512       18478 :                         for (node *n = vals->h ? vals->h->next : NULL ; n ; n = n->next) {
     513        9856 :                                 sql_exp *ee = n->data;
     514             : 
     515        9856 :                                 if (min && max) {
     516        9359 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MAX))) {
     517        9313 :                                                 max = atom_cmp(lval, max) > 0 ? lval : max;
     518             :                                         } else {
     519             :                                                 max = NULL;
     520             :                                         }
     521        9359 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MIN))) {
     522        9313 :                                                 min = atom_cmp(min, lval) > 0 ? lval : min;
     523             :                                         } else {
     524             :                                                 min = NULL;
     525             :                                         }
     526             :                                 }
     527        9856 :                                 has_nil |= has_nil(ee);
     528             :                         }
     529             : 
     530        4311 :                         if (!has_nil)
     531        3937 :                                 set_has_no_nil(e);
     532        4311 :                         if (min && max) {
     533        3893 :                                 set_minmax_property(sql, e, PROP_MAX, max);
     534        3893 :                                 set_minmax_property(sql, e, PROP_MIN, min);
     535             :                         }
     536        4311 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     537        4311 :                         p->value.dval = (dbl) list_length(vals);
     538             :                 } else {
     539       13727 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     540       13727 :                         p->value.dval = 1;
     541             :                 }
     542             :                 break;
     543      283394 :         case e_cmp:
     544             :                 /* TODO? propagating min/max/unique of booleans is not very worth it */
     545      283394 :                 if (e->flag == cmp_or || e->flag == cmp_filter) {
     546       15247 :                         if (!have_nil(e->l) && !have_nil(e->r))
     547       10186 :                                 set_has_no_nil(e);
     548      268147 :                 } else if (e->flag == cmp_in || e->flag == cmp_notin) {
     549       20181 :                         sql_exp *le = e->l;
     550       20181 :                         if (!has_nil(le) && !have_nil(e->r))
     551       16946 :                                 set_has_no_nil(e);
     552             :                 } else {
     553      247966 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     554      247966 :                         if (!has_nil(le) && !has_nil(re) && (!e->f || !has_nil(fe)))
     555      175013 :                                 set_has_no_nil(e);
     556             :                 }
     557             :                 break;
     558             :         case e_psm:
     559             :                 break;
     560             :         }
     561             : 
     562             : #ifndef NDEBUG
     563             :         {
     564             :                 /* min and max cannot be NULL and min must be <= than max, if it happens the inner relation must be empty! */
     565     3565567 :                 atom *min = find_prop_and_get(e->p, PROP_MIN), *max = find_prop_and_get(e->p, PROP_MAX);
     566             : 
     567     3565575 :                 (void) min;
     568     3565575 :                 (void) max;
     569     3565575 :                 assert(!min || !min->isnull);
     570     3565575 :                 assert(!max || !max->isnull);
     571     3565575 :                 assert(!min || !max || (min && max)/* atom_cmp(min, max) <= 0*/);
     572             :         }
     573             : #endif
     574     3565575 :         return e;
     575             : }
     576             : 
     577             : static list * /* Remove predicates always false from min/max values */
     578      144417 : rel_prune_predicates(visitor *v, sql_rel *rel)
     579             : {
     580      144417 :         if (rel->l) {
     581      144417 :                 sql_rel *l = rel->l;
     582      144417 :                 if (is_single(l))
     583        3374 :                         return rel->exps;
     584             :         }
     585      141043 :         if (!list_empty(rel->attr))
     586        3011 :                 return rel->exps;
     587      290950 :         for (node *n = rel->exps->h ; n ; n = n->next) {
     588      152918 :                 sql_exp *e = n->data;
     589             : 
     590      152918 :                 if (e->type == e_cmp && is_theta_exp(e->flag)) {
     591      125895 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     592      125895 :                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     593      125895 :                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     594      125895 :                         bool always_false = false, always_true = false;
     595             : 
     596      125895 :                         if (fe && !is_symmetric(e)) {
     597        3045 :                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
     598        3045 :                                 comp_type lower = range2lcompare(e->flag), higher = range2rcompare(e->flag);
     599        3644 :                                 int not_int1 = rval_min && lval_max && /* the middle and left intervals don't overlap */
     600        1654 :                                         (!is_anti(e) && (lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0)),
     601        4059 :                                         not_int2 = lval_min && fval_max && /* the middle and right intervals don't overlap */
     602        2482 :                                         (!is_anti(e) && (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0)),
     603        3045 :                                         not_int3 = rval_min && fval_max && /* the left interval is after the right one */
     604        1285 :                                         (!is_anti(e) && (atom_cmp(rval_min, fval_max) > 0));
     605             : 
     606        3045 :                                 always_false |= not_int1 || not_int2 || not_int3;
     607             :                                 /* for anti the middle must be before the left or after the right or the right after the left, for the other the middle must be always between the left and right intervals */
     608        4089 :                                 always_true |= !has_nil(le) && !has_nil(re) && !has_nil(fe) &&
     609        3941 :                                         lval_min && lval_max && rval_min && rval_max && fval_min && fval_max &&
     610         573 :                                         (is_anti(e) ? ((lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) || (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
     611         488 :                                          ((lower == cmp_gte ? atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0)));
     612             :                         } else if (!fe) {
     613      122834 :                                 if (!is_semantics(e)) /* trivial not null cmp null case */
     614      234208 :                                         always_false |= !is_anti(e) && ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)));
     615      122834 :                                 switch (e->flag) {
     616      109297 :                                 case cmp_equal:
     617      109297 :                                         if (lval_min && lval_max && rval_min && rval_max && (!is_semantics(e) || !has_nil(le) || !has_nil(re)))
     618      136844 :                                                 always_false |= (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     619      109297 :                                         if (is_semantics(e)) { /* prune *= NULL cases */
     620        5693 :                                                 always_false |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     621       11386 :                                                 always_true |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     622             :                                         }
     623             :                                         break;
     624        5345 :                                 case cmp_notequal:
     625        5345 :                                         if (lval_min && lval_max && rval_min && rval_max)
     626        7386 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     627        5345 :                                         if (is_semantics(e)) {
     628          37 :                                                 always_true |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     629          74 :                                                 always_false |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     630             :                                         }
     631             :                                         break;
     632        5487 :                                 case cmp_gt:
     633        5487 :                                         if (lval_max && rval_min)
     634        1834 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     635        5487 :                                         if (lval_min && rval_max)
     636        3668 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     637             :                                         break;
     638         121 :                                 case cmp_gte:
     639         121 :                                         if (lval_max && rval_min)
     640          51 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     641         121 :                                         if (lval_min && rval_max)
     642         102 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     643             :                                         break;
     644        2502 :                                 case cmp_lt:
     645        2502 :                                         if (lval_min && rval_max)
     646        1382 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     647        2502 :                                         if (lval_max && rval_min)
     648        2812 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     649             :                                         break;
     650          82 :                                 case cmp_lte:
     651          82 :                                         if (lval_min && rval_max)
     652          11 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     653          82 :                                         if (lval_max && rval_min)
     654          22 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     655             :                                         break;
     656             :                                 default: /* Maybe later I can do cmp_in and cmp_notin but don't forget to remove is_theta_exp check up there */
     657             :                                         break;
     658             :                                 }
     659             :                         }
     660      125895 :                         assert(!always_false || !always_true);
     661      125895 :                         if (always_false || always_true) {
     662        3706 :                                 sql_exp *ne = exp_atom_bool(v->sql->sa, always_true /* if always_true set then true else false */);
     663        3706 :                                 if (exp_name(e))
     664           0 :                                         exp_prop_alias(v->sql->sa, ne, e);
     665        3706 :                                 n->data = ne;
     666        3706 :                                 v->changes++;
     667             :                         }
     668             :                 }
     669             :         }
     670      138032 :         return rel->exps;
     671             : }
     672             : 
     673             : static sql_rel *
     674          14 : set_setop_side(visitor *v, sql_rel *rel, sql_rel *side)
     675             : {
     676          14 :         if (side == rel->r)
     677           0 :                 rel->r = NULL;
     678             :         else
     679          14 :                 rel->l = NULL;
     680          14 :         if (!is_simple_project(side->op) || rel_is_ref(side) || !list_empty(side->r)) {
     681           0 :                 side = rel_project(v->sql->sa, side, rel_projections(v->sql, side, NULL, 0, 1));
     682           0 :                 set_count_prop(v->sql->sa, side, get_rel_count(side->l));
     683           0 :                 side->exps = exps_exp_visitor_bottomup(v, side, side->exps, 0, &rel_propagate_statistics, false);
     684             :         }
     685          35 :         for (node *n = rel->exps->h, *m = side->exps->h ; n && m ; n = n->next, m = m->next) {
     686          21 :                 sql_exp *e1 = n->data, *e2 = m->data;
     687          21 :                 exp_prop_alias(v->sql->sa, e2, e1);
     688             :         }
     689          14 :         list_hash_clear(side->exps);
     690             : 
     691          14 :         if (need_distinct(rel))
     692           0 :                 set_distinct(side);
     693          14 :         side->p = prop_copy(v->sql->sa, rel->p);
     694          14 :         rel_destroy(rel);
     695          14 :         return side;
     696             : }
     697             : 
     698             : static BUN
     699       26588 : trivial_project_exp_card(sql_exp *e)
     700             : {
     701       26960 :         if (e->type == e_convert)
     702         372 :                 return trivial_project_exp_card(e->l);
     703       26588 :         return e->type == e_atom && e->f ? (BUN) list_length(e->f) : 1;
     704             : }
     705             : 
     706             : static BUN
     707       26189 : rel_calc_nuniques(mvc *sql, sql_rel *l, list *exps)
     708             : {
     709       26189 :         BUN lv = get_rel_count(l);
     710             : 
     711       26188 :         if (lv == 0)
     712             :                 return 0;
     713       23452 :         if (!list_empty(exps)) {
     714       23452 :                 BUN nuniques = 0;
     715             :                 /* compute the highest number of unique values */
     716       58620 :                 for (node *n = exps->h ; n && nuniques != BUN_NONE ; n = n->next) {
     717       35167 :                         sql_exp *e = n->data;
     718       35167 :                         sql_rel *bt = NULL;
     719       35167 :                         prop *p = NULL;
     720       35167 :                         BUN euniques = BUN_NONE;
     721       35167 :                         atom *min, *max, *sub = NULL;
     722       35167 :                         sql_subtype *tp = exp_subtype(e);
     723       35167 :                         sql_class ec = tp ? tp->type->eclass : EC_STRING; /* if 'e' has no type (eg parameter), use a non-number type to fail condition */
     724             : 
     725       35167 :                         if ((p = find_prop(e->p, PROP_NUNIQUES))) {
     726       25403 :                                 euniques = (BUN) p->value.dval;
     727        9765 :                         } else if (e->type == e_column && e->nid && rel_find_exp_and_corresponding_rel(l, e, false, &bt, NULL) && bt && (p = find_prop(bt->p, PROP_COUNT))) {
     728        7665 :                                 euniques = (BUN) p->value.lval;
     729             :                         }
     730             :                         /* use min to max range to compute number of possible values in the domain for number types */
     731       35168 :                         if ((EC_TEMP(ec)||ec==EC_NUM||ec==EC_MONTH||ec==EC_POS) &&
     732       24272 :                                 (min = find_prop_and_get(e->p, PROP_MIN)) && (max = find_prop_and_get(e->p, PROP_MAX))) {
     733             :                                 /* the range includes min and max, so the atom_inc call is needed */
     734             :                                 /* if 'euniques' has number of distinct values, compute min between both */
     735       19326 :                                 if ((sub = atom_sub(sql->sa, max, min)) && (sub = atom_inc(sql->sa, sub)) && (sub = atom_cast(sql->sa, sub, sql_bind_localtype("oid"))))
     736           0 :                                         euniques = MIN(euniques, (BUN) sub->data.val.oval);
     737             :                         }
     738       35168 :                         if (euniques != BUN_NONE)
     739       33068 :                                 nuniques = MAX(nuniques, euniques); /* the highest cardinality sets the estimation */
     740             :                         else
     741             :                                 nuniques = BUN_NONE;
     742             :                 }
     743       23453 :                 if (nuniques != BUN_NONE)
     744             :                         return nuniques;
     745             :         }
     746             :         return lv;
     747             : }
     748             : 
     749             : static sql_rel *
     750     1366784 : rel_get_statistics_(visitor *v, sql_rel *rel)
     751             : {
     752             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
     753     1366784 :         uint8_t has_special_modify = *(uint8_t*) v->data;
     754     1366784 :         bool can_be_pruned = !has_special_modify && v->storage_based_opt;
     755             : 
     756             :         /* Don't look at the same relation twice */
     757     1366784 :         if (are_statistics_gathered(rel->used))
     758             :                 return rel;
     759     1366685 :         rel->used |= statistics_gathered;
     760             : 
     761     1366685 :         switch (rel->op) {
     762      319281 :         case op_basetable: {
     763      319281 :                 sql_table *t = (sql_table *) rel->l;
     764      319281 :                 sqlstore *store = v->sql->session->tr->store;
     765             : 
     766      319281 :                 if (!list_empty(rel->exps)) {
     767     1277975 :                         for (node *n = rel->exps->h ; n ; n = n->next)
     768      958514 :                                 rel_basetable_column_get_statistics(v->sql, rel, n->data);
     769             :                 }
     770             :                 /* Set table row count. TODO? look for remote tables. Don't look at storage for declared tables, because it won't be cleaned */
     771      319468 :                 if (isTable(t) && t->s && !isDeclaredTable(t)) /* count active rows only */
     772      264817 :                         set_count_prop(v->sql->sa, rel, (BUN)store->storage_api.count_del(v->sql->session->tr, t, CNT_ACTIVE));
     773             :                 break;
     774             :         }
     775        2794 :         case op_union:
     776             :         case op_inter:
     777             :         case op_except: {
     778        2794 :                 bool empty_cross = false;
     779        2794 :                 int i = 0;
     780        2794 :                 sql_rel *l = rel->l, *pl = rel->l, *r = rel->r, *pr = rel->r;
     781             : 
     782        2794 :                 while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     783           0 :                         pl = pl->l;
     784        2794 :                 while (is_sample(pr->op) || is_topn(pr->op))
     785           0 :                         pr = pr->l;
     786             :                 /* if it's not a projection, then project and propagate statistics */
     787        2794 :                 if (!is_project(pl->op) && !is_base(pl->op)) {
     788           0 :                         pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     789           0 :                         set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     790           0 :                         pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     791             :                 }
     792        2794 :                 if (!is_project(pr->op) && !is_base(pr->op)) {
     793           0 :                         pr = rel_project(v->sql->sa, pr, rel_projections(v->sql, pr, NULL, 0, 1));
     794           0 :                         set_count_prop(v->sql->sa, pr, get_rel_count(pr->l));
     795           0 :                         pr->exps = exps_exp_visitor_bottomup(v, pr, pr->exps, 0, &rel_propagate_statistics, false);
     796             :                 }
     797             : 
     798       11670 :                 for (node *n = rel->exps->h ; n ; n = n->next) {
     799        8876 :                         empty_cross |= rel_setop_get_statistics(v->sql, rel, pl->exps, pr->exps, n->data, i);
     800        8876 :                         i++;
     801             :                 }
     802             : 
     803             :                 /* propagate row count */
     804        2794 :                 if (is_union(rel->op)) {
     805         277 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     806         277 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     807             : 
     808         277 :                         if (lv == 0 && rv == 0) { /* both sides empty */
     809           2 :                                 if (can_be_pruned)
     810             :                                         empty_cross = true;
     811             :                                 else
     812           2 :                                         set_count_prop(v->sql->sa, rel, 0);
     813         275 :                         } else if (can_be_pruned && lv == 0 && !rel_is_ref(rel)) { /* left side empty */
     814           0 :                                 rel = set_setop_side(v, rel, r);
     815           0 :                                 empty_cross = false; /* don't rewrite again */
     816         275 :                         } else if (can_be_pruned && rv == 0 && !rel_is_ref(rel)) { /* right side empty */
     817           0 :                                 rel = set_setop_side(v, rel, l);
     818           0 :                                 empty_cross = false; /* don't rewrite again */
     819         275 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
     820           7 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
     821             :                         }
     822        2517 :                 } else if (is_inter(rel->op) || is_except(rel->op)) {
     823        2517 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     824        2517 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     825             : 
     826        2517 :                         if (lv == 0) { /* left side empty */
     827          63 :                                 if (can_be_pruned)
     828             :                                         empty_cross = true;
     829             :                                 else
     830           5 :                                         set_count_prop(v->sql->sa, rel, 0);
     831        2454 :                         } else if (rv == 0) { /* right side empty */
     832          17 :                                 if (is_inter(rel->op)) {
     833           3 :                                         if (can_be_pruned)
     834             :                                                 empty_cross = true;
     835             :                                         else
     836           0 :                                                 set_count_prop(v->sql->sa, rel, 0);
     837          14 :                                 } else if (can_be_pruned && !rel_is_ref(rel)) {
     838          14 :                                         rel = set_setop_side(v, rel, l);
     839          14 :                                         empty_cross = false; /* don't rewrite again */
     840             :                                 } else {
     841           0 :                                         set_count_prop(v->sql->sa, rel, lv);
     842             :                                 }
     843             :                         } else {
     844        2437 :                                 set_count_prop(v->sql->sa, rel, lv);
     845             :                         }
     846             :                 }
     847        2794 :                 if (can_be_pruned && empty_cross) {
     848          81 :                         rel_destroy(rel->l);
     849          81 :                         rel->l = NULL;
     850          81 :                         rel_destroy(rel->r);
     851          81 :                         rel->r = NULL;
     852         246 :                         for (node *n = rel->exps->h ; n ; n = n->next) {
     853         165 :                                 sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     854         165 :                                 exp_prop_alias(v->sql->sa, a, e);
     855         165 :                                 n->data = a;
     856             :                         }
     857          81 :                         list_hash_clear(rel->exps);
     858          81 :                         sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     859          81 :                         set_count_prop(v->sql->sa, l, 1);
     860          81 :                         l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     861          81 :                         set_count_prop(v->sql->sa, l, 0);
     862          81 :                         rel->op = op_project;
     863          81 :                         rel->l = l;
     864          81 :                         rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     865          81 :                         set_count_prop(v->sql->sa, rel, 0);
     866          81 :                         set_nodistinct(rel); /* set relations may have distinct flag set */
     867          81 :                         v->changes++;
     868             :                 }
     869             :                 break;
     870             :         }
     871       13485 :         case op_munion: {
     872       13485 :                 list *l = rel->l, *nrels = sa_list(v->sql->sa);
     873       13485 :                 BUN cnt = 0;
     874       13485 :                 bool needs_pruning = false;
     875             : 
     876       13485 :                 if (is_recursive(rel))
     877             :                         break;
     878       51267 :                 for (node *n = l->h; n; n = n->next) {
     879       37853 :                         sql_rel *r = n->data, *pl = r;
     880             : 
     881       37853 :                         while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     882           0 :                                         pl = pl->l;
     883             :                         /* if it's not a projection, then project and propagate statistics */
     884       37853 :                         if (!is_project(pl->op) && !is_base(pl->op)) {
     885           0 :                                 pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     886           0 :                                 set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     887           0 :                                 pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     888             :                         }
     889       37853 :                         nrels = append(nrels, pl);
     890             :                         /* we need new munion statistics */
     891             :                         /* propagate row count */
     892       37853 :                         BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     893             :                         /* if PROP_COUNT does not exist we assume at least a row (see get_rel_count def) */
     894       37853 :                         if (rv == BUN_NONE) {
     895        1220 :                                 cnt++;
     896        1220 :                                 continue;
     897             :                         }
     898       36633 :                         if (!rv && can_be_pruned)
     899        6736 :                                 needs_pruning = true;
     900             :                         /* overflow check */
     901       36633 :                         if (rv > (BUN_MAX - cnt))
     902       37853 :                                 rv = BUN_MAX;
     903             :                         else
     904       36633 :                                 cnt += rv;
     905             :                 }
     906       13414 :                 int i = 0;
     907       77323 :                 for (node *n = rel->exps->h ; n ; n = n->next, i++)
     908       63909 :                         rel_munion_get_statistics(v->sql, rel, nrels, n->data, i);
     909             : 
     910       13414 :                 if (needs_pruning && !rel_is_ref(rel)) {
     911        4574 :                         v->changes++;
     912        4574 :                         list *nl = sa_list(l->sa);
     913             : 
     914       16865 :                         for (node *n = nrels->h; n; n = n->next) {
     915       12291 :                                 sql_rel *r = n->data;
     916       12291 :                                 BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     917             : 
     918       12291 :                                 if (!rv) { /* keep last for now */
     919        6265 :                                         rel_destroy(r);
     920        6265 :                                         continue;
     921             :                                 }
     922        6026 :                                 nl = append(nl, r);
     923             :                         }
     924        4574 :                         rel->l = nl;
     925        4574 :                         if (list_length(nl) == 1) {
     926        4238 :                                 sql_rel *l = rel->l = nl->h->data; /* ugh */
     927        4238 :                                 rel->r = NULL;
     928        4238 :                                 rel->op = op_project;
     929             : 
     930       20975 :                                 for (node *n = rel->exps->h, *m = l->exps->h ; n && m ; n = n->next, m = m->next) {
     931       16737 :                                         sql_exp *pe = n->data, *ie = m->data;
     932       16737 :                                         sql_exp *ne = exp_ref(v->sql, ie);
     933       16737 :                                         exp_prop_alias(v->sql->sa, ne, pe);
     934       16737 :                                         n->data = ne;
     935             :                                 }
     936        4238 :                                 list_hash_clear(rel->exps);
     937         336 :                         } else if (list_empty(nl)) {
     938             :                                 /* empty select (project [ nils ] ) */
     939         445 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     940         345 :                                         sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     941         345 :                                         exp_prop_alias(v->sql->sa, a, e);
     942         345 :                                         n->data = a;
     943             :                                 }
     944         100 :                                 list_hash_clear(rel->exps);
     945         100 :                                 sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     946         100 :                                 set_count_prop(v->sql->sa, l, 1);
     947         100 :                                 l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     948         100 :                                 set_count_prop(v->sql->sa, l, 0);
     949         100 :                                 rel->op = op_project;
     950         100 :                                 rel->r = NULL;
     951         100 :                                 rel->l = l;
     952         100 :                                 rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     953         100 :                                 set_count_prop(v->sql->sa, rel, 0);
     954         100 :                                 set_nodistinct(rel); /* set relations may have distinct flag set */
     955             :                         }
     956             :                 } else {
     957        8840 :                         set_count_prop(v->sql->sa, rel, cnt);
     958             :                 }
     959             :                 break;
     960             :         }
     961      548391 :         case op_join:
     962             :         case op_left:
     963             :         case op_right:
     964             :         case op_full:
     965             :         case op_semi:
     966             :         case op_anti:
     967             :         case op_select:
     968             :         case op_project:
     969             :         case op_groupby: {
     970      548391 :                 if (is_groupby(rel->op) && !list_empty(rel->r))
     971       16811 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     972      548391 :                 rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, &rel_propagate_statistics, false);
     973      548378 :                 if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r))
     974       41876 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     975             :                 /* The following optimizations can only be applied after propagating the statistics to rel->exps */
     976      548381 :                 if (can_be_pruned && (is_join(rel->op) || is_select(rel->op)) && !list_empty(rel->exps)) {
     977      144417 :                         int changes = v->changes;
     978      144417 :                         rel->exps = rel_prune_predicates(v, rel);
     979      144417 :                         if (v->changes > changes)
     980        3673 :                                 rel = rewrite_simplify(v, 0, v->value_based_opt, rel);
     981             :                 }
     982             : 
     983             :                 /* propagate row count */
     984      548381 :                 sql_rel *l = rel->l, *r = rel->r;
     985      548381 :                 switch (rel->op) {
     986      137189 :                 case op_join:
     987             :                 case op_left:
     988             :                 case op_right:
     989             :                 case op_full: {
     990      137189 :                         BUN lv = get_rel_count(l), rv = get_rel_count(r), uniques_estimate = BUN_MAX, join_idx_estimate = BUN_MAX;
     991             : 
     992      137189 :                         if (!list_empty(rel->exps) && !is_single(rel)) {
     993      250875 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     994      127916 :                                         sql_exp *e = n->data, *el = e->l, *er = e->r;
     995             : 
     996      127916 :                                         if (find_prop(e->p, PROP_JOINIDX)) {
     997         670 :                                                 join_idx_estimate = lv>rv?lv:rv;
     998      127246 :                                         } else if (e->type == e_cmp && e->flag == cmp_equal) {
     999             :                                                 /* if one of the sides is unique, the cardinality will be that exact number, but look for nulls */
    1000      123364 :                                                 if (!is_semantics(e) || !has_nil(el) || !has_nil(er)) {
    1001      123188 :                                                         BUN lu = 0, ru = 0;
    1002      123188 :                                                         prop *p = NULL;
    1003      123188 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES)))
    1004       91390 :                                                                 lu = (BUN) p->value.dval;
    1005      123188 :                                                         if ((p = find_prop(er->p, PROP_NUNIQUES)))
    1006      106083 :                                                                 ru = (BUN) p->value.dval;
    1007      123188 :                                                         if (is_unique(el) || lu > lv) {
    1008       74674 :                                                                 BUN ncount = (is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv;
    1009       74674 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1010       48514 :                                                         } else if (is_unique(er) || ru > rv) {
    1011       30533 :                                                                 BUN ncount = (is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv;
    1012       30533 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1013             :                                                         }
    1014             :                                                 }
    1015             :                                         }
    1016             :                                 }
    1017             :                         }
    1018      137189 :                         if (is_single(rel)) {
    1019        4803 :                                 set_count_prop(v->sql->sa, rel, lv);
    1020      132386 :                         } else if (join_idx_estimate != BUN_MAX) {
    1021         668 :                                 set_count_prop(v->sql->sa, rel, join_idx_estimate);
    1022      131718 :                         } else if (uniques_estimate != BUN_MAX) {
    1023       98602 :                                 set_count_prop(v->sql->sa, rel, uniques_estimate);
    1024       33116 :                         } else if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1025             :                                 /* corner cases for outer joins */
    1026         123 :                                 if (is_left(rel->op)) {
    1027         111 :                                         set_count_prop(v->sql->sa, rel, lv);
    1028          12 :                                 } else if (is_right(rel->op)) {
    1029           3 :                                         set_count_prop(v->sql->sa, rel, rv);
    1030           9 :                                 } else if (is_full(rel->op) && lv != BUN_NONE && rv != BUN_NONE) {
    1031           9 :                                         set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
    1032           0 :                                 } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1033           0 :                                         set_count_prop(v->sql->sa, rel, 0);
    1034             :                                 }
    1035       32993 :                         } else if (lv == 0) {
    1036        1204 :                                 set_count_prop(v->sql->sa, rel, (is_right(rel->op) || is_full(rel->op)) ? rv : 0);
    1037       32377 :                         } else if (rv == 0) {
    1038        1574 :                                 set_count_prop(v->sql->sa, rel, (is_left(rel->op) || is_full(rel->op)) ? lv : 0);
    1039       31297 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1040       18506 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX / lv)) ? BUN_MAX : (lv * rv)); /* overflow check */
    1041             :                         }
    1042             :                         break;
    1043             :                 }
    1044        2935 :                 case op_anti:
    1045        2935 :                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1046        2935 :                         break;
    1047      113557 :                 case op_semi:
    1048             :                 case op_select:
    1049             :                         /* TODO calculate cardinalities using selectivities */
    1050      113557 :                         if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1051        5158 :                                 set_count_prop(v->sql->sa, rel, 0);
    1052             :                         } else {
    1053      213513 :                                 if (!list_empty(rel->exps) && !is_single(rel)) {
    1054      105114 :                                         BUN cnt = get_rel_count(l), u = 1;
    1055      171099 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1056      112284 :                                                 sql_exp *e = n->data, *el = e->l, *er = e->r;
    1057             : 
    1058             :                                                 /* simple expressions first */
    1059      112284 :                                                 if (e->type == e_cmp && e->flag == cmp_equal && exp_is_atom(er)) {
    1060             :                                                         /* use selectivity */
    1061       61772 :                                                         prop *p;
    1062       61772 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES))) {
    1063       46299 :                                                                 u = (BUN) p->value.dval;
    1064       46299 :                                                                 break;
    1065             :                                                         }
    1066             :                                                 }
    1067             :                                         }
    1068             :                                         /* u is an *estimate*, so don't set count_prop to 0 unless cnt is 0 */
    1069      105114 :                                         set_count_prop(v->sql->sa, rel, cnt == 0 ? 0 : u == 0 || u > cnt ? 1 : cnt/u);
    1070             :                                 } else {
    1071        3285 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1072             :                                 }
    1073             :                         }
    1074             :                         break;
    1075      269979 :                 case op_project:
    1076      269979 :                         if (l) {
    1077      257283 :                                 if (need_distinct(rel)) {
    1078         114 :                                         set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->exps));
    1079             :                                 } else {
    1080      257169 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1081             :                                 }
    1082             :                         } else {
    1083       12696 :                                 BUN card = 1;
    1084             : 
    1085       12696 :                                 if (!list_empty(rel->exps)) {
    1086       37935 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1087       25239 :                                                 sql_exp *e = n->data;
    1088       25239 :                                                 card = MAX(card, trivial_project_exp_card(e));
    1089             :                                         }
    1090             :                                 }
    1091       12696 :                                 set_count_prop(v->sql->sa, rel, card);
    1092             :                         }
    1093             :                         break;
    1094       24309 :                 case op_groupby:
    1095       24309 :                         if (list_empty(rel->r)) {
    1096        7497 :                                 set_count_prop(v->sql->sa, rel, 1);
    1097             :                         } else {
    1098       16811 :                                 set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->r));
    1099             :                         }
    1100             :                         break;
    1101             :                 default:
    1102             :                         break;
    1103             :                 }
    1104             :                 break;
    1105             :         }
    1106       17035 :         case op_topn: {
    1107       17035 :                 BUN lv = get_rel_count(rel->l);
    1108             : 
    1109       17035 :                 if (lv != BUN_NONE) {
    1110       17012 :                         sql_exp *le = rel->exps->h->data, *oe = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
    1111          84 :                         if (oe && oe->l && exp_is_not_null(oe)) { /* no parameters */
    1112          84 :                                 BUN offset = (BUN) ((atom*)oe->l)->data.val.lval;
    1113          84 :                                 lv = offset >= lv ? 0 : lv - offset;
    1114             :                         }
    1115       17012 :                         if (le->l && exp_is_not_null(le)) {
    1116       16977 :                                 BUN limit = (BUN) ((atom*)le->l)->data.val.lval;
    1117       16977 :                                 lv = MIN(lv, limit);
    1118             :                         }
    1119       17012 :                         set_count_prop(v->sql->sa, rel, lv);
    1120             :                 }
    1121             :                 break;
    1122             :         }
    1123          22 :         case op_sample: {
    1124          22 :                 BUN lv = get_rel_count(rel->l);
    1125             : 
    1126          22 :                 if (lv != BUN_NONE) {
    1127           4 :                         sql_exp *se = rel->exps->h->data;
    1128           4 :                         sql_subtype *tp = exp_subtype(se);
    1129             : 
    1130           4 :                         if (se->l && tp->type->eclass == EC_NUM) { /* sample is a number of rows */
    1131           4 :                                 BUN sample = (BUN) ((atom*)se->l)->data.val.lval;
    1132           4 :                                 lv = MIN(lv, sample);
    1133           0 :                         } else if (se->l) { /* sample is a percentage of rows */
    1134           0 :                                 dbl percent = ((atom*)se->l)->data.val.dval;
    1135           0 :                                 assert(tp->type->eclass == EC_FLT);
    1136           0 :                                 lv = (BUN) ceil((dbl)lv * percent);
    1137             :                         }
    1138           4 :                         set_count_prop(v->sql->sa, rel, lv);
    1139             :                 }
    1140             :                 break;
    1141             :         }
    1142        6201 :         case op_table: {
    1143        6201 :                 sql_exp *op = rel->r;
    1144        6201 :                 if (rel->flag != TRIGGER_WRAPPER && op) {
    1145        5889 :                         sql_subfunc *f = op->f;
    1146        5889 :                         if (f->func->lang == FUNC_LANG_SQL) {
    1147          97 :                                 set_count_prop(v->sql->sa, rel, 1000 /* just some fallback value */);
    1148        5792 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "storage") == 0) {
    1149         837 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of catalog */);
    1150        4955 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "db_users") == 0) {
    1151           0 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of users */);
    1152        4955 :                         } else if (f->func->lang == FUNC_LANG_MAL && strncmp(f->func->base.name, "querylog", 8) == 0) {
    1153        1101 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of querylog */);
    1154        3854 :                         } else if (f->func->lang == FUNC_LANG_MAL &&
    1155        3685 :                                            (strcmp(f->func->base.name, "queue") == 0 ||
    1156        3417 :                                                 strcmp(f->func->base.name, "optimizers") == 0 ||
    1157        3101 :                                                 strcmp(f->func->base.name, "env") == 0 ||
    1158        2735 :                                                 strcmp(f->func->base.name, "keywords") == 0 ||
    1159        2735 :                                                 strcmp(f->func->base.name, "statistics") == 0 ||
    1160        2058 :                                                 strcmp(f->func->base.name, "rejects") == 0 ||
    1161        1800 :                                                 strcmp(f->func->base.name, "schemastorage") == 0 ||
    1162        1800 :                                                 strncmp(f->func->base.name, "storage", 7) == 0 ||
    1163        1800 :                                                 strcmp(f->func->base.name, "sessions") == 0) ) {
    1164        2179 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of queue */);
    1165             :                         }
    1166             :                         /* else {
    1167             :                                 printf("%%func needs stats : %s\n", f->func->base.name);
    1168             :                         } */
    1169             :                 }
    1170             :                 break;
    1171             :         }
    1172             :         /*These relations are less important for now
    1173             :           TODO later we can tune it */
    1174             :         case op_insert:
    1175             :         case op_update:
    1176             :         case op_delete:
    1177             :         case op_truncate:
    1178             :         case op_ddl:
    1179             :         default:
    1180             :                 break;
    1181             :         }
    1182             : 
    1183             :         return rel;
    1184             : }
    1185             : 
    1186             : static sql_rel *
    1187      555534 : rel_get_statistics(visitor *v, global_props *gp, sql_rel *rel)
    1188             : {
    1189             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
    1190      555534 :         uint8_t has_special_modify = (uint8_t) gp->has_special_modify;
    1191      555534 :         v->data = &has_special_modify;
    1192      555534 :         rel = rel_visitor_bottomup(v, rel, &rel_get_statistics_);
    1193      555965 :         v->data = gp;
    1194      555965 :         return rel;
    1195             : }
    1196             : 
    1197             : run_optimizer
    1198      732253 : bind_get_statistics(visitor *v, global_props *gp)
    1199             : {
    1200      732253 :         (void) v;
    1201      732253 :         return (gp->opt_level == 1 && !gp->cnt[op_insert]) ? rel_get_statistics : NULL;
    1202             : }
    1203             : 
    1204             : 
    1205             : static bool
    1206       45373 : point_select_on_unique_column(sql_rel *rel)
    1207             : {
    1208       45373 :         if (is_select(rel->op) && !list_empty(rel->exps)) {
    1209       38490 :                 for (node *n = rel->exps->h; n ; n = n->next) {
    1210       20026 :                         sql_exp *e = n->data, *el = e->l, *er = e->r, *found = NULL;
    1211             : 
    1212       20026 :                         if (is_compare(e->type) && e->flag == cmp_equal) {
    1213       15211 :                                 if (is_numeric_upcast(el))
    1214           0 :                                         el = el->l;
    1215       15211 :                                 if (is_numeric_upcast(er))
    1216           0 :                                         er = er->l;
    1217       15211 :                                 if (is_alias(el->type) && exp_is_atom(er) && (found = rel_find_exp(rel->l, el)) &&
    1218         761 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(found) || !has_nil(er)))
    1219             :                                         return true;
    1220       14450 :                                 if (is_alias(er->type) && exp_is_atom(el) && (found = rel_find_exp(rel->l, er)) &&
    1221           0 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(el) || !has_nil(found)))
    1222             :                                         return true;
    1223             :                         }
    1224             :                 }
    1225             :         }
    1226             :         return false;
    1227             : }
    1228             : 
    1229             : /*
    1230             :  * A point select on an unique column reduces the number of rows to 1. If the same select is under a
    1231             :  * join, the opposite side's select can be pushed above the join.
    1232             :  */
    1233             : static inline sql_rel *
    1234      697336 : rel_push_select_up(visitor *v, sql_rel *rel)
    1235             : {
    1236      697336 :         if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) || is_semi(rel->op)) && !is_single(rel)) {
    1237      122761 :                 sql_rel *l = rel->l, *r = rel->r;
    1238      122761 :                 bool can_pushup_left = is_select(l->op) && !rel_is_ref(l) && !is_single(l) && (is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op)),
    1239      122761 :                         can_pushup_right = is_select(r->op) && !rel_is_ref(r) && !is_single(r) && (is_innerjoin(rel->op) || is_right(rel->op));
    1240             : 
    1241      122761 :                 if (can_pushup_left || can_pushup_right) {
    1242       35886 :                         if (can_pushup_left)
    1243       20669 :                                 can_pushup_left = point_select_on_unique_column(r);
    1244       35886 :                         if (can_pushup_right)
    1245       24704 :                                 can_pushup_right = point_select_on_unique_column(l);
    1246             : 
    1247             :                         /* if both selects retrieve one row each, it's not worth it to push both up */
    1248       35886 :                         if (can_pushup_left && !can_pushup_right) {
    1249         695 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1250         695 :                                 nrel->l = l->l;
    1251         695 :                                 rel = rel_inplace_select(rel, nrel, l->exps);
    1252         695 :                                 assert(is_select(rel->op));
    1253         695 :                                 v->changes++;
    1254       35191 :                         } else if (!can_pushup_left && can_pushup_right) {
    1255          14 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1256          14 :                                 nrel->r = r->l;
    1257          14 :                                 rel = rel_inplace_select(rel, nrel, r->exps);
    1258          14 :                                 assert(is_select(rel->op));
    1259          14 :                                 v->changes++;
    1260             :                         }
    1261             :                 }
    1262             :         }
    1263      697336 :         return rel;
    1264             : }
    1265             : 
    1266             : static int
    1267       39058 : sql_class_base_score(visitor *v, sql_column *c, sql_subtype *t, bool equality_based)
    1268             : {
    1269       39058 :         int de;
    1270             : 
    1271       39058 :         if (!t)
    1272             :                 return 0;
    1273       39058 :         switch (ATOMstorage(t->type->localtype)) {
    1274             :         case TYPE_bte:
    1275             :                 return 150 - 8;
    1276        1606 :         case TYPE_sht:
    1277        1606 :                 return 150 - 16;
    1278       16472 :         case TYPE_int:
    1279       16472 :                 return 150 - 32;
    1280         469 :         case TYPE_void:
    1281             :         case TYPE_lng:
    1282         469 :                 return 150 - 64;
    1283             :         case TYPE_uuid:
    1284             : #ifdef HAVE_HGE
    1285             :         case TYPE_hge:
    1286             : #endif
    1287             :                 return 150 - 128;
    1288           2 :         case TYPE_flt:
    1289           2 :                 return 75 - 24;
    1290             :         case TYPE_dbl:
    1291             :                 return 75 - 53;
    1292       16187 :         default:
    1293       16187 :                 if (equality_based && c && v->storage_based_opt && (de = mvc_is_duplicate_eliminated(v->sql, c)))
    1294        1491 :                         return 150 - de * 8;
    1295             :                 /* strings and blobs not duplicate eliminated don't get any points here */
    1296             :                 return 0;
    1297             :         }
    1298             : }
    1299             : 
    1300             : static int
    1301       15154 : score_se_base(visitor *v, sql_rel *rel, sql_exp *e)
    1302             : {
    1303       15154 :         int res = 0;
    1304       15154 :         sql_subtype *t = exp_subtype(e);
    1305       15154 :         sql_column *c = NULL;
    1306             : 
    1307       15154 :         if (e->type == e_convert) /* keep unsafes at the end (TODO improve) */
    1308             :                 return -1000;
    1309             :         /* can we find out if the underlying table is sorted */
    1310       14746 :         if ((c = exp_find_column(rel, e, -2)) && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1311       14746 :                 res += 600;
    1312             : 
    1313             :         /* prefer the shorter var types over the longer ones */
    1314       14746 :         res += sql_class_base_score(v, c, t, is_equality_or_inequality_exp(e->flag)); /* smaller the type, better */
    1315       14746 :         return res;
    1316             : }
    1317             : 
    1318             : static int
    1319       18547 : score_se(visitor *v, sql_rel *rel, sql_exp *e)
    1320             : {
    1321       18547 :         int score = 0;
    1322       18547 :         if (e->type == e_cmp && !is_complex_exp(e->flag)) {
    1323       15154 :                 sql_exp *l = e->l;
    1324             : 
    1325       15154 :                 while (l->type == e_cmp) { /* go through nested comparisons */
    1326           2 :                         sql_exp *ll;
    1327             : 
    1328           2 :                         if (l->flag == cmp_filter || l->flag == cmp_or)
    1329           0 :                                 ll = ((list*)l->l)->h->data;
    1330             :                         else
    1331           2 :                                 ll = l->l;
    1332           2 :                         if (ll->type != e_cmp)
    1333             :                                 break;
    1334             :                         l = ll;
    1335             :                 }
    1336       15154 :                 score += score_se_base(v, rel, l);
    1337             :         }
    1338       18547 :         score += exp_keyvalue(e);
    1339       18547 :         return score;
    1340             : }
    1341             : 
    1342             : static inline sql_rel *
    1343      697336 : rel_select_order(visitor *v, sql_rel *rel)
    1344             : {
    1345      697336 :         int *scores = NULL;
    1346      697336 :         sql_exp **exps = NULL;
    1347             : 
    1348      697336 :         if (is_select(rel->op) && list_length(rel->exps) > 1) {
    1349        7926 :                 node *n;
    1350        7926 :                 int i, nexps = list_length(rel->exps);
    1351        7926 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, nexps);
    1352        7926 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, nexps);
    1353             : 
    1354       26473 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next) {
    1355       18576 :                         exps[i] = n->data;
    1356       18576 :                         if (find_prop(exps[i]->p, PROP_HASHCOL))
    1357             :                                 return rel;
    1358       18547 :                         scores[i] = score_se(v, rel, n->data);
    1359             :                 }
    1360        7897 :                 GDKqsort(scores, exps, NULL, nexps, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1361             : 
    1362       26444 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next)
    1363       18547 :                         n->data = exps[i];
    1364             :         }
    1365             : 
    1366             :         return rel;
    1367             : }
    1368             : 
    1369             : /* Compute the efficiency of using this expression early in a group by list */
    1370             : static int
    1371       24312 : score_gbe(visitor *v, sql_rel *rel, sql_exp *e)
    1372             : {
    1373       24312 :         int res = 0;
    1374       24312 :         sql_subtype *t = exp_subtype(e);
    1375       24312 :         sql_column *c = exp_find_column(rel, e, -2);
    1376             : 
    1377       24312 :         if (e->card == CARD_ATOM) /* constants are trivial to group */
    1378          38 :                 res += 1000;
    1379             :         /* can we find out if the underlying table is sorted */
    1380       24312 :         if (is_unique(e) || find_prop(e->p, PROP_HASHCOL) || (c && v->storage_based_opt && mvc_is_unique(v->sql, c))) /* distinct columns */
    1381        2464 :                 res += 700;
    1382       21072 :         if (c && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1383        2479 :                 res += 500;
    1384       24312 :         if (find_prop(e->p, PROP_HASHIDX)) /* has hash index */
    1385           0 :                 res += 200;
    1386             : 
    1387             :         /* prefer the shorter var types over the longer ones */
    1388       24312 :         res += sql_class_base_score(v, c, t, true); /* smaller the type, better */
    1389       24312 :         return res;
    1390             : }
    1391             : 
    1392             : /* reorder group by expressions */
    1393             : static inline sql_rel *
    1394      697336 : rel_groupby_order(visitor *v, sql_rel *rel)
    1395             : {
    1396      697336 :         int *scores = NULL;
    1397      697336 :         sql_exp **exps = NULL;
    1398             : 
    1399      697336 :         if (is_groupby(rel->op) && list_length(rel->r) > 1) {
    1400        7978 :                 node *n;
    1401        7978 :                 list *gbe = rel->r;
    1402        7978 :                 int i, ngbe = list_length(gbe);
    1403        7978 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, ngbe);
    1404        7978 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, ngbe);
    1405             : 
    1406             :                 /* first sorting step, give priority for integers and sorted columns */
    1407       32290 :                 for (i = 0, n = gbe->h; n; i++, n = n->next) {
    1408       24312 :                         exps[i] = n->data;
    1409       24312 :                         scores[i] = score_gbe(v, rel, exps[i]);
    1410             :                 }
    1411        7978 :                 GDKqsort(scores, exps, NULL, ngbe, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1412             : 
    1413             :                 /* second sorting step, give priority to strings with lower number of digits */
    1414       17435 :                 for (i = ngbe - 1; i && !scores[i]; i--); /* find expressions with no score from the first round */
    1415        7978 :                 if (scores[i])
    1416        6986 :                         i++;
    1417        7978 :                 if (ngbe - i > 1) {
    1418       12002 :                         for (int j = i; j < ngbe; j++) {
    1419        9025 :                                 sql_subtype *t = exp_subtype(exps[j]);
    1420        9025 :                                 scores[j] = t ? t->digits : 0;
    1421             :                         }
    1422             :                         /* the less number of digits the better, order ascending */
    1423        2977 :                         GDKqsort(scores + i, exps + i, NULL, ngbe - i, sizeof(int), sizeof(void *), TYPE_int, false, true);
    1424             :                 }
    1425             : 
    1426       32290 :                 for (i = 0, n = gbe->h; n; i++, n = n->next)
    1427       24312 :                         n->data = exps[i];
    1428             :         }
    1429             : 
    1430      697335 :         return rel;
    1431             : }
    1432             : 
    1433             : /* This optimization loop contains optimizations that can potentially use statistics */
    1434             : static sql_rel *
    1435      697336 : rel_final_optimization_loop_(visitor *v, sql_rel *rel)
    1436             : {
    1437             :         /* Run rel_push_select_up only once at the end to avoid an infinite optimization loop */
    1438      697336 :         rel = rel_push_select_up(v, rel);
    1439      697336 :         rel = rel_select_order(v, rel);
    1440             : 
    1441             :         /* TODO? Maybe later add rel_simplify_count, rel_join2semijoin, rel_simplify_fk_joins,
    1442             :            rel_distinct_project2groupby, rel_simplify_predicates, rel_simplify_math,
    1443             :            rel_distinct_aggregate_on_unique_values */
    1444             : 
    1445      697336 :         rel = rel_groupby_order(v, rel);
    1446      697335 :         return rel;
    1447             : }
    1448             : 
    1449             : static sql_rel *
    1450       70729 : rel_final_optimization_loop(visitor *v, global_props *gp, sql_rel *rel)
    1451             : {
    1452       70729 :         (void) gp;
    1453       70729 :         return rel_visitor_bottomup(v, rel, &rel_final_optimization_loop_);
    1454             : }
    1455             : 
    1456             : run_optimizer
    1457      732793 : bind_final_optimization_loop(visitor *v, global_props *gp)
    1458             : {
    1459      732793 :         int flag = v->sql->sql_optimizer;
    1460             :         /* At the moment, this optimizer has dependency on 3 flags */
    1461      563880 :         return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_select]) &&
    1462      803524 :                 (flag & push_select_up) && (flag & optimize_select_and_joins_topdown) && (flag & optimize_projections) ? rel_final_optimization_loop : NULL;
    1463             : }

Generated by: LCOV version 1.14