LCOV - code coverage report
Current view: top level - sql/server - rel_statistics.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 833 885 94.1 %
Date: 2024-12-19 23:10:26 Functions: 26 26 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include "rel_optimizer_private.h"
      15             : #include "rel_statistics.h"
      16             : #include "rel_basetable.h"
      17             : #include "rel_rewriter.h"
      18             : 
      19             : static sql_exp *
      20     3469448 : comparison_find_column(sql_exp *input, sql_exp *e)
      21             : {
      22     3469448 :         switch (input->type) {
      23       99298 :         case e_convert: {
      24       99298 :                 list *types = (list *)input->r;
      25       99298 :                 sql_class from = ((sql_subtype*)types->h->data)->type->eclass, to = ((sql_subtype*)types->h->next->data)->type->eclass;
      26       99298 :                 if (from == to)
      27      191769 :                         return comparison_find_column(input->l, e) ? input : NULL;
      28             :                 return NULL;
      29             :         }
      30     3054496 :         case e_column:
      31     3054496 :                 return exp_match(e, input) ? input : NULL;
      32             :         default:
      33             :                 return NULL;
      34             :         }
      35             : }
      36             : 
      37             : /* multi lo <= col <= hi, maybe still be false even if lo or hi are NULL, possibly similar for filter on multiple
      38             :  * columns */
      39             : #define comp_single_column(c) (!c->f)
      40             : 
      41             : static sql_exp *
      42     8756591 : rel_propagate_column_ref_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
      43             : {
      44     8839144 :         assert(e->type == e_column);
      45     8839144 :         if (rel) {
      46     8839064 :                 switch(rel->op) {
      47     4094819 :                 case op_left:
      48             :                 case op_right:
      49             :                 case op_full:
      50             :                 case op_join:
      51             :                 case op_select:
      52             :                 case op_anti:
      53             :                 case op_semi: {
      54     4094819 :                         bool found_without_semantics = false, found_left = false, found_right = false, still_unique = false;
      55             : 
      56     4094819 :                         if ((is_innerjoin(rel->op) || is_select(rel->op)) && list_length(rel->exps) == 1 && exp_is_false(rel->exps->h->data))
      57             :                                 return NULL; /* nothing will pass, skip */
      58             : 
      59             :                         /* propagate from the bottom first */
      60     4079196 :                         if (rel_propagate_column_ref_statistics(sql, rel->l, e))
      61             :                                 found_left = true;
      62     2592625 :                         if (!found_left && is_join(rel->op) && rel_propagate_column_ref_statistics(sql, rel->r, e))
      63     4079195 :                                 found_right = true;
      64             : 
      65     4079195 :                         if (!found_left && !found_right)
      66             :                                 return NULL;
      67     1782513 :                         if (!list_empty(rel->exps) && rel->op != op_anti) { /* if there's an or, the MIN and MAX get difficult to propagate */
      68     3500835 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
      69     1813826 :                                         sql_exp *comp = n->data, *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL, *fe = comp->f, *fne = NULL;
      70             : 
      71     1813826 :                                         if (comp->type == e_cmp) {
      72     1812822 :                                                 if (is_theta_exp(comp->flag) && ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) || (fe && (fne = comparison_find_column(fe, e))))) {
      73      124338 :                                                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
      74      124338 :                                                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
      75             : 
      76             :                                                         /* not semantics found or if explicitly filtering not null values from the column */
      77      124338 :                                                         found_without_semantics |= (!is_semantics(comp) && comp_single_column(comp)) || (comp->flag == cmp_equal && lne && is_anti(comp) && exp_is_null(re));
      78      124338 :                                                         still_unique |= comp->flag == cmp_equal && is_unique(le) && is_unique(re); /* unique if only equi-joins on unique columns are there */
      79      124338 :                                                         if (is_full(rel->op) || (is_left(rel->op) && found_left) || (is_right(rel->op) && found_right)) /* on outer joins, min and max cannot be propagated on some cases */
      80       19700 :                                                                 continue;
      81             :                                                         /* if (end2 >= start1 && start2 <= end1) then the 2 intervals are intersected */
      82      104638 :                                                         if (fe && lval_min && lval_max) { /* range case, the middle expression must intersect the other two */
      83        4622 :                                                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
      84        4622 :                                                                 int int1 = rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0,
      85        4622 :                                                                         int2 = fval_min && fval_max && atom_cmp(fval_max, lval_min) >= 0 && atom_cmp(fval_min, lval_max) <= 0,
      86        4622 :                                                                         symmetric = is_symmetric(comp);
      87             : 
      88        4622 :                                                                 if (is_anti(comp) || (!symmetric && fval_min && rval_max && atom_cmp(fval_min, rval_max) < 0)) /* for asymmetric case the re range must be after the fe range */
      89        1817 :                                                                         continue;
      90        2805 :                                                                 if (lne && int1 && int2) {
      91         104 :                                                                         if (symmetric) {
      92           0 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
      93           0 :                                                                                 atom *nmin = statistics_atom_min(sql, rval_min, fval_min), *nmax = statistics_atom_max(sql, rval_max, fval_max);
      94             :                                                                                 /* max is min from le and (max from re and fe max) */
      95           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, nmax, p2->value.pval) : nmax);
      96             :                                                                                 /* min is max from le and (min from re and fe min) */
      97           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, nmin, p1->value.pval) : nmin);
      98             :                                                                         } else {
      99         104 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
     100             :                                                                                 /* max is min from le and fe max */
     101         104 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, fval_max, p2->value.pval) : fval_max);
     102             :                                                                                 /* min is max from le and re min */
     103         104 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, rval_min, p1->value.pval) : rval_min);
     104             :                                                                         }
     105        2701 :                                                                 } else if (rne) {
     106         590 :                                                                         if (symmetric && int1 && int2) { /* min is max from le and (min from re and fe min) */
     107           0 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     108           0 :                                                                                 atom *nmin = p ? statistics_atom_min(sql, p->value.pval, fval_min) : fval_min;
     109           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, nmin, lval_min));
     110         590 :                                                                         } else if (int1) { /* min is max from le and re min */
     111         100 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     112         100 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     113             :                                                                         }
     114        2111 :                                                                 } else if (fne) {
     115         549 :                                                                         if (symmetric && int1 && int2) { /* max is min from le and (max from re and fe max) */
     116           0 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     117           0 :                                                                                 atom *nmax = p ? statistics_atom_max(sql, p->value.pval, rval_max) : rval_max;
     118           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, nmax, lval_max) : nmax);
     119         549 :                                                                         } else if (int2) { /* max is min from le and fe max */
     120         266 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     121         266 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     122             :                                                                         }
     123             :                                                                 }
     124      100016 :                                                         } else if (lval_min && lval_max && rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0) {
     125             :                                                                 /* both min and max must be set and the intervals must overlap */
     126       42020 :                                                                 switch (comp->flag) {
     127       27998 :                                                                 case cmp_equal: /* for equality reduce */
     128       27998 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_max(sql, lval_max, rval_max) : statistics_atom_min(sql, lval_max, rval_max));
     129       27998 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_min(sql, lval_min, rval_min) : statistics_atom_max(sql, lval_min, rval_min));
     130       27998 :                                                                         break;
     131        4706 :                                                                 case cmp_notequal: /* for inequality expand */
     132        4706 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_min(sql, lval_max, rval_max) : statistics_atom_max(sql, lval_max, rval_max));
     133        4706 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_max(sql, lval_min, rval_min) : statistics_atom_min(sql, lval_min, rval_min));
     134        4706 :                                                                         break;
     135        5310 :                                                                 case cmp_gt:
     136             :                                                                 case cmp_gte:
     137        9682 :                                                                         if (!is_anti(comp) && lne) { /* min is max from both min */
     138        4372 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     139        4372 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min, p->value.pval) : rval_min);
     140         938 :                                                                         } else if (!is_anti(comp)) { /* max is min from both max */
     141         938 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     142         938 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     143             :                                                                         }
     144             :                                                                         break;
     145        4006 :                                                                 case cmp_lt:
     146             :                                                                 case cmp_lte:
     147        7205 :                                                                         if (!is_anti(comp) && lne) { /* max is min from both max */
     148        3199 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     149        3199 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max, p->value.pval) : rval_max);
     150         807 :                                                                         } else if (!is_anti(comp)) { /* min is max from both min */
     151         807 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     152         807 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     153             :                                                                         }
     154             :                                                                         break;
     155             :                                                                 default: /* Maybe later I can do cmp_in and cmp_notin */
     156             :                                                                         break;
     157             :                                                                 }
     158             :                                                         }
     159             :                                                 }
     160             :                                         }
     161             :                                 }
     162             :                         }
     163     1782513 :                         if (is_full(rel->op) || (is_left(rel->op) && found_right) || (is_right(rel->op) && found_left))
     164       35455 :                                 set_has_nil(e);
     165     1782513 :                         if (!is_outerjoin(rel->op) && found_without_semantics) /* at an outer join, null values pass */
     166       93806 :                                 set_has_no_nil(e);
     167     1782513 :                         if (is_join(rel->op) && is_unique(e) && !still_unique)
     168      118686 :                                 set_not_unique(e);
     169     1782513 :                         return e;
     170             :                 }
     171     4642325 :                 case op_table:
     172             :                 case op_basetable:
     173             :                 case op_union:
     174             :                 case op_except:
     175             :                 case op_inter:
     176             :                 case op_munion:
     177             :                 case op_project:
     178             :                 case op_groupby: {
     179     4642325 :                         if (is_recursive(rel))
     180             :                                 return NULL;
     181     4642137 :                         sql_exp *found;
     182     4642137 :                         atom *fval;
     183     4642137 :                         prop *est;
     184     4642137 :                         if ((found = rel_find_exp(rel, e))) {
     185     2175655 :                                 if (rel->op != op_table) { /* At the moment don't propagate statistics for table relations */
     186     2131166 :                                         if ((fval = find_prop_and_get(found->p, PROP_MAX)))
     187     1121487 :                                                 set_minmax_property(sql, e, PROP_MAX, fval);
     188     2131167 :                                         if ((fval = find_prop_and_get(found->p, PROP_MIN)))
     189     1128567 :                                                 set_minmax_property(sql, e, PROP_MIN, fval);
     190     2131167 :                                         if (!has_nil(found))
     191     1375228 :                                                 set_has_no_nil(e);
     192     2131167 :                                         if (is_unique(found) || (need_distinct(rel) && list_length(rel->exps) == 1) ||
     193     1717159 :                                                 (is_groupby(rel->op) && (list_empty(rel->r) || (list_length(rel->r) == 1 && exps_find_exp(rel->r, e)))))
     194      425250 :                                                 set_unique(e);
     195             :                                         /* propagate unique estimation for known cases */
     196     2131167 :                                         if (is_groupby(rel->op) && list_empty(rel->r) && !find_prop(e->p, PROP_NUNIQUES)) { /* global aggregate case */
     197        7580 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     198        7580 :                                                 p->value.dval = 1;
     199     2123587 :                                         } else if (((is_basetable(rel->op) || is_except(rel->op) || is_inter(rel->op) || is_simple_project(rel->op) ||
     200       71515 :                                                                  (is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
     201     2064738 :                                                                 (est = find_prop(found->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES))) {
     202      193936 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     203      193936 :                                                 p->value.dval = est->value.dval;
     204             :                                         }
     205             :                                 }
     206     2175658 :                                 return e;
     207             :                         }
     208             :                         return NULL;
     209             :                 }
     210       82553 :                 case op_topn:
     211             :                 case op_sample:
     212       82553 :                         return rel_propagate_column_ref_statistics(sql, rel->l, e);
     213             :                 default:
     214             :                         break;
     215             :                 }
     216             :         }
     217             :         return NULL;
     218             : }
     219             : 
     220             : static atom *
     221     4596336 : atom_from_valptr( allocator *sa, sql_subtype *tpe, ValRecord *v)
     222             : {
     223     4596336 :         atom *a = SA_NEW(sa, atom);
     224             : 
     225     4596311 :         assert(!VALisnil(v));
     226     4596341 :         *a = (atom) {.tpe = *tpe,};
     227     4596341 :         SA_VALcopy(sa, &a->data, v);
     228     4596435 :         return a;
     229             : }
     230             : 
     231             : void
     232     4245428 : sql_column_get_statistics(mvc *sql, sql_column *c, sql_exp *e)
     233             : {
     234     4245428 :         bool nonil = false, unique = false;
     235     4245428 :         double unique_est = 0.0;
     236     4245428 :         ValRecord min, max;
     237     4245428 :         int ok = mvc_col_stats(sql, c, &nonil, &unique, &unique_est, &min, &max);
     238             : 
     239     4246537 :         if (has_nil(e) && nonil)
     240     2800808 :                 set_has_no_nil(e);
     241     4246537 :         if (!is_unique(e) && unique)
     242     1120237 :                 set_unique(e);
     243     4246537 :         if (unique_est != 0.0) {
     244     2989359 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     245     2989190 :                 p->value.dval = unique_est;
     246             :         }
     247     4246368 :         unsigned int digits = 0;
     248     4246368 :         sql_subtype *et = exp_subtype(e);
     249     4246556 :         if (et->type->eclass == EC_DEC || et->type->eclass == EC_NUM)
     250     2760073 :                 digits = et->digits;
     251     4246556 :         if ((ok & 2) == 2) {
     252     2295363 :                 if (!VALisnil(&max)) {
     253     2295335 :                         prop *p = e->p = prop_create(sql->sa, PROP_MAX, e->p);
     254     2295289 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &max);
     255     2295292 :                         if (digits) {
     256     1706330 :                                 unsigned int nd = atom_digits(p->value.pval);
     257     1706289 :                                 if (nd < digits)
     258             :                                         digits = nd;
     259     1706289 :                                 if (!digits)
     260             :                                         digits = 1;
     261             :                         }
     262             :                 }
     263     2295178 :                 VALclear(&max);
     264             :         }
     265     4246421 :         if ((ok & 1) == 1) {
     266     2301418 :                 if (!VALisnil(&min)) {
     267     2301411 :                         prop *p = e->p = prop_create(sql->sa, PROP_MIN, e->p);
     268     2301459 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &min);
     269     2301572 :                         if (digits) {
     270     1713595 :                                 unsigned int nd = atom_digits(p->value.pval);
     271     1713572 :                                 if (nd > digits) /* possibly set to low by max value */
     272             :                                         digits = nd;
     273             :                                 if (!digits)
     274             :                                         digits = 1;
     275             :                         }
     276             :                 }
     277     2301539 :                 VALclear(&min);
     278             :         }
     279     4246569 :         if (digits)
     280     2760014 :                 et->digits = digits;
     281     4246569 :         if (et->type->eclass == EC_DEC && et->digits <= et->scale)
     282         216 :                 et->digits = et->scale + 1;
     283     4246569 : }
     284             : 
     285             : static void
     286      946404 : rel_basetable_column_get_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
     287             : {
     288      946404 :         if (e->p)
     289             :                 return;
     290      304428 :         sql_column *c = NULL;
     291             : 
     292      304428 :         if ((c = rel_base_find_column(rel, e->nid))) {
     293      206818 :                 sql_column_get_statistics(sql, c, e);
     294             :         }
     295             : }
     296             : 
     297             : static bool
     298        8875 : rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, sql_exp *e, int i)
     299             : {
     300        8875 :         sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
     301        8875 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     302        8875 :                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     303        8875 :         prop *est;
     304             : 
     305             :         /* for the intersection, if both expressions don't overlap, it can be pruned */
     306        8875 :         if (is_inter(rel->op) && !has_nil(le) && !has_nil(re) &&
     307         511 :                 ((rval_max && lval_min && atom_cmp(rval_max, lval_min) < 0) || (rval_min && lval_max && atom_cmp(rval_min, lval_max) > 0)))
     308          26 :                 return true;
     309             : 
     310        8849 :         if (lval_max && rval_max) {
     311        2557 :                 if (is_union(rel->op))
     312           3 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     313        2554 :                 else if (is_inter(rel->op))
     314         399 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval_max, rval_max)); /* for intersect the new max will be the min of the two */
     315             :                 else /* except */
     316        2155 :                         set_minmax_property(sql, e, PROP_MAX, lval_max);
     317             :         }
     318        8849 :         if (lval_min && rval_min) {
     319        2557 :                 if (is_union(rel->op))
     320           3 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     321        2554 :                 else if (is_inter(rel->op))
     322         399 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval_min, rval_min)); /* for intersect the new min will be the max of the two */
     323             :                 else /* except */
     324        2155 :                         set_minmax_property(sql, e, PROP_MIN, lval_min);
     325             :         }
     326             : 
     327        8849 :         if (is_union(rel->op) || is_munion(rel->op)) {
     328        5986 :                 if (!has_nil(le) && !has_nil(re))
     329         879 :                         set_has_no_nil(e);
     330        5986 :                 if (need_distinct(rel) && list_length(rel->exps) == 1)
     331           0 :                         set_unique(e);
     332        2863 :         } else if (is_inter(rel->op)) {
     333         564 :                 if (!has_nil(le) || !has_nil(re))
     334         509 :                         set_has_no_nil(e);
     335         564 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     336         345 :                         set_unique(e);
     337             :         } else {
     338        2299 :                 assert(is_except(rel->op));
     339        2299 :                 if (!has_nil(le))
     340        2236 :                         set_has_no_nil(e);
     341        2299 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     342        2012 :                         set_unique(e);
     343             :         }
     344             :         /* propagate unique estimation for known cases */
     345        8849 :         if (!is_union(rel->op) && (est = find_prop(le->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES)) {
     346         207 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     347         207 :                 p->value.dval = est->value.dval;
     348             :         }
     349             :         return false;
     350             : }
     351             : 
     352             : 
     353             : static void
     354       62385 : rel_munion_get_statistics(mvc *sql, sql_rel *rel, list *rels, sql_exp *e, int i)
     355             : {
     356       62385 :         if (is_recursive(rel))
     357             :                 return ;
     358       62385 :         assert(is_munion(rel->op));
     359             : 
     360       62385 :         sql_rel *l = rels->h->data;
     361       62385 :         sql_exp *le = list_fetch(l->exps, i);
     362       62385 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX);
     363       62385 :         bool has_nonil = !has_nil(le);
     364             : 
     365      181454 :         for(node *n = rels->h->next; n; n = n->next) {
     366      119069 :                 sql_rel *r = n->data;
     367      119069 :                 sql_exp *re = list_fetch(r->exps, i);
     368      119069 :                 atom *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     369             : 
     370      119069 :                 if (lval_max && rval_max) {
     371       43928 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     372       43928 :                         lval_max = find_prop_and_get(e->p, PROP_MAX);
     373             :                 }
     374      119069 :                 if (lval_min && rval_min) {
     375       43379 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     376       43379 :                         lval_min = find_prop_and_get(e->p, PROP_MIN);
     377             :                 }
     378      119069 :                 has_nonil &= !has_nil(re);
     379             : 
     380             :         }
     381             : 
     382       62385 :         if (has_nonil)
     383       42484 :                 set_has_no_nil(e);
     384             : 
     385       62385 :         if (need_distinct(rel) && list_length(rel->exps) == 1)
     386        1596 :                 set_unique(e);
     387             : }
     388             : 
     389             : 
     390             : static sql_exp *
     391     3493745 : rel_propagate_statistics(visitor *v, sql_rel *rel, sql_exp *e, int depth)
     392             : {
     393     3493745 :         mvc *sql = v->sql;
     394     3493745 :         atom *lval;
     395             : 
     396     3493745 :         (void) depth;
     397     3493745 :         switch(e->type) {
     398     2181609 :         case e_column:
     399     2181609 :                 switch (rel->op) { /* set relations don't call rel_propagate_statistics */
     400      282795 :                 case op_join:
     401             :                 case op_left:
     402             :                 case op_right:
     403             :                 case op_full:
     404             :                 case op_semi:
     405             :                 case op_anti: {
     406      282795 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e);
     407      282795 :                         if (!found)
     408      141839 :                                 (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     409             :                         break;
     410             :                 }
     411     1898814 :                 case op_select:
     412             :                 case op_project:
     413             :                 case op_groupby: {
     414     1898814 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e); /* labels may be found on the same projection, ugh */
     415     1898818 :                         if (!found && is_simple_project(rel->op))
     416      127684 :                                 (void) rel_propagate_column_ref_statistics(sql, rel, e);
     417             :                         break;
     418             :                 }
     419           0 :                 case op_insert:
     420             :                 case op_update:
     421             :                 case op_delete:
     422           0 :                         (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     423           0 :                         break;
     424             :                 default:
     425             :                         break;
     426             :                 }
     427             :                 break;
     428      101099 :         case e_convert: {
     429      101099 :                 sql_subtype *to = exp_totype(e), *from = exp_fromtype(e);
     430      101099 :                 sql_exp *l = e->l;
     431      101099 :                 sql_class fr = from->type->eclass, too = to->type->eclass;
     432      101099 :                 prop *est;
     433             : 
     434      101099 :                 if (fr == too) {
     435       91987 :                         if ((lval = find_prop_and_get(l->p, PROP_MAX))) {
     436       60377 :                                 atom *res = atom_copy(sql->sa, lval);
     437       60377 :                                 if ((res = atom_cast(sql->sa, res, to)))
     438       60354 :                                         set_minmax_property(sql, e, PROP_MAX, res);
     439             :                         }
     440       91987 :                         if ((lval = find_prop_and_get(l->p, PROP_MIN))) {
     441       61016 :                                 atom *res = atom_copy(sql->sa, lval);
     442       61016 :                                 if ((res = atom_cast(sql->sa, res, to)))
     443       60993 :                                         set_minmax_property(sql, e, PROP_MIN, res);
     444             :                         }
     445             :                 }
     446             :                 /* propagating nuniques through conversions is tricky. I am adding just the general cases */
     447      101099 :                 if ((est = find_prop(l->p, PROP_NUNIQUES)) &&
     448       62888 :                         (((fr == EC_TIME || fr == EC_TIME_TZ || fr == EC_DATE) && (too == EC_TIMESTAMP || too == EC_TIMESTAMP_TZ)) ||
     449       62863 :                          ((fr == EC_NUM || fr == EC_BIT) && too == EC_NUM) || (fr == EC_DEC && too == EC_DEC) || (fr != EC_EXTERNAL && too == EC_STRING))) {
     450       59435 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     451       59435 :                         p->value.dval = est->value.dval;
     452             :                 }
     453      101099 :                 if (!has_nil(l))
     454       57234 :                         set_has_no_nil(e);
     455             :                 break;
     456             :         }
     457      342808 :         case e_aggr:
     458             :         case e_func: {
     459      342808 :                 BUN lv;
     460      342808 :                 sql_subfunc *f = e->f;
     461             : 
     462      342808 :                 if (!f->func->s) {
     463      316026 :                         int key = hash_key(f->func->base.name); /* Using hash lookup */
     464      316026 :                         sql_hash_e *he = sql_functions_lookup->buckets[key&(sql_functions_lookup->size-1)];
     465      316026 :                         lookup_function look = NULL;
     466             : 
     467      690633 :                         for (; he && !look; he = he->chain) {
     468      374607 :                                 struct function_properties* fp = (struct function_properties*) he->value;
     469             : 
     470      374607 :                                 if (!strcmp(f->func->base.name, fp->name))
     471      108072 :                                         look = fp->func;
     472             :                         }
     473      316026 :                         if (look)
     474      108072 :                                 look(sql, e);
     475             :                 }
     476             :                 /* for global aggregates with no semantics, if the left relation has values, then the output won't be NULL */
     477      342808 :                 if (!is_semantics(e) && e->l && !have_nil(e->l) &&
     478       90524 :                         (e->type != e_aggr || (is_groupby(rel->op) && list_length(rel->r)) || ((lv = get_rel_count(rel->l)) != BUN_NONE && lv > 0)))
     479       90193 :                         set_has_no_nil(e);
     480             :                 /* set properties for global aggregates */
     481      342808 :                 if (e->type == e_aggr && is_groupby(rel->op) && list_empty(rel->r)) {
     482        7912 :                         if (!find_prop(e->p, PROP_NUNIQUES)) {
     483        7912 :                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     484        7912 :                                 p->value.dval = 1;
     485             :                         }
     486        7912 :                         set_unique(e);
     487             :                 }
     488             :                 break;
     489             :         }
     490      578271 :         case e_atom:
     491      578271 :                 if (e->l) {
     492      560418 :                         atom *a = (atom*) e->l;
     493      560418 :                         if (!a->isnull) {
     494      496104 :                                 set_minmax_property(sql, e, PROP_MAX, a);
     495      496104 :                                 set_minmax_property(sql, e, PROP_MIN, a);
     496             :                         }
     497      560418 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     498      560418 :                         p->value.dval = 1;
     499       17853 :                 } else if (e->f) {
     500        4277 :                         list *vals = (list *) e->f;
     501        4277 :                         sql_exp *first = vals->h ? vals->h->data : NULL;
     502        4277 :                         atom *max = NULL, *min = NULL; /* all child values must have a valid min/max */
     503        4277 :                         int has_nil = 0;
     504             : 
     505        4277 :                         if (first) {
     506        4277 :                                 max = ((lval = find_prop_and_get(first->p, PROP_MAX))) ? lval : NULL;
     507        4277 :                                 min = ((lval = find_prop_and_get(first->p, PROP_MIN))) ? lval : NULL;
     508        4277 :                                 has_nil |= has_nil(first);
     509             :                         }
     510             : 
     511       18354 :                         for (node *n = vals->h ? vals->h->next : NULL ; n ; n = n->next) {
     512        9800 :                                 sql_exp *ee = n->data;
     513             : 
     514        9800 :                                 if (min && max) {
     515        9307 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MAX))) {
     516        9261 :                                                 max = atom_cmp(lval, max) > 0 ? lval : max;
     517             :                                         } else {
     518             :                                                 max = NULL;
     519             :                                         }
     520        9307 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MIN))) {
     521        9261 :                                                 min = atom_cmp(min, lval) > 0 ? lval : min;
     522             :                                         } else {
     523             :                                                 min = NULL;
     524             :                                         }
     525             :                                 }
     526        9800 :                                 has_nil |= has_nil(ee);
     527             :                         }
     528             : 
     529        4277 :                         if (!has_nil)
     530        3906 :                                 set_has_no_nil(e);
     531        4277 :                         if (min && max) {
     532        3864 :                                 set_minmax_property(sql, e, PROP_MAX, max);
     533        3864 :                                 set_minmax_property(sql, e, PROP_MIN, min);
     534             :                         }
     535        4277 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     536        4277 :                         p->value.dval = (dbl) list_length(vals);
     537             :                 } else {
     538       13576 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     539       13576 :                         p->value.dval = 1;
     540             :                 }
     541             :                 break;
     542      289958 :         case e_cmp:
     543             :                 /* TODO? propagating min/max/unique of booleans is not very worth it */
     544      289958 :                 if (e->flag == cmp_or || e->flag == cmp_filter) {
     545       17847 :                         if (!have_nil(e->l) && !have_nil(e->r))
     546       13204 :                                 set_has_no_nil(e);
     547      272111 :                 } else if (e->flag == cmp_in || e->flag == cmp_notin) {
     548       21723 :                         sql_exp *le = e->l;
     549       21723 :                         if (!has_nil(le) && !have_nil(e->r))
     550       18606 :                                 set_has_no_nil(e);
     551             :                 } else {
     552      250388 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     553      250388 :                         if (!has_nil(le) && !has_nil(re) && (!e->f || !has_nil(fe)))
     554      179840 :                                 set_has_no_nil(e);
     555             :                 }
     556             :                 break;
     557             :         case e_psm:
     558             :                 break;
     559             :         }
     560             : 
     561             : #ifndef NDEBUG
     562             :         {
     563             :                 /* min and max cannot be NULL and min must be <= than max, if it happens the inner relation must be empty! */
     564     3493749 :                 atom *min = find_prop_and_get(e->p, PROP_MIN), *max = find_prop_and_get(e->p, PROP_MAX);
     565             : 
     566     3493744 :                 (void) min;
     567     3493744 :                 (void) max;
     568     3493744 :                 assert(!min || !min->isnull);
     569     3493744 :                 assert(!max || !max->isnull);
     570     3493744 :                 assert(!min || !max || (min && max)/* atom_cmp(min, max) <= 0*/);
     571             :         }
     572             : #endif
     573     3493744 :         return e;
     574             : }
     575             : 
     576             : static list * /* Remove predicates always false from min/max values */
     577      141218 : rel_prune_predicates(visitor *v, sql_rel *rel)
     578             : {
     579      141218 :         if (rel->l) {
     580      141218 :                 sql_rel *l = rel->l;
     581      141218 :                 if (is_single(l))
     582        3517 :                         return rel->exps;
     583             :         }
     584      137701 :         if (!list_empty(rel->attr))
     585        2991 :                 return rel->exps;
     586      286688 :         for (node *n = rel->exps->h ; n ; n = n->next) {
     587      151978 :                 sql_exp *e = n->data;
     588             : 
     589      151978 :                 if (e->type == e_cmp && is_theta_exp(e->flag)) {
     590      125059 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     591      125059 :                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     592      125059 :                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     593      125059 :                         bool always_false = false, always_true = false;
     594             : 
     595      125059 :                         if (fe && !is_symmetric(e)) {
     596        3058 :                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
     597        3058 :                                 comp_type lower = range2lcompare(e->flag), higher = range2rcompare(e->flag);
     598        3660 :                                 int not_int1 = rval_min && lval_max && /* the middle and left intervals don't overlap */
     599        1663 :                                         (!is_anti(e) && (lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0)),
     600        4072 :                                         not_int2 = lval_min && fval_max && /* the middle and right intervals don't overlap */
     601        2485 :                                         (!is_anti(e) && (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0)),
     602        3058 :                                         not_int3 = rval_min && fval_max && /* the left interval is after the right one */
     603        1287 :                                         (!is_anti(e) && (atom_cmp(rval_min, fval_max) > 0));
     604             : 
     605        3058 :                                 always_false |= not_int1 || not_int2 || not_int3;
     606             :                                 /* for anti the middle must be before the left or after the right or the right after the left, for the other the middle must be always between the left and right intervals */
     607        4113 :                                 always_true |= !has_nil(le) && !has_nil(re) && !has_nil(fe) &&
     608        3957 :                                         lval_min && lval_max && rval_min && rval_max && fval_min && fval_max &&
     609         575 :                                         (is_anti(e) ? ((lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) || (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
     610         488 :                                          ((lower == cmp_gte ? atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0)));
     611             :                         } else if (!fe) {
     612      121983 :                                 if (!is_semantics(e)) /* trivial not null cmp null case */
     613      232552 :                                         always_false |= !is_anti(e) && ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)));
     614      121983 :                                 switch (e->flag) {
     615      106451 :                                 case cmp_equal:
     616      106451 :                                         if (lval_min && lval_max && rval_min && rval_max && (!is_semantics(e) || !has_nil(le) || !has_nil(re)))
     617      135952 :                                                 always_false |= (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     618      106451 :                                         if (is_semantics(e)) { /* prune *= NULL cases */
     619        5678 :                                                 always_false |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     620       11356 :                                                 always_true |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     621             :                                         }
     622             :                                         break;
     623        7338 :                                 case cmp_notequal:
     624        7338 :                                         if (lval_min && lval_max && rval_min && rval_max)
     625       11434 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     626        7338 :                                         if (is_semantics(e)) {
     627          29 :                                                 always_true |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     628          58 :                                                 always_false |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     629             :                                         }
     630             :                                         break;
     631        5489 :                                 case cmp_gt:
     632        5489 :                                         if (lval_max && rval_min)
     633        1833 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     634        5489 :                                         if (lval_min && rval_max)
     635        3666 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     636             :                                         break;
     637         121 :                                 case cmp_gte:
     638         121 :                                         if (lval_max && rval_min)
     639          51 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     640         121 :                                         if (lval_min && rval_max)
     641         102 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     642             :                                         break;
     643        2499 :                                 case cmp_lt:
     644        2499 :                                         if (lval_min && rval_max)
     645        1383 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     646        2499 :                                         if (lval_max && rval_min)
     647        2814 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     648             :                                         break;
     649          85 :                                 case cmp_lte:
     650          85 :                                         if (lval_min && rval_max)
     651          11 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     652          85 :                                         if (lval_max && rval_min)
     653          22 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     654             :                                         break;
     655             :                                 default: /* Maybe later I can do cmp_in and cmp_notin but don't forget to remove is_theta_exp check up there */
     656             :                                         break;
     657             :                                 }
     658             :                         }
     659      125059 :                         assert(!always_false || !always_true);
     660      125059 :                         if (always_false || always_true) {
     661        3687 :                                 sql_exp *ne = exp_atom_bool(v->sql->sa, always_true /* if always_true set then true else false */);
     662        3687 :                                 if (exp_name(e))
     663           0 :                                         exp_prop_alias(v->sql->sa, ne, e);
     664        3687 :                                 n->data = ne;
     665        3687 :                                 v->changes++;
     666             :                         }
     667             :                 }
     668             :         }
     669      134710 :         return rel->exps;
     670             : }
     671             : 
     672             : static sql_rel *
     673          14 : set_setop_side(visitor *v, sql_rel *rel, sql_rel *side)
     674             : {
     675          14 :         if (side == rel->r)
     676           0 :                 rel->r = NULL;
     677             :         else
     678          14 :                 rel->l = NULL;
     679          14 :         if (!is_simple_project(side->op) || rel_is_ref(side) || !list_empty(side->r)) {
     680           0 :                 side = rel_project(v->sql->sa, side, rel_projections(v->sql, side, NULL, 0, 1));
     681           0 :                 set_count_prop(v->sql->sa, side, get_rel_count(side->l));
     682           0 :                 side->exps = exps_exp_visitor_bottomup(v, side, side->exps, 0, &rel_propagate_statistics, false);
     683             :         }
     684          35 :         for (node *n = rel->exps->h, *m = side->exps->h ; n && m ; n = n->next, m = m->next) {
     685          21 :                 sql_exp *e1 = n->data, *e2 = m->data;
     686          21 :                 exp_prop_alias(v->sql->sa, e2, e1);
     687             :         }
     688          14 :         list_hash_clear(side->exps);
     689             : 
     690          14 :         if (need_distinct(rel))
     691           0 :                 set_distinct(side);
     692          14 :         side->p = prop_copy(v->sql->sa, rel->p);
     693          14 :         rel_destroy(rel);
     694          14 :         return side;
     695             : }
     696             : 
     697             : static BUN
     698       22157 : trivial_project_exp_card(sql_exp *e)
     699             : {
     700       22520 :         if (e->type == e_convert)
     701         363 :                 return trivial_project_exp_card(e->l);
     702       22157 :         return e->type == e_atom && e->f ? (BUN) list_length(e->f) : 1;
     703             : }
     704             : 
     705             : static BUN
     706       26136 : rel_calc_nuniques(mvc *sql, sql_rel *l, list *exps)
     707             : {
     708       26136 :         BUN lv = get_rel_count(l);
     709             : 
     710       26136 :         if (lv == 0)
     711             :                 return 0;
     712       23421 :         if (!list_empty(exps)) {
     713       23421 :                 BUN nuniques = 0;
     714             :                 /* compute the highest number of unique values */
     715       58497 :                 for (node *n = exps->h ; n && nuniques != BUN_NONE ; n = n->next) {
     716       35076 :                         sql_exp *e = n->data;
     717       35076 :                         sql_rel *bt = NULL;
     718       35076 :                         prop *p = NULL;
     719       35076 :                         BUN euniques = BUN_NONE;
     720       35076 :                         atom *min, *max, *sub = NULL;
     721       35076 :                         sql_subtype *tp = exp_subtype(e);
     722       35076 :                         sql_class ec = tp ? tp->type->eclass : EC_STRING; /* if 'e' has no type (eg parameter), use a non-number type to fail condition */
     723             : 
     724       35076 :                         if ((p = find_prop(e->p, PROP_NUNIQUES))) {
     725       25369 :                                 euniques = (BUN) p->value.dval;
     726        9707 :                         } else if (e->type == e_column && e->nid && rel_find_exp_and_corresponding_rel(l, e, false, &bt, NULL) && bt && (p = find_prop(bt->p, PROP_COUNT))) {
     727        7633 :                                 euniques = (BUN) p->value.lval;
     728             :                         }
     729             :                         /* use min to max range to compute number of possible values in the domain for number types */
     730       35076 :                         if ((EC_TEMP(ec)||ec==EC_NUM||ec==EC_MONTH||ec==EC_POS) &&
     731       24247 :                                 (min = find_prop_and_get(e->p, PROP_MIN)) && (max = find_prop_and_get(e->p, PROP_MAX))) {
     732             :                                 /* the range includes min and max, so the atom_inc call is needed */
     733             :                                 /* if 'euniques' has number of distinct values, compute min between both */
     734       19299 :                                 if ((sub = atom_sub(sql->sa, max, min)) && (sub = atom_inc(sql->sa, sub)) && (sub = atom_cast(sql->sa, sub, sql_bind_localtype("oid"))))
     735           0 :                                         euniques = MIN(euniques, (BUN) sub->data.val.oval);
     736             :                         }
     737       35076 :                         if (euniques != BUN_NONE)
     738       33002 :                                 nuniques = MAX(nuniques, euniques); /* the highest cardinality sets the estimation */
     739             :                         else
     740             :                                 nuniques = BUN_NONE;
     741             :                 }
     742       23421 :                 if (nuniques != BUN_NONE)
     743             :                         return nuniques;
     744             :         }
     745             :         return lv;
     746             : }
     747             : 
     748             : static sql_rel *
     749     2088299 : rel_get_statistics_(visitor *v, sql_rel *rel)
     750             : {
     751             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
     752     2088299 :         uint8_t has_special_modify = *(uint8_t*) v->data;
     753     2088299 :         bool can_be_pruned = !has_special_modify && v->storage_based_opt;
     754             : 
     755             :         /* Don't look at the same relation twice */
     756     2088299 :         if (are_statistics_gathered(rel->used))
     757             :                 return rel;
     758     1346698 :         rel->used |= statistics_gathered;
     759             : 
     760     1346698 :         switch (rel->op) {
     761      316906 :         case op_basetable: {
     762      316906 :                 sql_table *t = (sql_table *) rel->l;
     763      316906 :                 sqlstore *store = v->sql->session->tr->store;
     764             : 
     765      316906 :                 if (!list_empty(rel->exps)) {
     766     1263449 :                         for (node *n = rel->exps->h ; n ; n = n->next)
     767      946351 :                                 rel_basetable_column_get_statistics(v->sql, rel, n->data);
     768             :                 }
     769             :                 /* Set table row count. TODO? look for remote tables. Don't look at storage for declared tables, because it won't be cleaned */
     770      317100 :                 if (isTable(t) && t->s && !isDeclaredTable(t)) /* count active rows only */
     771      263161 :                         set_count_prop(v->sql->sa, rel, (BUN)store->storage_api.count_col(v->sql->session->tr, ol_first_node(t->columns)->data, 10));
     772             :                 break;
     773             :         }
     774        2793 :         case op_union:
     775             :         case op_inter:
     776             :         case op_except: {
     777        2793 :                 bool empty_cross = false;
     778        2793 :                 int i = 0;
     779        2793 :                 sql_rel *l = rel->l, *pl = rel->l, *r = rel->r, *pr = rel->r;
     780             : 
     781        2793 :                 while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     782           0 :                         pl = pl->l;
     783        2793 :                 while (is_sample(pr->op) || is_topn(pr->op))
     784           0 :                         pr = pr->l;
     785             :                 /* if it's not a projection, then project and propagate statistics */
     786        2793 :                 if (!is_project(pl->op) && !is_base(pl->op)) {
     787           0 :                         pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     788           0 :                         set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     789           0 :                         pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     790             :                 }
     791        2793 :                 if (!is_project(pr->op) && !is_base(pr->op)) {
     792           0 :                         pr = rel_project(v->sql->sa, pr, rel_projections(v->sql, pr, NULL, 0, 1));
     793           0 :                         set_count_prop(v->sql->sa, pr, get_rel_count(pr->l));
     794           0 :                         pr->exps = exps_exp_visitor_bottomup(v, pr, pr->exps, 0, &rel_propagate_statistics, false);
     795             :                 }
     796             : 
     797       11668 :                 for (node *n = rel->exps->h ; n ; n = n->next) {
     798        8875 :                         empty_cross |= rel_setop_get_statistics(v->sql, rel, pl->exps, pr->exps, n->data, i);
     799        8875 :                         i++;
     800             :                 }
     801             : 
     802             :                 /* propagate row count */
     803        2793 :                 if (is_union(rel->op)) {
     804         277 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     805         277 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     806             : 
     807         277 :                         if (lv == 0 && rv == 0) { /* both sides empty */
     808           2 :                                 if (can_be_pruned)
     809             :                                         empty_cross = true;
     810             :                                 else
     811           2 :                                         set_count_prop(v->sql->sa, rel, 0);
     812         275 :                         } else if (can_be_pruned && lv == 0 && !rel_is_ref(rel)) { /* left side empty */
     813           0 :                                 rel = set_setop_side(v, rel, r);
     814           0 :                                 empty_cross = false; /* don't rewrite again */
     815         275 :                         } else if (can_be_pruned && rv == 0 && !rel_is_ref(rel)) { /* right side empty */
     816           0 :                                 rel = set_setop_side(v, rel, l);
     817           0 :                                 empty_cross = false; /* don't rewrite again */
     818         275 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
     819           7 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
     820             :                         }
     821        2516 :                 } else if (is_inter(rel->op) || is_except(rel->op)) {
     822        2516 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     823        2516 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     824             : 
     825        2516 :                         if (lv == 0) { /* left side empty */
     826          62 :                                 if (can_be_pruned)
     827             :                                         empty_cross = true;
     828             :                                 else
     829           5 :                                         set_count_prop(v->sql->sa, rel, 0);
     830        2454 :                         } else if (rv == 0) { /* right side empty */
     831          17 :                                 if (is_inter(rel->op)) {
     832           3 :                                         if (can_be_pruned)
     833             :                                                 empty_cross = true;
     834             :                                         else
     835           0 :                                                 set_count_prop(v->sql->sa, rel, 0);
     836          14 :                                 } else if (can_be_pruned && !rel_is_ref(rel)) {
     837          14 :                                         rel = set_setop_side(v, rel, l);
     838          14 :                                         empty_cross = false; /* don't rewrite again */
     839             :                                 } else {
     840           0 :                                         set_count_prop(v->sql->sa, rel, lv);
     841             :                                 }
     842             :                         } else {
     843        2437 :                                 set_count_prop(v->sql->sa, rel, lv);
     844             :                         }
     845             :                 }
     846        2793 :                 if (can_be_pruned && empty_cross) {
     847          80 :                         rel_destroy(rel->l);
     848          80 :                         rel->l = NULL;
     849          80 :                         rel_destroy(rel->r);
     850          80 :                         rel->r = NULL;
     851         244 :                         for (node *n = rel->exps->h ; n ; n = n->next) {
     852         164 :                                 sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     853         164 :                                 exp_prop_alias(v->sql->sa, a, e);
     854         164 :                                 n->data = a;
     855             :                         }
     856          80 :                         list_hash_clear(rel->exps);
     857          80 :                         sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     858          80 :                         set_count_prop(v->sql->sa, l, 1);
     859          80 :                         l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     860          80 :                         set_count_prop(v->sql->sa, l, 0);
     861          80 :                         rel->op = op_project;
     862          80 :                         rel->l = l;
     863          80 :                         rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     864          80 :                         set_count_prop(v->sql->sa, rel, 0);
     865          80 :                         set_nodistinct(rel); /* set relations may have distinct flag set */
     866          80 :                         v->changes++;
     867             :                 }
     868             :                 break;
     869             :         }
     870       13395 :         case op_munion: {
     871       13395 :                 list *l = rel->l, *nrels = sa_list(v->sql->sa);
     872       13395 :                 BUN cnt = 0;
     873       13395 :                 bool needs_pruning = false;
     874             : 
     875       13395 :                 if (is_recursive(rel))
     876             :                         break;
     877       50930 :                 for (node *n = l->h; n; n = n->next) {
     878       37605 :                         sql_rel *r = n->data, *pl = r;
     879             : 
     880       37605 :                         while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     881           0 :                                         pl = pl->l;
     882             :                         /* if it's not a projection, then project and propagate statistics */
     883       37605 :                         if (!is_project(pl->op) && !is_base(pl->op)) {
     884           0 :                                 pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     885           0 :                                 set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     886           0 :                                 pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     887             :                         }
     888       37605 :                         nrels = append(nrels, pl);
     889             :                         /* we need new munion statistics */
     890             :                         /* propagate row count */
     891       37605 :                         BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     892             :                         /* if PROP_COUNT does not exist we assume at least a row (see get_rel_count def) */
     893       37605 :                         if (rv == BUN_NONE) {
     894        1208 :                                 cnt++;
     895        1208 :                                 continue;
     896             :                         }
     897       36397 :                         if (!rv && can_be_pruned)
     898        6695 :                                 needs_pruning = true;
     899             :                         /* overflow check */
     900       36397 :                         if (rv > (BUN_MAX - cnt))
     901       37605 :                                 rv = BUN_MAX;
     902             :                         else
     903       36397 :                                 cnt += rv;
     904             :                 }
     905       13325 :                 int i = 0;
     906       75710 :                 for (node *n = rel->exps->h ; n ; n = n->next, i++)
     907       62385 :                         rel_munion_get_statistics(v->sql, rel, nrels, n->data, i);
     908             : 
     909       13325 :                 if (needs_pruning && !rel_is_ref(rel)) {
     910        4534 :                         v->changes++;
     911        4534 :                         list *nl = sa_list(l->sa);
     912             : 
     913       16675 :                         for (node *n = nrels->h; n; n = n->next) {
     914       12141 :                                 sql_rel *r = n->data;
     915       12141 :                                 BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     916             : 
     917       12141 :                                 if (!rv) { /* keep last for now */
     918        6225 :                                         rel_destroy(r);
     919        6225 :                                         continue;
     920             :                                 }
     921        5916 :                                 nl = append(nl, r);
     922             :                         }
     923        4534 :                         rel->l = nl;
     924        4534 :                         if (list_length(nl) == 1) {
     925        4208 :                                 sql_rel *l = rel->l = nl->h->data; /* ugh */
     926        4208 :                                 rel->r = NULL;
     927        4208 :                                 rel->op = op_project;
     928             : 
     929       20691 :                                 for (node *n = rel->exps->h, *m = l->exps->h ; n && m ; n = n->next, m = m->next) {
     930       16483 :                                         sql_exp *pe = n->data, *ie = m->data;
     931       16483 :                                         sql_exp *ne = exp_ref(v->sql, ie);
     932       16483 :                                         exp_prop_alias(v->sql->sa, ne, pe);
     933       16483 :                                         n->data = ne;
     934             :                                 }
     935        4208 :                                 list_hash_clear(rel->exps);
     936         326 :                         } else if (list_empty(nl)) {
     937             :                                 /* empty select (project [ nils ] ) */
     938         454 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     939         354 :                                         sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     940         354 :                                         exp_prop_alias(v->sql->sa, a, e);
     941         354 :                                         n->data = a;
     942             :                                 }
     943         100 :                                 list_hash_clear(rel->exps);
     944         100 :                                 sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     945         100 :                                 set_count_prop(v->sql->sa, l, 1);
     946         100 :                                 l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     947         100 :                                 set_count_prop(v->sql->sa, l, 0);
     948         100 :                                 rel->op = op_project;
     949         100 :                                 rel->r = NULL;
     950         100 :                                 rel->l = l;
     951         100 :                                 rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     952         100 :                                 set_count_prop(v->sql->sa, rel, 0);
     953         100 :                                 set_nodistinct(rel); /* set relations may have distinct flag set */
     954             :                         }
     955             :                 } else {
     956        8791 :                         set_count_prop(v->sql->sa, rel, cnt);
     957             :                 }
     958             :                 break;
     959             :         }
     960      538862 :         case op_join:
     961             :         case op_left:
     962             :         case op_right:
     963             :         case op_full:
     964             :         case op_semi:
     965             :         case op_anti:
     966             :         case op_select:
     967             :         case op_project:
     968             :         case op_groupby: {
     969      538862 :                 if (is_groupby(rel->op) && !list_empty(rel->r))
     970       16891 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     971      538862 :                 rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, &rel_propagate_statistics, false);
     972      538846 :                 if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r))
     973       39355 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     974             :                 /* The following optimizations can only be applied after propagating the statistics to rel->exps */
     975      538849 :                 if (can_be_pruned && (is_join(rel->op) || is_select(rel->op)) && !list_empty(rel->exps)) {
     976      141218 :                         int changes = v->changes;
     977      141218 :                         rel->exps = rel_prune_predicates(v, rel);
     978      141218 :                         if (v->changes > changes)
     979        3654 :                                 rel = rewrite_simplify(v, 0, v->value_based_opt, rel);
     980             :                 }
     981             : 
     982             :                 /* propagate row count */
     983      538849 :                 sql_rel *l = rel->l, *r = rel->r;
     984      538849 :                 switch (rel->op) {
     985      136378 :                 case op_join:
     986             :                 case op_left:
     987             :                 case op_right:
     988             :                 case op_full: {
     989      136378 :                         BUN lv = get_rel_count(l), rv = get_rel_count(r), uniques_estimate = BUN_MAX, join_idx_estimate = BUN_MAX;
     990             : 
     991      136378 :                         if (!list_empty(rel->exps) && !is_single(rel)) {
     992      249052 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     993      126994 :                                         sql_exp *e = n->data, *el = e->l, *er = e->r;
     994             : 
     995      126994 :                                         if (find_prop(e->p, PROP_JOINIDX)) {
     996         667 :                                                 join_idx_estimate = lv>rv?lv:rv;
     997      126327 :                                         } else if (e->type == e_cmp && e->flag == cmp_equal) {
     998             :                                                 /* if one of the sides is unique, the cardinality will be that exact number, but look for nulls */
     999      122443 :                                                 if (!is_semantics(e) || !has_nil(el) || !has_nil(er)) {
    1000      122267 :                                                         BUN lu = 0, ru = 0;
    1001      122267 :                                                         prop *p = NULL;
    1002      122267 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES)))
    1003       90815 :                                                                 lu = (BUN) p->value.dval;
    1004      122267 :                                                         if ((p = find_prop(er->p, PROP_NUNIQUES)))
    1005      105340 :                                                                 ru = (BUN) p->value.dval;
    1006      122267 :                                                         if (is_unique(el) || lu > lv) {
    1007       74600 :                                                                 BUN ncount = (is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv;
    1008       74600 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1009       47667 :                                                         } else if (is_unique(er) || ru > rv) {
    1010       29815 :                                                                 BUN ncount = (is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv;
    1011       29815 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1012             :                                                         }
    1013             :                                                 }
    1014             :                                         }
    1015             :                                 }
    1016             :                         }
    1017      136378 :                         if (is_single(rel)) {
    1018        4926 :                                 set_count_prop(v->sql->sa, rel, lv);
    1019      131452 :                         } else if (join_idx_estimate != BUN_MAX) {
    1020         665 :                                 set_count_prop(v->sql->sa, rel, join_idx_estimate);
    1021      130787 :                         } else if (uniques_estimate != BUN_MAX) {
    1022       97902 :                                 set_count_prop(v->sql->sa, rel, uniques_estimate);
    1023       32885 :                         } else if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1024             :                                 /* corner cases for outer joins */
    1025         126 :                                 if (is_left(rel->op)) {
    1026         114 :                                         set_count_prop(v->sql->sa, rel, lv);
    1027          12 :                                 } else if (is_right(rel->op)) {
    1028           3 :                                         set_count_prop(v->sql->sa, rel, rv);
    1029           9 :                                 } else if (is_full(rel->op) && lv != BUN_NONE && rv != BUN_NONE) {
    1030           9 :                                         set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
    1031           0 :                                 } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1032           0 :                                         set_count_prop(v->sql->sa, rel, 0);
    1033             :                                 }
    1034       32759 :                         } else if (lv == 0) {
    1035        1172 :                                 set_count_prop(v->sql->sa, rel, (is_right(rel->op) || is_full(rel->op)) ? rv : 0);
    1036       32159 :                         } else if (rv == 0) {
    1037        1570 :                                 set_count_prop(v->sql->sa, rel, (is_left(rel->op) || is_full(rel->op)) ? lv : 0);
    1038       31083 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1039       18436 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX / lv)) ? BUN_MAX : (lv * rv)); /* overflow check */
    1040             :                         }
    1041             :                         break;
    1042             :                 }
    1043        2916 :                 case op_anti:
    1044        2916 :                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1045        2916 :                         break;
    1046      110142 :                 case op_semi:
    1047             :                 case op_select:
    1048             :                         /* TODO calculate cardinalities using selectivities */
    1049      110142 :                         if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1050        2746 :                                 set_count_prop(v->sql->sa, rel, 0);
    1051             :                         } else {
    1052      211529 :                                 if (!list_empty(rel->exps) && !is_single(rel)) {
    1053      104132 :                                         BUN cnt = get_rel_count(l), u = 1;
    1054      172700 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1055      114240 :                                                 sql_exp *e = n->data, *el = e->l, *er = e->r;
    1056             : 
    1057             :                                                 /* simple expressions first */
    1058      114240 :                                                 if (e->type == e_cmp && e->flag == cmp_equal && exp_is_atom(er)) {
    1059             :                                                         /* use selectivity */
    1060       58992 :                                                         prop *p;
    1061       58992 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES))) {
    1062       45672 :                                                                 u = (BUN) p->value.dval;
    1063       45672 :                                                                 break;
    1064             :                                                         }
    1065             :                                                 }
    1066             :                                         }
    1067             :                                         /* u is an *estimate*, so don't set count_prop to 0 unless cnt is 0 */
    1068      104132 :                                         set_count_prop(v->sql->sa, rel, cnt == 0 ? 0 : u == 0 || u > cnt ? 1 : cnt/u);
    1069             :                                 } else {
    1070        3264 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1071             :                                 }
    1072             :                         }
    1073             :                         break;
    1074      264647 :                 case op_project:
    1075      264647 :                         if (l) {
    1076      254397 :                                 if (need_distinct(rel)) {
    1077           0 :                                         set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->exps));
    1078             :                                 } else {
    1079      254397 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1080             :                                 }
    1081             :                         } else {
    1082       10250 :                                 BUN card = 1;
    1083             : 
    1084       10250 :                                 if (!list_empty(rel->exps)) {
    1085       31064 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1086       20814 :                                                 sql_exp *e = n->data;
    1087       20814 :                                                 card = MAX(card, trivial_project_exp_card(e));
    1088             :                                         }
    1089             :                                 }
    1090       10250 :                                 set_count_prop(v->sql->sa, rel, card);
    1091             :                         }
    1092             :                         break;
    1093       24352 :                 case op_groupby:
    1094       24352 :                         if (list_empty(rel->r)) {
    1095        7460 :                                 set_count_prop(v->sql->sa, rel, 1);
    1096             :                         } else {
    1097       16892 :                                 set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->r));
    1098             :                         }
    1099             :                         break;
    1100             :                 default:
    1101             :                         break;
    1102             :                 }
    1103             :                 break;
    1104             :         }
    1105       16676 :         case op_topn: {
    1106       16676 :                 BUN lv = get_rel_count(rel->l);
    1107             : 
    1108       16676 :                 if (lv != BUN_NONE) {
    1109       16654 :                         sql_exp *le = rel->exps->h->data, *oe = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
    1110          84 :                         if (oe && oe->l && exp_is_not_null(oe)) { /* no parameters */
    1111          84 :                                 BUN offset = (BUN) ((atom*)oe->l)->data.val.lval;
    1112          84 :                                 lv = offset >= lv ? 0 : lv - offset;
    1113             :                         }
    1114       16655 :                         if (le->l && exp_is_not_null(le)) {
    1115       16620 :                                 BUN limit = (BUN) ((atom*)le->l)->data.val.lval;
    1116       16620 :                                 lv = MIN(lv, limit);
    1117             :                         }
    1118       16654 :                         set_count_prop(v->sql->sa, rel, lv);
    1119             :                 }
    1120             :                 break;
    1121             :         }
    1122          22 :         case op_sample: {
    1123          22 :                 BUN lv = get_rel_count(rel->l);
    1124             : 
    1125          22 :                 if (lv != BUN_NONE) {
    1126           4 :                         sql_exp *se = rel->exps->h->data;
    1127           4 :                         sql_subtype *tp = exp_subtype(se);
    1128             : 
    1129           4 :                         if (se->l && tp->type->eclass == EC_NUM) { /* sample is a number of rows */
    1130           4 :                                 BUN sample = (BUN) ((atom*)se->l)->data.val.lval;
    1131           4 :                                 lv = MIN(lv, sample);
    1132           0 :                         } else if (se->l) { /* sample is a percentage of rows */
    1133           0 :                                 dbl percent = ((atom*)se->l)->data.val.dval;
    1134           0 :                                 assert(tp->type->eclass == EC_FLT);
    1135           0 :                                 lv = (BUN) ceil((dbl)lv * percent);
    1136             :                         }
    1137           4 :                         set_count_prop(v->sql->sa, rel, lv);
    1138             :                 }
    1139             :                 break;
    1140             :         }
    1141        6015 :         case op_table: {
    1142        6015 :                 sql_exp *op = rel->r;
    1143        6015 :                 if (rel->flag != TRIGGER_WRAPPER && op) {
    1144        5703 :                         sql_subfunc *f = op->f;
    1145        5703 :                         if (f->func->lang == FUNC_LANG_SQL) {
    1146          97 :                                 set_count_prop(v->sql->sa, rel, 1000 /* just some fallback value */);
    1147        5606 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "storage") == 0) {
    1148         831 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of catalog */);
    1149        4775 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "db_users") == 0) {
    1150           0 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of users */);
    1151        4775 :                         } else if (f->func->lang == FUNC_LANG_MAL && strncmp(f->func->base.name, "querylog", 8) == 0) {
    1152        1089 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of querylog */);
    1153        3686 :                         } else if (f->func->lang == FUNC_LANG_MAL &&
    1154        3574 :                                            (strcmp(f->func->base.name, "queue") == 0 ||
    1155        3309 :                                                 strcmp(f->func->base.name, "optimizers") == 0 ||
    1156        2996 :                                                 strcmp(f->func->base.name, "env") == 0 ||
    1157        2703 :                                                 strcmp(f->func->base.name, "keywords") == 0 ||
    1158        2703 :                                                 strcmp(f->func->base.name, "statistics") == 0 ||
    1159        2034 :                                                 strcmp(f->func->base.name, "rejects") == 0 ||
    1160        1779 :                                                 strcmp(f->func->base.name, "schemastorage") == 0 ||
    1161        1779 :                                                 strncmp(f->func->base.name, "storage", 7) == 0 ||
    1162        1779 :                                                 strcmp(f->func->base.name, "sessions") == 0) ) {
    1163        2086 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of queue */);
    1164             :                         }
    1165             :                         /* else {
    1166             :                                 printf("%%func needs stats : %s\n", f->func->base.name);
    1167             :                         } */
    1168             :                 }
    1169             :                 break;
    1170             :         }
    1171             :         /*These relations are less important for now
    1172             :           TODO later we can tune it */
    1173             :         case op_insert:
    1174             :         case op_update:
    1175             :         case op_delete:
    1176             :         case op_truncate:
    1177             :         case op_ddl:
    1178             :         default:
    1179             :                 break;
    1180             :         }
    1181             : 
    1182             :         return rel;
    1183             : }
    1184             : 
    1185             : static sql_rel *
    1186      544502 : rel_get_statistics(visitor *v, global_props *gp, sql_rel *rel)
    1187             : {
    1188             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
    1189      544502 :         uint8_t has_special_modify = (uint8_t) gp->has_special_modify;
    1190      544502 :         v->data = &has_special_modify;
    1191      544502 :         rel = rel_visitor_bottomup(v, rel, &rel_get_statistics_);
    1192      544982 :         v->data = gp;
    1193      544982 :         return rel;
    1194             : }
    1195             : 
    1196             : run_optimizer
    1197      720817 : bind_get_statistics(visitor *v, global_props *gp)
    1198             : {
    1199      720817 :         (void) v;
    1200      720817 :         return (gp->opt_level == 1 && !gp->cnt[op_insert]) ? rel_get_statistics : NULL;
    1201             : }
    1202             : 
    1203             : 
    1204             : static bool
    1205       95559 : point_select_on_unique_column(sql_rel *rel)
    1206             : {
    1207       95559 :         if (is_select(rel->op) && !list_empty(rel->exps)) {
    1208      131620 :                 for (node *n = rel->exps->h; n ; n = n->next) {
    1209       75836 :                         sql_exp *e = n->data, *el = e->l, *er = e->r, *found = NULL;
    1210             : 
    1211       75836 :                         if (is_compare(e->type) && e->flag == cmp_equal) {
    1212       33632 :                                 if (is_numeric_upcast(el))
    1213           0 :                                         el = el->l;
    1214       33632 :                                 if (is_numeric_upcast(er))
    1215           0 :                                         er = er->l;
    1216       33632 :                                 if (is_alias(el->type) && exp_is_atom(er) && (found = rel_find_exp(rel->l, el)) &&
    1217         752 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(found) || !has_nil(er)))
    1218             :                                         return true;
    1219       32880 :                                 if (is_alias(er->type) && exp_is_atom(el) && (found = rel_find_exp(rel->l, er)) &&
    1220           0 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(el) || !has_nil(found)))
    1221             :                                         return true;
    1222             :                         }
    1223             :                 }
    1224             :         }
    1225             :         return false;
    1226             : }
    1227             : 
    1228             : /*
    1229             :  * A point select on an unique column reduces the number of rows to 1. If the same select is under a
    1230             :  * join, the opposite side's select can be pushed above the join.
    1231             :  */
    1232             : static inline sql_rel *
    1233     1258781 : rel_push_select_up(visitor *v, sql_rel *rel)
    1234             : {
    1235     1258781 :         if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) || is_semi(rel->op)) && !is_single(rel)) {
    1236      256094 :                 sql_rel *l = rel->l, *r = rel->r;
    1237      256094 :                 bool can_pushup_left = is_select(l->op) && !rel_is_ref(l) && !is_single(l) && (is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op)),
    1238      256094 :                         can_pushup_right = is_select(r->op) && !rel_is_ref(r) && !is_single(r) && (is_innerjoin(rel->op) || is_right(rel->op));
    1239             : 
    1240      256094 :                 if (can_pushup_left || can_pushup_right) {
    1241       67415 :                         if (can_pushup_left)
    1242       45777 :                                 can_pushup_left = point_select_on_unique_column(r);
    1243       67415 :                         if (can_pushup_right)
    1244       49782 :                                 can_pushup_right = point_select_on_unique_column(l);
    1245             : 
    1246             :                         /* if both selects retrieve one row each, it's not worth it to push both up */
    1247       67415 :                         if (can_pushup_left && !can_pushup_right) {
    1248         686 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1249         686 :                                 nrel->l = l->l;
    1250         686 :                                 rel = rel_inplace_select(rel, nrel, l->exps);
    1251         686 :                                 assert(is_select(rel->op));
    1252         686 :                                 v->changes++;
    1253       66729 :                         } else if (!can_pushup_left && can_pushup_right) {
    1254          14 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1255          14 :                                 nrel->r = r->l;
    1256          14 :                                 rel = rel_inplace_select(rel, nrel, r->exps);
    1257          14 :                                 assert(is_select(rel->op));
    1258          14 :                                 v->changes++;
    1259             :                         }
    1260             :                 }
    1261             :         }
    1262     1258781 :         return rel;
    1263             : }
    1264             : 
    1265             : static int
    1266       98681 : sql_class_base_score(visitor *v, sql_column *c, sql_subtype *t, bool equality_based)
    1267             : {
    1268       98681 :         int de;
    1269             : 
    1270       98681 :         if (!t)
    1271             :                 return 0;
    1272       98681 :         switch (ATOMstorage(t->type->localtype)) {
    1273             :         case TYPE_bte:
    1274             :                 return 150 - 8;
    1275        1634 :         case TYPE_sht:
    1276        1634 :                 return 150 - 16;
    1277       39113 :         case TYPE_int:
    1278       39113 :                 return 150 - 32;
    1279         519 :         case TYPE_void:
    1280             :         case TYPE_lng:
    1281         519 :                 return 150 - 64;
    1282             :         case TYPE_uuid:
    1283             : #ifdef HAVE_HGE
    1284             :         case TYPE_hge:
    1285             : #endif
    1286             :                 return 150 - 128;
    1287           2 :         case TYPE_flt:
    1288           2 :                 return 75 - 24;
    1289             :         case TYPE_dbl:
    1290             :                 return 75 - 53;
    1291       33888 :         default:
    1292       33888 :                 if (equality_based && c && v->storage_based_opt && (de = mvc_is_duplicate_eliminated(v->sql, c)))
    1293        1666 :                         return 150 - de * 8;
    1294             :                 /* strings and blobs not duplicate eliminated don't get any points here */
    1295             :                 return 0;
    1296             :         }
    1297             : }
    1298             : 
    1299             : static int
    1300       35922 : score_se_base(visitor *v, sql_rel *rel, sql_exp *e)
    1301             : {
    1302       35922 :         int res = 0;
    1303       35922 :         sql_subtype *t = exp_subtype(e);
    1304       35922 :         sql_column *c = NULL;
    1305             : 
    1306       35922 :         if (e->type == e_convert) /* keep unsafes at the end (TODO improve) */
    1307             :                 return -1000;
    1308             :         /* can we find out if the underlying table is sorted */
    1309       35321 :         if ((c = exp_find_column(rel, e, -2)) && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1310       35321 :                 res += 600;
    1311             : 
    1312             :         /* prefer the shorter var types over the longer ones */
    1313       35321 :         res += sql_class_base_score(v, c, t, is_equality_or_inequality_exp(e->flag)); /* smaller the type, better */
    1314       35321 :         return res;
    1315             : }
    1316             : 
    1317             : static int
    1318       60407 : score_se(visitor *v, sql_rel *rel, sql_exp *e)
    1319             : {
    1320       60407 :         int score = 0;
    1321       60407 :         if (e->type == e_cmp && !is_complex_exp(e->flag)) {
    1322       35922 :                 sql_exp *l = e->l;
    1323             : 
    1324       35922 :                 while (l->type == e_cmp) { /* go through nested comparisons */
    1325           2 :                         sql_exp *ll;
    1326             : 
    1327           2 :                         if (l->flag == cmp_filter || l->flag == cmp_or)
    1328           0 :                                 ll = ((list*)l->l)->h->data;
    1329             :                         else
    1330           2 :                                 ll = l->l;
    1331           2 :                         if (ll->type != e_cmp)
    1332             :                                 break;
    1333             :                         l = ll;
    1334             :                 }
    1335       35922 :                 score += score_se_base(v, rel, l);
    1336             :         }
    1337       60407 :         score += exp_keyvalue(e);
    1338       60407 :         return score;
    1339             : }
    1340             : 
    1341             : static inline sql_rel *
    1342     1258782 : rel_select_order(visitor *v, sql_rel *rel)
    1343             : {
    1344     1258782 :         int *scores = NULL;
    1345     1258782 :         sql_exp **exps = NULL;
    1346             : 
    1347     1258782 :         if (is_select(rel->op) && list_length(rel->exps) > 1) {
    1348       28102 :                 node *n;
    1349       28102 :                 int i, nexps = list_length(rel->exps);
    1350       28102 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, nexps);
    1351       28102 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, nexps);
    1352             : 
    1353       88509 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next) {
    1354       60437 :                         exps[i] = n->data;
    1355       60437 :                         if (find_prop(exps[i]->p, PROP_HASHCOL))
    1356             :                                 return rel;
    1357       60407 :                         scores[i] = score_se(v, rel, n->data);
    1358             :                 }
    1359       28072 :                 GDKqsort(scores, exps, NULL, nexps, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1360             : 
    1361       88479 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next)
    1362       60407 :                         n->data = exps[i];
    1363             :         }
    1364             : 
    1365             :         return rel;
    1366             : }
    1367             : 
    1368             : /* Compute the efficiency of using this expression early in a group by list */
    1369             : static int
    1370       63360 : score_gbe(visitor *v, sql_rel *rel, sql_exp *e)
    1371             : {
    1372       63360 :         int res = 0;
    1373       63360 :         sql_subtype *t = exp_subtype(e);
    1374       63360 :         sql_column *c = exp_find_column(rel, e, -2);
    1375             : 
    1376       63360 :         if (e->card == CARD_ATOM) /* constants are trivial to group */
    1377          38 :                 res += 1000;
    1378             :         /* can we find out if the underlying table is sorted */
    1379       63360 :         if (is_unique(e) || find_prop(e->p, PROP_HASHCOL) || (c && v->storage_based_opt && mvc_is_unique(v->sql, c))) /* distinct columns */
    1380        3584 :                 res += 700;
    1381       42134 :         if (c && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1382        3700 :                 res += 500;
    1383       63360 :         if (find_prop(e->p, PROP_HASHIDX)) /* has hash index */
    1384           0 :                 res += 200;
    1385             : 
    1386             :         /* prefer the shorter var types over the longer ones */
    1387       63360 :         res += sql_class_base_score(v, c, t, true); /* smaller the type, better */
    1388       63360 :         return res;
    1389             : }
    1390             : 
    1391             : /* reorder group by expressions */
    1392             : static inline sql_rel *
    1393     1258781 : rel_groupby_order(visitor *v, sql_rel *rel)
    1394             : {
    1395     1258781 :         int *scores = NULL;
    1396     1258781 :         sql_exp **exps = NULL;
    1397             : 
    1398     1258781 :         if (is_groupby(rel->op) && list_length(rel->r) > 1) {
    1399       26727 :                 node *n;
    1400       26727 :                 list *gbe = rel->r;
    1401       26727 :                 int i, ngbe = list_length(gbe);
    1402       26727 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, ngbe);
    1403       26727 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, ngbe);
    1404             : 
    1405             :                 /* first sorting step, give priority for integers and sorted columns */
    1406       90087 :                 for (i = 0, n = gbe->h; n; i++, n = n->next) {
    1407       63360 :                         exps[i] = n->data;
    1408       63360 :                         scores[i] = score_gbe(v, rel, exps[i]);
    1409             :                 }
    1410       26727 :                 GDKqsort(scores, exps, NULL, ngbe, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1411             : 
    1412             :                 /* second sorting step, give priority to strings with lower number of digits */
    1413       54217 :                 for (i = ngbe - 1; i && !scores[i]; i--); /* find expressions with no score from the first round */
    1414       26727 :                 if (scores[i])
    1415       25725 :                         i++;
    1416       26727 :                 if (ngbe - i > 1) {
    1417       12055 :                         for (int j = i; j < ngbe; j++) {
    1418        9064 :                                 sql_subtype *t = exp_subtype(exps[j]);
    1419        9064 :                                 scores[j] = t ? t->digits : 0;
    1420             :                         }
    1421             :                         /* the less number of digits the better, order ascending */
    1422        2991 :                         GDKqsort(scores + i, exps + i, NULL, ngbe - i, sizeof(int), sizeof(void *), TYPE_int, false, true);
    1423             :                 }
    1424             : 
    1425       90087 :                 for (i = 0, n = gbe->h; n; i++, n = n->next)
    1426       63360 :                         n->data = exps[i];
    1427             :         }
    1428             : 
    1429     1258781 :         return rel;
    1430             : }
    1431             : 
    1432             : /* This optimization loop contains optimizations that can potentially use statistics */
    1433             : static sql_rel *
    1434     1258781 : rel_final_optimization_loop_(visitor *v, sql_rel *rel)
    1435             : {
    1436             :         /* Run rel_push_select_up only once at the end to avoid an infinite optimization loop */
    1437     1258781 :         rel = rel_push_select_up(v, rel);
    1438     1258781 :         rel = rel_select_order(v, rel);
    1439             : 
    1440             :         /* TODO? Maybe later add rel_simplify_count, rel_join2semijoin, rel_simplify_fk_joins,
    1441             :            rel_distinct_project2groupby, rel_simplify_predicates, rel_simplify_math,
    1442             :            rel_distinct_aggregate_on_unique_values */
    1443             : 
    1444     1258781 :         rel = rel_groupby_order(v, rel);
    1445     1258782 :         return rel;
    1446             : }
    1447             : 
    1448             : static sql_rel *
    1449       67711 : rel_final_optimization_loop(visitor *v, global_props *gp, sql_rel *rel)
    1450             : {
    1451       67711 :         (void) gp;
    1452       67711 :         return rel_visitor_bottomup(v, rel, &rel_final_optimization_loop_);
    1453             : }
    1454             : 
    1455             : run_optimizer
    1456      721425 : bind_final_optimization_loop(visitor *v, global_props *gp)
    1457             : {
    1458      721425 :         int flag = v->sql->sql_optimizer;
    1459             :         /* At the moment, this optimizer has dependency on 3 flags */
    1460      552820 :         return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_select]) &&
    1461      789138 :                 (flag & push_select_up) && (flag & optimize_select_and_joins_topdown) && (flag & optimize_projections) ? rel_final_optimization_loop : NULL;
    1462             : }

Generated by: LCOV version 1.14