LCOV - code coverage report
Current view: top level - sql/server - rel_statistics.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 831 883 94.1 %
Date: 2024-10-04 20:04:04 Functions: 26 26 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include "rel_optimizer_private.h"
      15             : #include "rel_statistics.h"
      16             : #include "rel_basetable.h"
      17             : #include "rel_rewriter.h"
      18             : 
      19             : static sql_exp *
      20     3462878 : comparison_find_column(sql_exp *input, sql_exp *e)
      21             : {
      22     3462878 :         switch (input->type) {
      23       98271 :         case e_convert: {
      24       98271 :                 list *types = (list *)input->r;
      25       98271 :                 sql_class from = ((sql_subtype*)types->h->data)->type->eclass, to = ((sql_subtype*)types->h->next->data)->type->eclass;
      26       98271 :                 if (from == to)
      27      189491 :                         return comparison_find_column(input->l, e) ? input : NULL;
      28             :                 return NULL;
      29             :         }
      30     3051660 :         case e_column:
      31     3051660 :                 return exp_match(e, input) ? input : NULL;
      32             :         default:
      33             :                 return NULL;
      34             :         }
      35             : }
      36             : 
      37             : /* multi lo <= col <= hi, maybe still be false even if lo or hi are NULL, possibly similar for filter on multiple
      38             :  * columns */
      39             : #define comp_single_column(c) (!c->f)
      40             : 
      41             : static sql_exp *
      42     8733084 : rel_propagate_column_ref_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
      43             : {
      44     8816529 :         assert(e->type == e_column);
      45     8816529 :         if (rel) {
      46     8816455 :                 switch(rel->op) {
      47     4081101 :                 case op_left:
      48             :                 case op_right:
      49             :                 case op_full:
      50             :                 case op_join:
      51             :                 case op_select:
      52             :                 case op_anti:
      53             :                 case op_semi: {
      54     4081101 :                         bool found_without_semantics = false, found_left = false, found_right = false, still_unique = false;
      55             : 
      56     4081101 :                         if ((is_innerjoin(rel->op) || is_select(rel->op)) && list_length(rel->exps) == 1 && exp_is_false(rel->exps->h->data))
      57             :                                 return NULL; /* nothing will pass, skip */
      58             : 
      59             :                         /* propagate from the bottom first */
      60     4066985 :                         if (rel_propagate_column_ref_statistics(sql, rel->l, e))
      61             :                                 found_left = true;
      62     2585975 :                         if (!found_left && is_join(rel->op) && rel_propagate_column_ref_statistics(sql, rel->r, e))
      63     4066985 :                                 found_right = true;
      64             : 
      65     4066985 :                         if (!found_left && !found_right)
      66             :                                 return NULL;
      67     1775507 :                         if (!list_empty(rel->exps) && rel->op != op_anti) { /* if there's an or, the MIN and MAX get difficult to propagate */
      68     3494211 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
      69     1809383 :                                         sql_exp *comp = n->data, *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL, *fe = comp->f, *fne = NULL;
      70             : 
      71     1809383 :                                         if (comp->type == e_cmp) {
      72     1808381 :                                                 if (is_theta_exp(comp->flag) && ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) || (fe && (fne = comparison_find_column(fe, e))))) {
      73      125627 :                                                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
      74      125627 :                                                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
      75             : 
      76             :                                                         /* not semantics found or if explicitly filtering not null values from the column */
      77      125627 :                                                         found_without_semantics |= (!is_semantics(comp) && comp_single_column(comp)) || (comp->flag == cmp_equal && lne && is_anti(comp) && exp_is_null(re));
      78      125627 :                                                         still_unique |= comp->flag == cmp_equal && is_unique(le) && is_unique(re); /* unique if only equi-joins on unique columns are there */
      79      125627 :                                                         if (is_full(rel->op) || (is_left(rel->op) && found_left) || (is_right(rel->op) && found_right)) /* on outer joins, min and max cannot be propagated on some cases */
      80       19747 :                                                                 continue;
      81             :                                                         /* if (end2 >= start1 && start2 <= end1) then the 2 intervals are intersected */
      82      105880 :                                                         if (fe && lval_min && lval_max) { /* range case, the middle expression must intersect the other two */
      83        4622 :                                                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
      84        4622 :                                                                 int int1 = rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0,
      85        4622 :                                                                         int2 = fval_min && fval_max && atom_cmp(fval_max, lval_min) >= 0 && atom_cmp(fval_min, lval_max) <= 0,
      86        4622 :                                                                         symmetric = is_symmetric(comp);
      87             : 
      88        4622 :                                                                 if (is_anti(comp) || (!symmetric && fval_min && rval_max && atom_cmp(fval_min, rval_max) < 0)) /* for asymmetric case the re range must be after the fe range */
      89        1817 :                                                                         continue;
      90        2805 :                                                                 if (lne && int1 && int2) {
      91         104 :                                                                         if (symmetric) {
      92           0 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
      93           0 :                                                                                 atom *nmin = statistics_atom_min(sql, rval_min, fval_min), *nmax = statistics_atom_max(sql, rval_max, fval_max);
      94             :                                                                                 /* max is min from le and (max from re and fe max) */
      95           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, nmax, p2->value.pval) : nmax);
      96             :                                                                                 /* min is max from le and (min from re and fe min) */
      97           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, nmin, p1->value.pval) : nmin);
      98             :                                                                         } else {
      99         104 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
     100             :                                                                                 /* max is min from le and fe max */
     101         104 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, fval_max, p2->value.pval) : fval_max);
     102             :                                                                                 /* min is max from le and re min */
     103         104 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, rval_min, p1->value.pval) : rval_min);
     104             :                                                                         }
     105        2701 :                                                                 } else if (rne) {
     106         590 :                                                                         if (symmetric && int1 && int2) { /* min is max from le and (min from re and fe min) */
     107           0 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     108           0 :                                                                                 atom *nmin = p ? statistics_atom_min(sql, p->value.pval, fval_min) : fval_min;
     109           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, nmin, lval_min));
     110         590 :                                                                         } else if (int1) { /* min is max from le and re min */
     111         100 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     112         100 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     113             :                                                                         }
     114        2111 :                                                                 } else if (fne) {
     115         549 :                                                                         if (symmetric && int1 && int2) { /* max is min from le and (max from re and fe max) */
     116           0 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     117           0 :                                                                                 atom *nmax = p ? statistics_atom_max(sql, p->value.pval, rval_max) : rval_max;
     118           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, nmax, lval_max) : nmax);
     119         549 :                                                                         } else if (int2) { /* max is min from le and fe max */
     120         266 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     121         266 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     122             :                                                                         }
     123             :                                                                 }
     124      101258 :                                                         } else if (lval_min && lval_max && rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0) {
     125             :                                                                 /* both min and max must be set and the intervals must overlap */
     126       43110 :                                                                 switch (comp->flag) {
     127       28773 :                                                                 case cmp_equal: /* for equality reduce */
     128       28773 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_max(sql, lval_max, rval_max) : statistics_atom_min(sql, lval_max, rval_max));
     129       28773 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_min(sql, lval_min, rval_min) : statistics_atom_max(sql, lval_min, rval_min));
     130       28773 :                                                                         break;
     131        4680 :                                                                 case cmp_notequal: /* for inequality expand */
     132        4680 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_min(sql, lval_max, rval_max) : statistics_atom_max(sql, lval_max, rval_max));
     133        4680 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_max(sql, lval_min, rval_min) : statistics_atom_min(sql, lval_min, rval_min));
     134        4680 :                                                                         break;
     135        5651 :                                                                 case cmp_gt:
     136             :                                                                 case cmp_gte:
     137       10364 :                                                                         if (!is_anti(comp) && lne) { /* min is max from both min */
     138        4713 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     139        4713 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min, p->value.pval) : rval_min);
     140         938 :                                                                         } else if (!is_anti(comp)) { /* max is min from both max */
     141         938 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     142         938 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     143             :                                                                         }
     144             :                                                                         break;
     145        4006 :                                                                 case cmp_lt:
     146             :                                                                 case cmp_lte:
     147        7205 :                                                                         if (!is_anti(comp) && lne) { /* max is min from both max */
     148        3199 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     149        3199 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max, p->value.pval) : rval_max);
     150         807 :                                                                         } else if (!is_anti(comp)) { /* min is max from both min */
     151         807 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     152         807 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     153             :                                                                         }
     154             :                                                                         break;
     155             :                                                                 default: /* Maybe later I can do cmp_in and cmp_notin */
     156             :                                                                         break;
     157             :                                                                 }
     158             :                                                         }
     159             :                                                 }
     160             :                                         }
     161             :                                 }
     162             :                         }
     163     1775507 :                         if (is_full(rel->op) || (is_left(rel->op) && found_right) || (is_right(rel->op) && found_left))
     164       37914 :                                 set_has_nil(e);
     165     1775507 :                         if (!is_outerjoin(rel->op) && found_without_semantics) /* at an outer join, null values pass */
     166       94488 :                                 set_has_no_nil(e);
     167     1775507 :                         if (is_join(rel->op) && is_unique(e) && !still_unique)
     168      118612 :                                 set_not_unique(e);
     169     1775507 :                         return e;
     170             :                 }
     171     4632562 :                 case op_table:
     172             :                 case op_basetable:
     173             :                 case op_union:
     174             :                 case op_except:
     175             :                 case op_inter:
     176             :                 case op_munion:
     177             :                 case op_project:
     178             :                 case op_groupby: {
     179     4632562 :                         sql_exp *found;
     180     4632562 :                         atom *fval;
     181     4632562 :                         prop *est;
     182     4632562 :                         if ((found = rel_find_exp(rel, e))) {
     183     2173579 :                                 if (rel->op != op_table) { /* At the moment don't propagate statistics for table relations */
     184     2129147 :                                         if ((fval = find_prop_and_get(found->p, PROP_MAX)))
     185     1130604 :                                                 set_minmax_property(sql, e, PROP_MAX, fval);
     186     2129146 :                                         if ((fval = find_prop_and_get(found->p, PROP_MIN)))
     187     1137713 :                                                 set_minmax_property(sql, e, PROP_MIN, fval);
     188     2129145 :                                         if (!has_nil(found))
     189     1351283 :                                                 set_has_no_nil(e);
     190     2129145 :                                         if (is_unique(found) || (need_distinct(rel) && list_length(rel->exps) == 1) ||
     191     1718789 :                                                 (is_groupby(rel->op) && (list_empty(rel->r) || (list_length(rel->r) == 1 && exps_find_exp(rel->r, e)))))
     192      421234 :                                                 set_unique(e);
     193             :                                         /* propagate unique estimation for known cases */
     194     2129145 :                                         if (is_groupby(rel->op) && list_empty(rel->r) && !find_prop(e->p, PROP_NUNIQUES)) { /* global aggregate case */
     195        7769 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     196        7769 :                                                 p->value.dval = 1;
     197     2121376 :                                         } else if (((is_basetable(rel->op) || is_except(rel->op) || is_inter(rel->op) || is_simple_project(rel->op) ||
     198       66651 :                                                                  (is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
     199     2063665 :                                                                 (est = find_prop(found->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES))) {
     200      188166 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     201      188166 :                                                 p->value.dval = est->value.dval;
     202             :                                         }
     203             :                                 }
     204     2173577 :                                 return e;
     205             :                         }
     206             :                         return NULL;
     207             :                 }
     208       83445 :                 case op_topn:
     209             :                 case op_sample:
     210       83445 :                         return rel_propagate_column_ref_statistics(sql, rel->l, e);
     211             :                 default:
     212             :                         break;
     213             :                 }
     214             :         }
     215             :         return NULL;
     216             : }
     217             : 
     218             : static atom *
     219     4522706 : atom_from_valptr( allocator *sa, sql_subtype *tpe, ValRecord *v)
     220             : {
     221     4522706 :         atom *a = SA_NEW(sa, atom);
     222             : 
     223     4522617 :         assert(!VALisnil(v));
     224     4522516 :         *a = (atom) {.tpe = *tpe,};
     225     4522516 :         SA_VALcopy(sa, &a->data, v);
     226     4522461 :         return a;
     227             : }
     228             : 
     229             : void
     230     4152904 : sql_column_get_statistics(mvc *sql, sql_column *c, sql_exp *e)
     231             : {
     232     4152904 :         bool nonil = false, unique = false;
     233     4152904 :         double unique_est = 0.0;
     234     4152904 :         ValRecord min, max;
     235     4152904 :         int ok = mvc_col_stats(sql, c, &nonil, &unique, &unique_est, &min, &max);
     236             : 
     237     4153016 :         if (has_nil(e) && nonil)
     238     2608556 :                 set_has_no_nil(e);
     239     4153016 :         if (!is_unique(e) && unique)
     240     1099036 :                 set_unique(e);
     241     4153016 :         if (unique_est != 0.0) {
     242     2916407 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     243     2916405 :                 p->value.dval = unique_est;
     244             :         }
     245     4153014 :         unsigned int digits = 0;
     246     4153014 :         sql_subtype *et = exp_subtype(e);
     247     4153038 :         if (et->type->eclass == EC_DEC || et->type->eclass == EC_NUM)
     248     2697137 :                 digits = et->digits;
     249     4153038 :         if ((ok & 2) == 2) {
     250     2258440 :                 if (!VALisnil(&max)) {
     251     2258433 :                         prop *p = e->p = prop_create(sql->sa, PROP_MAX, e->p);
     252     2258413 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &max);
     253     2258360 :                         if (digits) {
     254     1680842 :                                 unsigned int nd = atom_digits(p->value.pval);
     255     1680858 :                                 if (nd < digits)
     256             :                                         digits = nd;
     257     1680858 :                                 if (!digits)
     258             :                                         digits = 1;
     259             :                         }
     260             :                 }
     261     2258377 :                 VALclear(&max);
     262             :         }
     263     4152959 :         if ((ok & 1) == 1) {
     264     2264485 :                 if (!VALisnil(&min)) {
     265     2264467 :                         prop *p = e->p = prop_create(sql->sa, PROP_MIN, e->p);
     266     2264450 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &min);
     267     2264405 :                         if (digits) {
     268     1687950 :                                 unsigned int nd = atom_digits(p->value.pval);
     269     1687948 :                                 if (nd > digits) /* possibly set to low by max value */
     270             :                                         digits = nd;
     271             :                                 if (!digits)
     272             :                                         digits = 1;
     273             :                         }
     274             :                 }
     275     2264403 :                 VALclear(&min);
     276             :         }
     277     4152863 :         if (digits)
     278     2696965 :                 et->digits = digits;
     279     4152863 :         if (et->type->eclass == EC_DEC && et->digits <= et->scale)
     280         216 :                 et->digits = et->scale + 1;
     281     4152863 : }
     282             : 
     283             : static void
     284      935656 : rel_basetable_column_get_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
     285             : {
     286      935656 :         if (e->p)
     287             :                 return;
     288      302403 :         sql_column *c = NULL;
     289             : 
     290      302403 :         if ((c = rel_base_find_column(rel, e->nid))) {
     291      204288 :                 sql_column_get_statistics(sql, c, e);
     292             :         }
     293             : }
     294             : 
     295             : static bool
     296        8872 : rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, sql_exp *e, int i)
     297             : {
     298        8872 :         sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
     299        8872 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     300        8872 :                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     301        8872 :         prop *est;
     302             : 
     303             :         /* for the intersection, if both expressions don't overlap, it can be pruned */
     304        8872 :         if (is_inter(rel->op) && !has_nil(le) && !has_nil(re) &&
     305         511 :                 ((rval_max && lval_min && atom_cmp(rval_max, lval_min) < 0) || (rval_min && lval_max && atom_cmp(rval_min, lval_max) > 0)))
     306          26 :                 return true;
     307             : 
     308        8846 :         if (lval_max && rval_max) {
     309        2557 :                 if (is_union(rel->op))
     310           3 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     311        2554 :                 else if (is_inter(rel->op))
     312         399 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval_max, rval_max)); /* for intersect the new max will be the min of the two */
     313             :                 else /* except */
     314        2155 :                         set_minmax_property(sql, e, PROP_MAX, lval_max);
     315             :         }
     316        8846 :         if (lval_min && rval_min) {
     317        2557 :                 if (is_union(rel->op))
     318           3 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     319        2554 :                 else if (is_inter(rel->op))
     320         399 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval_min, rval_min)); /* for intersect the new min will be the max of the two */
     321             :                 else /* except */
     322        2155 :                         set_minmax_property(sql, e, PROP_MIN, lval_min);
     323             :         }
     324             : 
     325        8846 :         if (is_union(rel->op) || is_munion(rel->op)) {
     326        5986 :                 if (!has_nil(le) && !has_nil(re))
     327         879 :                         set_has_no_nil(e);
     328        5986 :                 if (need_distinct(rel) && list_length(rel->exps) == 1)
     329           0 :                         set_unique(e);
     330        2860 :         } else if (is_inter(rel->op)) {
     331         564 :                 if (!has_nil(le) || !has_nil(re))
     332         509 :                         set_has_no_nil(e);
     333         564 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     334         345 :                         set_unique(e);
     335             :         } else {
     336        2296 :                 assert(is_except(rel->op));
     337        2296 :                 if (!has_nil(le))
     338        2233 :                         set_has_no_nil(e);
     339        2296 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     340        2009 :                         set_unique(e);
     341             :         }
     342             :         /* propagate unique estimation for known cases */
     343        8846 :         if (!is_union(rel->op) && (est = find_prop(le->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES)) {
     344         205 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     345         205 :                 p->value.dval = est->value.dval;
     346             :         }
     347             :         return false;
     348             : }
     349             : 
     350             : 
     351             : static void
     352       60942 : rel_munion_get_statistics(mvc *sql, sql_rel *rel, list *rels, sql_exp *e, int i)
     353             : {
     354       60942 :         assert(is_munion(rel->op));
     355             : 
     356       60942 :         sql_rel *l = rels->h->data;
     357       60942 :         sql_exp *le = list_fetch(l->exps, i);
     358       60942 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX);
     359       60942 :         bool has_nonil = !has_nil(le);
     360             : 
     361      176411 :         for(node *n = rels->h->next; n; n = n->next) {
     362      115469 :                 sql_rel *r = n->data;
     363      115469 :                 sql_exp *re = list_fetch(r->exps, i);
     364      115469 :                 atom *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     365             : 
     366      115469 :                 if (lval_max && rval_max) {
     367       42584 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     368       42584 :                         lval_max = find_prop_and_get(e->p, PROP_MAX);
     369             :                 }
     370      115469 :                 if (lval_min && rval_min) {
     371       42059 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     372       42059 :                         lval_min = find_prop_and_get(e->p, PROP_MIN);
     373             :                 }
     374      115469 :                 has_nonil &= !has_nil(re);
     375             : 
     376             :         }
     377             : 
     378       60942 :         if (has_nonil)
     379       41018 :                 set_has_no_nil(e);
     380             : 
     381       60942 :         if (need_distinct(rel) && list_length(rel->exps) == 1)
     382        1594 :                 set_unique(e);
     383       60942 : }
     384             : 
     385             : 
     386             : static sql_exp *
     387     3474704 : rel_propagate_statistics(visitor *v, sql_rel *rel, sql_exp *e, int depth)
     388             : {
     389     3474704 :         mvc *sql = v->sql;
     390     3474704 :         atom *lval;
     391             : 
     392     3474704 :         (void) depth;
     393     3474704 :         switch(e->type) {
     394     2179200 :         case e_column:
     395     2179200 :                 switch (rel->op) { /* set relations don't call rel_propagate_statistics */
     396      278023 :                 case op_join:
     397             :                 case op_left:
     398             :                 case op_right:
     399             :                 case op_full:
     400             :                 case op_semi:
     401             :                 case op_anti: {
     402      278023 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e);
     403      278023 :                         if (!found)
     404      139456 :                                 (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     405             :                         break;
     406             :                 }
     407     1901177 :                 case op_select:
     408             :                 case op_project:
     409             :                 case op_groupby: {
     410     1901177 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e); /* labels may be found on the same projection, ugh */
     411     1901174 :                         if (!found && is_simple_project(rel->op))
     412      124035 :                                 (void) rel_propagate_column_ref_statistics(sql, rel, e);
     413             :                         break;
     414             :                 }
     415           0 :                 case op_insert:
     416             :                 case op_update:
     417             :                 case op_delete:
     418           0 :                         (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     419           0 :                         break;
     420             :                 default:
     421             :                         break;
     422             :                 }
     423             :                 break;
     424       98304 :         case e_convert: {
     425       98304 :                 sql_subtype *to = exp_totype(e), *from = exp_fromtype(e);
     426       98304 :                 sql_exp *l = e->l;
     427       98304 :                 sql_class fr = from->type->eclass, too = to->type->eclass;
     428       98304 :                 prop *est;
     429             : 
     430       98304 :                 if (fr == too) {
     431       89279 :                         if ((lval = find_prop_and_get(l->p, PROP_MAX))) {
     432       58658 :                                 atom *res = atom_copy(sql->sa, lval);
     433       58658 :                                 if ((res = atom_cast(sql->sa, res, to)))
     434       58635 :                                         set_minmax_property(sql, e, PROP_MAX, res);
     435             :                         }
     436       89279 :                         if ((lval = find_prop_and_get(l->p, PROP_MIN))) {
     437       59272 :                                 atom *res = atom_copy(sql->sa, lval);
     438       59272 :                                 if ((res = atom_cast(sql->sa, res, to)))
     439       59249 :                                         set_minmax_property(sql, e, PROP_MIN, res);
     440             :                         }
     441             :                 }
     442             :                 /* propagating nuniques through conversions is tricky. I am adding just the general cases */
     443       98304 :                 if ((est = find_prop(l->p, PROP_NUNIQUES)) &&
     444       60887 :                         (((fr == EC_TIME || fr == EC_TIME_TZ || fr == EC_DATE) && (too == EC_TIMESTAMP || too == EC_TIMESTAMP_TZ)) ||
     445       60862 :                          ((fr == EC_NUM || fr == EC_BIT) && too == EC_NUM) || (fr == EC_DEC && too == EC_DEC) || (fr != EC_EXTERNAL && too == EC_STRING))) {
     446       57466 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     447       57466 :                         p->value.dval = est->value.dval;
     448             :                 }
     449       98304 :                 if (!has_nil(l))
     450       55238 :                         set_has_no_nil(e);
     451             :                 break;
     452             :         }
     453      341995 :         case e_aggr:
     454             :         case e_func: {
     455      341995 :                 BUN lv;
     456      341995 :                 sql_subfunc *f = e->f;
     457             : 
     458      341995 :                 if (!f->func->s) {
     459      315558 :                         int key = hash_key(f->func->base.name); /* Using hash lookup */
     460      315558 :                         sql_hash_e *he = sql_functions_lookup->buckets[key&(sql_functions_lookup->size-1)];
     461      315558 :                         lookup_function look = NULL;
     462             : 
     463      689091 :                         for (; he && !look; he = he->chain) {
     464      373533 :                                 struct function_properties* fp = (struct function_properties*) he->value;
     465             : 
     466      373533 :                                 if (!strcmp(f->func->base.name, fp->name))
     467      107871 :                                         look = fp->func;
     468             :                         }
     469      315558 :                         if (look)
     470      107871 :                                 look(sql, e);
     471             :                 }
     472             :                 /* for global aggregates with no semantics, if the left relation has values, then the output won't be NULL */
     473      341995 :                 if (!is_semantics(e) && e->l && !have_nil(e->l) &&
     474       88681 :                         (e->type != e_aggr || (is_groupby(rel->op) && list_length(rel->r)) || ((lv = get_rel_count(rel->l)) != BUN_NONE && lv > 0)))
     475       88358 :                         set_has_no_nil(e);
     476             :                 /* set properties for global aggregates */
     477      341995 :                 if (e->type == e_aggr && is_groupby(rel->op) && list_empty(rel->r)) {
     478        8091 :                         if (!find_prop(e->p, PROP_NUNIQUES)) {
     479        8091 :                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     480        8091 :                                 p->value.dval = 1;
     481             :                         }
     482        8091 :                         set_unique(e);
     483             :                 }
     484             :                 break;
     485             :         }
     486      570598 :         case e_atom:
     487      570598 :                 if (e->l) {
     488      552942 :                         atom *a = (atom*) e->l;
     489      552942 :                         if (!a->isnull) {
     490      490982 :                                 set_minmax_property(sql, e, PROP_MAX, a);
     491      490982 :                                 set_minmax_property(sql, e, PROP_MIN, a);
     492             :                         }
     493      552942 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     494      552942 :                         p->value.dval = 1;
     495       17656 :                 } else if (e->f) {
     496        4131 :                         list *vals = (list *) e->f;
     497        4131 :                         sql_exp *first = vals->h ? vals->h->data : NULL;
     498        4131 :                         atom *max = NULL, *min = NULL; /* all child values must have a valid min/max */
     499        4131 :                         int has_nil = 0;
     500             : 
     501        4131 :                         if (first) {
     502        4131 :                                 max = ((lval = find_prop_and_get(first->p, PROP_MAX))) ? lval : NULL;
     503        4131 :                                 min = ((lval = find_prop_and_get(first->p, PROP_MIN))) ? lval : NULL;
     504        4131 :                                 has_nil |= has_nil(first);
     505             :                         }
     506             : 
     507       17775 :                         for (node *n = vals->h ? vals->h->next : NULL ; n ; n = n->next) {
     508        9513 :                                 sql_exp *ee = n->data;
     509             : 
     510        9513 :                                 if (min && max) {
     511        9020 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MAX))) {
     512        8974 :                                                 max = atom_cmp(lval, max) > 0 ? lval : max;
     513             :                                         } else {
     514             :                                                 max = NULL;
     515             :                                         }
     516        9020 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MIN))) {
     517        8974 :                                                 min = atom_cmp(min, lval) > 0 ? lval : min;
     518             :                                         } else {
     519             :                                                 min = NULL;
     520             :                                         }
     521             :                                 }
     522        9513 :                                 has_nil |= has_nil(ee);
     523             :                         }
     524             : 
     525        4131 :                         if (!has_nil)
     526        3761 :                                 set_has_no_nil(e);
     527        4131 :                         if (min && max) {
     528        3719 :                                 set_minmax_property(sql, e, PROP_MAX, max);
     529        3719 :                                 set_minmax_property(sql, e, PROP_MIN, min);
     530             :                         }
     531        4131 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     532        4131 :                         p->value.dval = (dbl) list_length(vals);
     533             :                 } else {
     534       13525 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     535       13525 :                         p->value.dval = 1;
     536             :                 }
     537             :                 break;
     538      284607 :         case e_cmp:
     539             :                 /* TODO? propagating min/max/unique of booleans is not very worth it */
     540      284607 :                 if (e->flag == cmp_or || e->flag == cmp_filter) {
     541       17386 :                         if (!have_nil(e->l) && !have_nil(e->r))
     542       12931 :                                 set_has_no_nil(e);
     543      267221 :                 } else if (e->flag == cmp_in || e->flag == cmp_notin) {
     544       21736 :                         sql_exp *le = e->l;
     545       21736 :                         if (!has_nil(le) && !have_nil(e->r))
     546       18524 :                                 set_has_no_nil(e);
     547             :                 } else {
     548      245485 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     549      245485 :                         if (!has_nil(le) && !has_nil(re) && (!e->f || !has_nil(fe)))
     550      170248 :                                 set_has_no_nil(e);
     551             :                 }
     552             :                 break;
     553             :         case e_psm:
     554             :                 break;
     555             :         }
     556             : 
     557             : #ifndef NDEBUG
     558             :         {
     559             :                 /* min and max cannot be NULL and min must be <= than max, if it happens the inner relation must be empty! */
     560     3474701 :                 atom *min = find_prop_and_get(e->p, PROP_MIN), *max = find_prop_and_get(e->p, PROP_MAX);
     561             : 
     562     3474701 :                 (void) min;
     563     3474701 :                 (void) max;
     564     3474701 :                 assert(!min || !min->isnull);
     565     3474701 :                 assert(!max || !max->isnull);
     566     3474701 :                 assert(!min || !max || (min && max)/* atom_cmp(min, max) <= 0*/);
     567             :         }
     568             : #endif
     569     3474701 :         return e;
     570             : }
     571             : 
     572             : static list * /* Remove predicates always false from min/max values */
     573      138284 : rel_prune_predicates(visitor *v, sql_rel *rel)
     574             : {
     575      138284 :         if (rel->l) {
     576      138284 :                 sql_rel *l = rel->l;
     577      138284 :                 if (is_single(l))
     578        3269 :                         return rel->exps;
     579             :         }
     580      135015 :         if (!list_empty(rel->attr))
     581        2961 :                 return rel->exps;
     582      280175 :         for (node *n = rel->exps->h ; n ; n = n->next) {
     583      148121 :                 sql_exp *e = n->data;
     584             : 
     585      148121 :                 if (e->type == e_cmp && is_theta_exp(e->flag)) {
     586      121980 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     587      121980 :                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     588      121980 :                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     589      121980 :                         bool always_false = false, always_true = false;
     590             : 
     591      121980 :                         if (fe && !is_symmetric(e)) {
     592        3058 :                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
     593        3058 :                                 comp_type lower = range2lcompare(e->flag), higher = range2rcompare(e->flag);
     594        3660 :                                 int not_int1 = rval_min && lval_max && /* the middle and left intervals don't overlap */
     595        1663 :                                         (!is_anti(e) && (lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0)),
     596        4072 :                                         not_int2 = lval_min && fval_max && /* the middle and right intervals don't overlap */
     597        2485 :                                         (!is_anti(e) && (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0)),
     598        3058 :                                         not_int3 = rval_min && fval_max && /* the left interval is after the right one */
     599        1287 :                                         (!is_anti(e) && (atom_cmp(rval_min, fval_max) > 0));
     600             : 
     601        3058 :                                 always_false |= not_int1 || not_int2 || not_int3;
     602             :                                 /* for anti the middle must be before the left or after the right or the right after the left, for the other the middle must be always between the left and right intervals */
     603        4113 :                                 always_true |= !has_nil(le) && !has_nil(re) && !has_nil(fe) &&
     604        3957 :                                         lval_min && lval_max && rval_min && rval_max && fval_min && fval_max &&
     605         575 :                                         (is_anti(e) ? ((lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) || (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
     606         488 :                                          ((lower == cmp_gte ? atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0)));
     607             :                         } else if (!fe) {
     608      118904 :                                 if (!is_semantics(e)) /* trivial not null cmp null case */
     609      226036 :                                         always_false |= !is_anti(e) && ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)));
     610      118904 :                                 switch (e->flag) {
     611      103388 :                                 case cmp_equal:
     612      103388 :                                         if (lval_min && lval_max && rval_min && rval_max && (!is_semantics(e) || !has_nil(le) || !has_nil(re)))
     613      131510 :                                                 always_false |= (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     614      103388 :                                         if (is_semantics(e)) { /* prune *= NULL cases */
     615        5857 :                                                 always_false |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     616       11714 :                                                 always_true |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     617             :                                         }
     618             :                                         break;
     619        7241 :                                 case cmp_notequal:
     620        7241 :                                         if (lval_min && lval_max && rval_min && rval_max)
     621       11378 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     622        7241 :                                         if (is_semantics(e)) {
     623          29 :                                                 always_true |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     624          58 :                                                 always_false |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     625             :                                         }
     626             :                                         break;
     627        5632 :                                 case cmp_gt:
     628        5632 :                                         if (lval_max && rval_min)
     629        1947 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     630        5632 :                                         if (lval_min && rval_max)
     631        3894 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     632             :                                         break;
     633         119 :                                 case cmp_gte:
     634         119 :                                         if (lval_max && rval_min)
     635          49 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     636         119 :                                         if (lval_min && rval_max)
     637          98 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     638             :                                         break;
     639        2440 :                                 case cmp_lt:
     640        2440 :                                         if (lval_min && rval_max)
     641        1382 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     642        2440 :                                         if (lval_max && rval_min)
     643        2812 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     644             :                                         break;
     645          84 :                                 case cmp_lte:
     646          84 :                                         if (lval_min && rval_max)
     647          11 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     648          84 :                                         if (lval_max && rval_min)
     649          22 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     650             :                                         break;
     651             :                                 default: /* Maybe later I can do cmp_in and cmp_notin but don't forget to remove is_theta_exp check up there */
     652             :                                         break;
     653             :                                 }
     654             :                         }
     655      121980 :                         assert(!always_false || !always_true);
     656      121980 :                         if (always_false || always_true) {
     657        3557 :                                 sql_exp *ne = exp_atom_bool(v->sql->sa, always_true /* if always_true set then true else false */);
     658        3557 :                                 if (exp_name(e))
     659           0 :                                         exp_prop_alias(v->sql->sa, ne, e);
     660        3557 :                                 n->data = ne;
     661        3557 :                                 v->changes++;
     662             :                         }
     663             :                 }
     664             :         }
     665      132054 :         return rel->exps;
     666             : }
     667             : 
     668             : static sql_rel *
     669          14 : set_setop_side(visitor *v, sql_rel *rel, sql_rel *side)
     670             : {
     671          14 :         if (side == rel->r)
     672           0 :                 rel->r = NULL;
     673             :         else
     674          14 :                 rel->l = NULL;
     675          14 :         if (!is_simple_project(side->op) || rel_is_ref(side) || !list_empty(side->r)) {
     676           0 :                 side = rel_project(v->sql->sa, side, rel_projections(v->sql, side, NULL, 0, 1));
     677           0 :                 set_count_prop(v->sql->sa, side, get_rel_count(side->l));
     678           0 :                 side->exps = exps_exp_visitor_bottomup(v, side, side->exps, 0, &rel_propagate_statistics, false);
     679             :         }
     680          35 :         for (node *n = rel->exps->h, *m = side->exps->h ; n && m ; n = n->next, m = m->next) {
     681          21 :                 sql_exp *e1 = n->data, *e2 = m->data;
     682          21 :                 exp_prop_alias(v->sql->sa, e2, e1);
     683             :         }
     684          14 :         list_hash_clear(side->exps);
     685             : 
     686          14 :         if (need_distinct(rel))
     687           0 :                 set_distinct(side);
     688          14 :         side->p = prop_copy(v->sql->sa, rel->p);
     689          14 :         rel_destroy(rel);
     690          14 :         return side;
     691             : }
     692             : 
     693             : static BUN
     694       20341 : trivial_project_exp_card(sql_exp *e)
     695             : {
     696       20641 :         if (e->type == e_convert)
     697         300 :                 return trivial_project_exp_card(e->l);
     698       20341 :         return e->type == e_atom && e->f ? (BUN) list_length(e->f) : 1;
     699             : }
     700             : 
     701             : static BUN
     702       24248 : rel_calc_nuniques(mvc *sql, sql_rel *l, list *exps)
     703             : {
     704       24248 :         BUN lv = get_rel_count(l);
     705             : 
     706       24248 :         if (lv == 0)
     707             :                 return 0;
     708       21539 :         if (!list_empty(exps)) {
     709       21539 :                 BUN nuniques = 0;
     710             :                 /* compute the highest number of unique values */
     711       51131 :                 for (node *n = exps->h ; n && nuniques != BUN_NONE ; n = n->next) {
     712       29592 :                         sql_exp *e = n->data;
     713       29592 :                         sql_rel *bt = NULL;
     714       29592 :                         prop *p = NULL;
     715       29592 :                         BUN euniques = BUN_NONE;
     716       29592 :                         atom *min, *max, *sub = NULL;
     717       29592 :                         sql_subtype *tp = exp_subtype(e);
     718       29592 :                         sql_class ec = tp ? tp->type->eclass : EC_STRING; /* if 'e' has no type (eg parameter), use a non-number type to fail condition */
     719             : 
     720       29592 :                         if ((p = find_prop(e->p, PROP_NUNIQUES))) {
     721       20956 :                                 euniques = (BUN) p->value.dval;
     722        8636 :                         } else if (e->type == e_column && e->nid && rel_find_exp_and_corresponding_rel(l, e, false, &bt, NULL) && bt && (p = find_prop(bt->p, PROP_COUNT))) {
     723        6593 :                                 euniques = (BUN) p->value.lval;
     724             :                         }
     725             :                         /* use min to max range to compute number of possible values in the domain for number types */
     726       29592 :                         if ((EC_TEMP(ec)||ec==EC_NUM||ec==EC_MONTH||ec==EC_POS) &&
     727       22363 :                                 (min = find_prop_and_get(e->p, PROP_MIN)) && (max = find_prop_and_get(e->p, PROP_MAX))) {
     728             :                                 /* the range includes min and max, so the atom_inc call is needed */
     729             :                                 /* if 'euniques' has number of distinct values, compute min between both */
     730       17534 :                                 if ((sub = atom_sub(sql->sa, max, min)) && (sub = atom_inc(sql->sa, sub)) && (sub = atom_cast(sql->sa, sub, sql_bind_localtype("oid"))))
     731           0 :                                         euniques = MIN(euniques, (BUN) sub->data.val.oval);
     732             :                         }
     733       29592 :                         if (euniques != BUN_NONE)
     734       27549 :                                 nuniques = MAX(nuniques, euniques); /* the highest cardinality sets the estimation */
     735             :                         else
     736             :                                 nuniques = BUN_NONE;
     737             :                 }
     738       21539 :                 if (nuniques != BUN_NONE)
     739             :                         return nuniques;
     740             :         }
     741             :         return lv;
     742             : }
     743             : 
     744             : static sql_rel *
     745     2073065 : rel_get_statistics_(visitor *v, sql_rel *rel)
     746             : {
     747             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
     748     2073065 :         uint8_t has_special_modify = *(uint8_t*) v->data;
     749     2073065 :         bool can_be_pruned = !has_special_modify && v->storage_based_opt;
     750             : 
     751             :         /* Don't look at the same relation twice */
     752     2073065 :         if (are_statistics_gathered(rel->used))
     753             :                 return rel;
     754     1331847 :         rel->used |= statistics_gathered;
     755             : 
     756     1331847 :         switch (rel->op) {
     757      312127 :         case op_basetable: {
     758      312127 :                 sql_table *t = (sql_table *) rel->l;
     759      312127 :                 sqlstore *store = v->sql->session->tr->store;
     760             : 
     761      312127 :                 if (!list_empty(rel->exps)) {
     762     1247733 :                         for (node *n = rel->exps->h ; n ; n = n->next)
     763      935656 :                                 rel_basetable_column_get_statistics(v->sql, rel, n->data);
     764             :                 }
     765             :                 /* Set table row count. TODO? look for remote tables. Don't look at storage for declared tables, because it won't be cleaned */
     766      312126 :                 if (isTable(t) && t->s && !isDeclaredTable(t)) /* count active rows only */
     767      258602 :                         set_count_prop(v->sql->sa, rel, (BUN)store->storage_api.count_col(v->sql->session->tr, ol_first_node(t->columns)->data, 10));
     768             :                 break;
     769             :         }
     770        2790 :         case op_union:
     771             :         case op_inter:
     772             :         case op_except: {
     773        2790 :                 bool empty_cross = false;
     774        2790 :                 int i = 0;
     775        2790 :                 sql_rel *l = rel->l, *pl = rel->l, *r = rel->r, *pr = rel->r;
     776             : 
     777        2790 :                 while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     778           0 :                         pl = pl->l;
     779        2790 :                 while (is_sample(pr->op) || is_topn(pr->op))
     780           0 :                         pr = pr->l;
     781             :                 /* if it's not a projection, then project and propagate statistics */
     782        2790 :                 if (!is_project(pl->op) && !is_base(pl->op)) {
     783           0 :                         pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     784           0 :                         set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     785           0 :                         pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     786             :                 }
     787        2790 :                 if (!is_project(pr->op) && !is_base(pr->op)) {
     788           0 :                         pr = rel_project(v->sql->sa, pr, rel_projections(v->sql, pr, NULL, 0, 1));
     789           0 :                         set_count_prop(v->sql->sa, pr, get_rel_count(pr->l));
     790           0 :                         pr->exps = exps_exp_visitor_bottomup(v, pr, pr->exps, 0, &rel_propagate_statistics, false);
     791             :                 }
     792             : 
     793       11662 :                 for (node *n = rel->exps->h ; n ; n = n->next) {
     794        8872 :                         empty_cross |= rel_setop_get_statistics(v->sql, rel, pl->exps, pr->exps, n->data, i);
     795        8872 :                         i++;
     796             :                 }
     797             : 
     798             :                 /* propagate row count */
     799        2790 :                 if (is_union(rel->op)) {
     800         277 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     801         277 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     802             : 
     803         277 :                         if (lv == 0 && rv == 0) { /* both sides empty */
     804           2 :                                 if (can_be_pruned)
     805             :                                         empty_cross = true;
     806             :                                 else
     807           2 :                                         set_count_prop(v->sql->sa, rel, 0);
     808         275 :                         } else if (can_be_pruned && lv == 0 && !rel_is_ref(rel)) { /* left side empty */
     809           0 :                                 rel = set_setop_side(v, rel, r);
     810           0 :                                 empty_cross = false; /* don't rewrite again */
     811         275 :                         } else if (can_be_pruned && rv == 0 && !rel_is_ref(rel)) { /* right side empty */
     812           0 :                                 rel = set_setop_side(v, rel, l);
     813           0 :                                 empty_cross = false; /* don't rewrite again */
     814         275 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
     815           7 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
     816             :                         }
     817        2513 :                 } else if (is_inter(rel->op) || is_except(rel->op)) {
     818        2513 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     819        2513 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     820             : 
     821        2513 :                         if (lv == 0) { /* left side empty */
     822          62 :                                 if (can_be_pruned)
     823             :                                         empty_cross = true;
     824             :                                 else
     825           5 :                                         set_count_prop(v->sql->sa, rel, 0);
     826        2451 :                         } else if (rv == 0) { /* right side empty */
     827          17 :                                 if (is_inter(rel->op)) {
     828           3 :                                         if (can_be_pruned)
     829             :                                                 empty_cross = true;
     830             :                                         else
     831           0 :                                                 set_count_prop(v->sql->sa, rel, 0);
     832          14 :                                 } else if (can_be_pruned && !rel_is_ref(rel)) {
     833          14 :                                         rel = set_setop_side(v, rel, l);
     834          14 :                                         empty_cross = false; /* don't rewrite again */
     835             :                                 } else {
     836           0 :                                         set_count_prop(v->sql->sa, rel, lv);
     837             :                                 }
     838             :                         } else {
     839        2434 :                                 set_count_prop(v->sql->sa, rel, lv);
     840             :                         }
     841             :                 }
     842        2790 :                 if (can_be_pruned && empty_cross) {
     843          80 :                         rel_destroy(rel->l);
     844          80 :                         rel->l = NULL;
     845          80 :                         rel_destroy(rel->r);
     846          80 :                         rel->r = NULL;
     847         244 :                         for (node *n = rel->exps->h ; n ; n = n->next) {
     848         164 :                                 sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     849         164 :                                 exp_prop_alias(v->sql->sa, a, e);
     850         164 :                                 n->data = a;
     851             :                         }
     852          80 :                         list_hash_clear(rel->exps);
     853          80 :                         sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     854          80 :                         set_count_prop(v->sql->sa, l, 1);
     855          80 :                         l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     856          80 :                         set_count_prop(v->sql->sa, l, 0);
     857          80 :                         rel->op = op_project;
     858          80 :                         rel->l = l;
     859          80 :                         rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     860          80 :                         set_count_prop(v->sql->sa, rel, 0);
     861          80 :                         set_nodistinct(rel); /* set relations may have distinct flag set */
     862          80 :                         v->changes++;
     863             :                 }
     864             :                 break;
     865             :         }
     866       12894 :         case op_munion: {
     867       12894 :                 list *l = rel->l, *nrels = sa_list(v->sql->sa);
     868       12894 :                 BUN cnt = 0;
     869       12894 :                 bool needs_pruning = false;
     870             : 
     871       49290 :                 for (node *n = l->h; n; n = n->next) {
     872       36396 :                         sql_rel *r = n->data, *pl = r;
     873             : 
     874       36396 :                         while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     875           0 :                                         pl = pl->l;
     876             :                         /* if it's not a projection, then project and propagate statistics */
     877       36396 :                         if (!is_project(pl->op) && !is_base(pl->op)) {
     878           0 :                                 pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     879           0 :                                 set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     880           0 :                                 pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     881             :                         }
     882       36396 :                         nrels = append(nrels, pl);
     883             :                         /* we need new munion statistics */
     884             :                         /* propagate row count */
     885       36396 :                         BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     886             :                         /* if PROP_COUNT does not exist we assume at least a row (see get_rel_count def) */
     887       36396 :                         if (rv == BUN_NONE) {
     888        1156 :                                 cnt++;
     889        1156 :                                 continue;
     890             :                         }
     891       35240 :                         if (!rv && can_be_pruned)
     892        6633 :                                 needs_pruning = true;
     893             :                         /* overflow check */
     894       35240 :                         if (rv > (BUN_MAX - cnt))
     895       36396 :                                 rv = BUN_MAX;
     896             :                         else
     897       35240 :                                 cnt += rv;
     898             :                 }
     899       12894 :                 int i = 0;
     900       73836 :                 for (node *n = rel->exps->h ; n ; n = n->next, i++)
     901       60942 :                         rel_munion_get_statistics(v->sql, rel, nrels, n->data, i);
     902             : 
     903       12894 :                 if (needs_pruning && !rel_is_ref(rel)) {
     904        4472 :                         v->changes++;
     905        4472 :                         list *nl = sa_list(l->sa);
     906             : 
     907       16488 :                         for (node *n = nrels->h; n; n = n->next) {
     908       12016 :                                 sql_rel *r = n->data;
     909       12016 :                                 BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     910             : 
     911       12016 :                                 if (!rv) { /* keep last for now */
     912        6163 :                                         rel_destroy(r);
     913        6163 :                                         continue;
     914             :                                 }
     915        5853 :                                 nl = append(nl, r);
     916             :                         }
     917        4472 :                         rel->l = nl;
     918        4472 :                         if (list_length(nl) == 1) {
     919        4147 :                                 sql_rel *l = rel->l = nl->h->data; /* ugh */
     920        4147 :                                 rel->r = NULL;
     921        4147 :                                 rel->op = op_project;
     922             : 
     923       20433 :                                 for (node *n = rel->exps->h, *m = l->exps->h ; n && m ; n = n->next, m = m->next) {
     924       16286 :                                         sql_exp *pe = n->data, *ie = m->data;
     925       16286 :                                         sql_exp *ne = exp_ref(v->sql, ie);
     926       16286 :                                         exp_prop_alias(v->sql->sa, ne, pe);
     927       16286 :                                         n->data = ne;
     928             :                                 }
     929        4147 :                                 list_hash_clear(rel->exps);
     930         325 :                         } else if (list_empty(nl)) {
     931             :                                 /* empty select (project [ nils ] ) */
     932         454 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     933         354 :                                         sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     934         354 :                                         exp_prop_alias(v->sql->sa, a, e);
     935         354 :                                         n->data = a;
     936             :                                 }
     937         100 :                                 list_hash_clear(rel->exps);
     938         100 :                                 sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     939         100 :                                 set_count_prop(v->sql->sa, l, 1);
     940         100 :                                 l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     941         100 :                                 set_count_prop(v->sql->sa, l, 0);
     942         100 :                                 rel->op = op_project;
     943         100 :                                 rel->r = NULL;
     944         100 :                                 rel->l = l;
     945         100 :                                 rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     946         100 :                                 set_count_prop(v->sql->sa, rel, 0);
     947         100 :                                 set_nodistinct(rel); /* set relations may have distinct flag set */
     948             :                         }
     949             :                 } else {
     950        8422 :                         set_count_prop(v->sql->sa, rel, cnt);
     951             :                 }
     952             :                 break;
     953             :         }
     954      530783 :         case op_join:
     955             :         case op_left:
     956             :         case op_right:
     957             :         case op_full:
     958             :         case op_semi:
     959             :         case op_anti:
     960             :         case op_select:
     961             :         case op_project:
     962             :         case op_groupby: {
     963      530783 :                 if (is_groupby(rel->op) && !list_empty(rel->r))
     964       15531 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     965      530783 :                 rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, &rel_propagate_statistics, false);
     966      530782 :                 if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r))
     967       39125 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     968             :                 /* The following optimizations can only be applied after propagating the statistics to rel->exps */
     969      530782 :                 if (can_be_pruned && (is_join(rel->op) || is_select(rel->op)) && !list_empty(rel->exps)) {
     970      138284 :                         int changes = v->changes;
     971      138284 :                         rel->exps = rel_prune_predicates(v, rel);
     972      138284 :                         if (v->changes > changes)
     973        3524 :                                 rel = rewrite_simplify(v, 0, v->value_based_opt, rel);
     974             :                 }
     975             : 
     976             :                 /* propagate row count */
     977      530782 :                 sql_rel *l = rel->l, *r = rel->r;
     978      530782 :                 switch (rel->op) {
     979      133880 :                 case op_join:
     980             :                 case op_left:
     981             :                 case op_right:
     982             :                 case op_full: {
     983      133880 :                         BUN lv = get_rel_count(l), rv = get_rel_count(r), uniques_estimate = BUN_MAX, join_idx_estimate = BUN_MAX;
     984             : 
     985      133880 :                         if (!list_empty(rel->exps) && !is_single(rel)) {
     986      245020 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     987      125082 :                                         sql_exp *e = n->data, *el = e->l, *er = e->r;
     988             : 
     989      125082 :                                         if (find_prop(e->p, PROP_JOINIDX)) {
     990         667 :                                                 join_idx_estimate = lv>rv?lv:rv;
     991      124415 :                                         } else if (e->type == e_cmp && e->flag == cmp_equal) {
     992             :                                                 /* if one of the sides is unique, the cardinality will be that exact number, but look for nulls */
     993      120531 :                                                 if (!is_semantics(e) || !has_nil(el) || !has_nil(er)) {
     994      120135 :                                                         BUN lu = 0, ru = 0;
     995      120135 :                                                         prop *p = NULL;
     996      120135 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES)))
     997       89002 :                                                                 lu = (BUN) p->value.dval;
     998      120135 :                                                         if ((p = find_prop(er->p, PROP_NUNIQUES)))
     999      103445 :                                                                 ru = (BUN) p->value.dval;
    1000      120135 :                                                         if (is_unique(el) || lu > lv) {
    1001       73181 :                                                                 BUN ncount = (is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv;
    1002       73181 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1003       46954 :                                                         } else if (is_unique(er) || ru > rv) {
    1004       29351 :                                                                 BUN ncount = (is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv;
    1005       29351 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
    1006             :                                                         }
    1007             :                                                 }
    1008             :                                         }
    1009             :                                 }
    1010             :                         }
    1011      133880 :                         if (is_single(rel)) {
    1012        4609 :                                 set_count_prop(v->sql->sa, rel, lv);
    1013      129271 :                         } else if (join_idx_estimate != BUN_MAX) {
    1014         665 :                                 set_count_prop(v->sql->sa, rel, join_idx_estimate);
    1015      128606 :                         } else if (uniques_estimate != BUN_MAX) {
    1016       96107 :                                 set_count_prop(v->sql->sa, rel, uniques_estimate);
    1017       32499 :                         } else if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1018             :                                 /* corner cases for outer joins */
    1019         126 :                                 if (is_left(rel->op)) {
    1020         114 :                                         set_count_prop(v->sql->sa, rel, lv);
    1021          12 :                                 } else if (is_right(rel->op)) {
    1022           3 :                                         set_count_prop(v->sql->sa, rel, rv);
    1023           9 :                                 } else if (is_full(rel->op) && lv != BUN_NONE && rv != BUN_NONE) {
    1024           9 :                                         set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
    1025           0 :                                 } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1026           0 :                                         set_count_prop(v->sql->sa, rel, 0);
    1027             :                                 }
    1028       32373 :                         } else if (lv == 0) {
    1029        1166 :                                 set_count_prop(v->sql->sa, rel, (is_right(rel->op) || is_full(rel->op)) ? rv : 0);
    1030       31776 :                         } else if (rv == 0) {
    1031        1568 :                                 set_count_prop(v->sql->sa, rel, (is_left(rel->op) || is_full(rel->op)) ? lv : 0);
    1032       30702 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1033       18252 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX / lv)) ? BUN_MAX : (lv * rv)); /* overflow check */
    1034             :                         }
    1035             :                         break;
    1036             :                 }
    1037        2630 :                 case op_anti:
    1038        2630 :                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1039        2630 :                         break;
    1040      108292 :                 case op_semi:
    1041             :                 case op_select:
    1042             :                         /* TODO calculate cardinalities using selectivities */
    1043      108292 :                         if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1044        2590 :                                 set_count_prop(v->sql->sa, rel, 0);
    1045             :                         } else {
    1046      208164 :                                 if (!list_empty(rel->exps) && !is_single(rel)) {
    1047      102462 :                                         BUN cnt = get_rel_count(l), u = 1;
    1048      171150 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1049      112327 :                                                 sql_exp *e = n->data, *el = e->l, *er = e->r;
    1050             : 
    1051             :                                                 /* simple expressions first */
    1052      112327 :                                                 if (e->type == e_cmp && e->flag == cmp_equal && exp_is_atom(er)) {
    1053             :                                                         /* use selectivity */
    1054       56843 :                                                         prop *p;
    1055       56843 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES))) {
    1056       43639 :                                                                 u = (BUN) p->value.dval;
    1057       43639 :                                                                 break;
    1058             :                                                         }
    1059             :                                                 }
    1060             :                                         }
    1061             :                                         /* u is an *estimate*, so don't set count_prop to 0 unless cnt is 0 */
    1062      102462 :                                         set_count_prop(v->sql->sa, rel, cnt == 0 ? 0 : u == 0 || u > cnt ? 1 : cnt/u);
    1063             :                                 } else {
    1064        3240 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1065             :                                 }
    1066             :                         }
    1067             :                         break;
    1068      262559 :                 case op_project:
    1069      262559 :                         if (l) {
    1070      252652 :                                 if (need_distinct(rel)) {
    1071           0 :                                         set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->exps));
    1072             :                                 } else {
    1073      252652 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1074             :                                 }
    1075             :                         } else {
    1076        9907 :                                 BUN card = 1;
    1077             : 
    1078        9907 :                                 if (!list_empty(rel->exps)) {
    1079       28940 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1080       19033 :                                                 sql_exp *e = n->data;
    1081       19033 :                                                 card = MAX(card, trivial_project_exp_card(e));
    1082             :                                         }
    1083             :                                 }
    1084        9907 :                                 set_count_prop(v->sql->sa, rel, card);
    1085             :                         }
    1086             :                         break;
    1087       23057 :                 case op_groupby:
    1088       23057 :                         if (list_empty(rel->r)) {
    1089        7525 :                                 set_count_prop(v->sql->sa, rel, 1);
    1090             :                         } else {
    1091       15532 :                                 set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->r));
    1092             :                         }
    1093             :                         break;
    1094             :                 default:
    1095             :                         break;
    1096             :                 }
    1097             :                 break;
    1098             :         }
    1099       16859 :         case op_topn: {
    1100       16859 :                 BUN lv = get_rel_count(rel->l);
    1101             : 
    1102       16859 :                 if (lv != BUN_NONE) {
    1103       16842 :                         sql_exp *le = rel->exps->h->data, *oe = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
    1104          96 :                         if (oe && oe->l && exp_is_not_null(oe)) { /* no parameters */
    1105          96 :                                 BUN offset = (BUN) ((atom*)oe->l)->data.val.lval;
    1106          96 :                                 lv = offset >= lv ? 0 : lv - offset;
    1107             :                         }
    1108       16842 :                         if (le->l && exp_is_not_null(le)) {
    1109       16804 :                                 BUN limit = (BUN) ((atom*)le->l)->data.val.lval;
    1110       16804 :                                 lv = MIN(lv, limit);
    1111             :                         }
    1112       16842 :                         set_count_prop(v->sql->sa, rel, lv);
    1113             :                 }
    1114             :                 break;
    1115             :         }
    1116          22 :         case op_sample: {
    1117          22 :                 BUN lv = get_rel_count(rel->l);
    1118             : 
    1119          22 :                 if (lv != BUN_NONE) {
    1120           4 :                         sql_exp *se = rel->exps->h->data;
    1121           4 :                         sql_subtype *tp = exp_subtype(se);
    1122             : 
    1123           4 :                         if (se->l && tp->type->eclass == EC_NUM) { /* sample is a number of rows */
    1124           4 :                                 BUN sample = (BUN) ((atom*)se->l)->data.val.lval;
    1125           4 :                                 lv = MIN(lv, sample);
    1126           0 :                         } else if (se->l) { /* sample is a percentage of rows */
    1127           0 :                                 dbl percent = ((atom*)se->l)->data.val.dval;
    1128           0 :                                 assert(tp->type->eclass == EC_FLT);
    1129           0 :                                 lv = (BUN) ceil((dbl)lv * percent);
    1130             :                         }
    1131           4 :                         set_count_prop(v->sql->sa, rel, lv);
    1132             :                 }
    1133             :                 break;
    1134             :         }
    1135        6087 :         case op_table: {
    1136        6087 :                 sql_exp *op = rel->r;
    1137        6087 :                 if (rel->flag != TRIGGER_WRAPPER && op) {
    1138        5775 :                         sql_subfunc *f = op->f;
    1139        5775 :                         if (f->func->lang == FUNC_LANG_SQL) {
    1140          97 :                                 set_count_prop(v->sql->sa, rel, 1000 /* just some fallback value */);
    1141        5678 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "storage") == 0) {
    1142         829 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of catalog */);
    1143        4849 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "db_users") == 0) {
    1144           0 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of users */);
    1145        4849 :                         } else if (f->func->lang == FUNC_LANG_MAL && strncmp(f->func->base.name, "querylog", 8) == 0) {
    1146        1085 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of querylog */);
    1147        3764 :                         } else if (f->func->lang == FUNC_LANG_MAL &&
    1148        3654 :                                            (strcmp(f->func->base.name, "queue") == 0 ||
    1149        3390 :                                                 strcmp(f->func->base.name, "optimizers") == 0 ||
    1150        3078 :                                                 strcmp(f->func->base.name, "env") == 0 ||
    1151        2786 :                                                 strcmp(f->func->base.name, "keywords") == 0 ||
    1152        2786 :                                                 strcmp(f->func->base.name, "statistics") == 0 ||
    1153        2119 :                                                 strcmp(f->func->base.name, "rejects") == 0 ||
    1154        1867 :                                                 strcmp(f->func->base.name, "schemastorage") == 0 ||
    1155        1867 :                                                 strncmp(f->func->base.name, "storage", 7) == 0 ||
    1156        1867 :                                                 strcmp(f->func->base.name, "sessions") == 0) ) {
    1157        2077 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of queue */);
    1158             :                         }
    1159             :                         /* else {
    1160             :                                 printf("%%func needs stats : %s\n", f->func->base.name);
    1161             :                         } */
    1162             :                 }
    1163             :                 break;
    1164             :         }
    1165             :         /*These relations are less important for now
    1166             :           TODO later we can tune it */
    1167             :         case op_insert:
    1168             :         case op_update:
    1169             :         case op_delete:
    1170             :         case op_truncate:
    1171             :         case op_ddl:
    1172             :         default:
    1173             :                 break;
    1174             :         }
    1175             : 
    1176             :         return rel;
    1177             : }
    1178             : 
    1179             : static sql_rel *
    1180      541947 : rel_get_statistics(visitor *v, global_props *gp, sql_rel *rel)
    1181             : {
    1182             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
    1183      541947 :         uint8_t has_special_modify = (uint8_t) gp->has_special_modify;
    1184      541947 :         v->data = &has_special_modify;
    1185      541947 :         rel = rel_visitor_bottomup(v, rel, &rel_get_statistics_);
    1186      541948 :         v->data = gp;
    1187      541948 :         return rel;
    1188             : }
    1189             : 
    1190             : run_optimizer
    1191      718453 : bind_get_statistics(visitor *v, global_props *gp)
    1192             : {
    1193      718453 :         (void) v;
    1194      718453 :         return (gp->opt_level == 1 && !gp->cnt[op_insert]) ? rel_get_statistics : NULL;
    1195             : }
    1196             : 
    1197             : 
    1198             : static bool
    1199       94500 : point_select_on_unique_column(sql_rel *rel)
    1200             : {
    1201       94500 :         if (is_select(rel->op) && !list_empty(rel->exps)) {
    1202      130811 :                 for (node *n = rel->exps->h; n ; n = n->next) {
    1203       75368 :                         sql_exp *e = n->data, *el = e->l, *er = e->r, *found = NULL;
    1204             : 
    1205       75368 :                         if (is_compare(e->type) && e->flag == cmp_equal) {
    1206       33339 :                                 if (is_numeric_upcast(el))
    1207           0 :                                         el = el->l;
    1208       33339 :                                 if (is_numeric_upcast(er))
    1209           0 :                                         er = er->l;
    1210       33339 :                                 if (is_alias(el->type) && exp_is_atom(er) && (found = rel_find_exp(rel->l, el)) &&
    1211         749 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(found) || !has_nil(er)))
    1212             :                                         return true;
    1213       32590 :                                 if (is_alias(er->type) && exp_is_atom(el) && (found = rel_find_exp(rel->l, er)) &&
    1214           0 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(el) || !has_nil(found)))
    1215             :                                         return true;
    1216             :                         }
    1217             :                 }
    1218             :         }
    1219             :         return false;
    1220             : }
    1221             : 
    1222             : /*
    1223             :  * A point select on an unique column reduces the number of rows to 1. If the same select is under a
    1224             :  * join, the opposite side's select can be pushed above the join.
    1225             :  */
    1226             : static inline sql_rel *
    1227     1243070 : rel_push_select_up(visitor *v, sql_rel *rel)
    1228             : {
    1229     1243070 :         if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) || is_semi(rel->op)) && !is_single(rel)) {
    1230      253619 :                 sql_rel *l = rel->l, *r = rel->r;
    1231      253619 :                 bool can_pushup_left = is_select(l->op) && !rel_is_ref(l) && !is_single(l) && (is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op)),
    1232      253619 :                         can_pushup_right = is_select(r->op) && !rel_is_ref(r) && !is_single(r) && (is_innerjoin(rel->op) || is_right(rel->op));
    1233             : 
    1234      253619 :                 if (can_pushup_left || can_pushup_right) {
    1235       66641 :                         if (can_pushup_left)
    1236       45254 :                                 can_pushup_left = point_select_on_unique_column(r);
    1237       66641 :                         if (can_pushup_right)
    1238       49246 :                                 can_pushup_right = point_select_on_unique_column(l);
    1239             : 
    1240             :                         /* if both selects retrieve one row each, it's not worth it to push both up */
    1241       66641 :                         if (can_pushup_left && !can_pushup_right) {
    1242         683 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1243         683 :                                 nrel->l = l->l;
    1244         683 :                                 rel = rel_inplace_select(rel, nrel, l->exps);
    1245         683 :                                 assert(is_select(rel->op));
    1246         683 :                                 v->changes++;
    1247       65958 :                         } else if (!can_pushup_left && can_pushup_right) {
    1248          14 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1249          14 :                                 nrel->r = r->l;
    1250          14 :                                 rel = rel_inplace_select(rel, nrel, r->exps);
    1251          14 :                                 assert(is_select(rel->op));
    1252          14 :                                 v->changes++;
    1253             :                         }
    1254             :                 }
    1255             :         }
    1256     1243070 :         return rel;
    1257             : }
    1258             : 
    1259             : static int
    1260       93400 : sql_class_base_score(visitor *v, sql_column *c, sql_subtype *t, bool equality_based)
    1261             : {
    1262       93400 :         int de;
    1263             : 
    1264       93400 :         if (!t)
    1265             :                 return 0;
    1266       93400 :         switch (ATOMstorage(t->type->localtype)) {
    1267             :         case TYPE_bte:
    1268             :                 return 150 - 8;
    1269        1672 :         case TYPE_sht:
    1270        1672 :                 return 150 - 16;
    1271       37312 :         case TYPE_int:
    1272       37312 :                 return 150 - 32;
    1273         516 :         case TYPE_void:
    1274             :         case TYPE_lng:
    1275         516 :                 return 150 - 64;
    1276             :         case TYPE_uuid:
    1277             : #ifdef HAVE_HGE
    1278             :         case TYPE_hge:
    1279             : #endif
    1280             :                 return 150 - 128;
    1281           2 :         case TYPE_flt:
    1282           2 :                 return 75 - 24;
    1283             :         case TYPE_dbl:
    1284             :                 return 75 - 53;
    1285       30566 :         default:
    1286       30566 :                 if (equality_based && c && v->storage_based_opt && (de = mvc_is_duplicate_eliminated(v->sql, c)))
    1287        1554 :                         return 150 - de * 8;
    1288             :                 /* strings and blobs not duplicate eliminated don't get any points here */
    1289             :                 return 0;
    1290             :         }
    1291             : }
    1292             : 
    1293             : static int
    1294       34501 : score_se_base(visitor *v, sql_rel *rel, sql_exp *e)
    1295             : {
    1296       34501 :         int res = 0;
    1297       34501 :         sql_subtype *t = exp_subtype(e);
    1298       34501 :         sql_column *c = NULL;
    1299             : 
    1300       34501 :         if (e->type == e_convert) /* keep unsafes at the end (TODO improve) */
    1301             :                 return -1000;
    1302             :         /* can we find out if the underlying table is sorted */
    1303       33900 :         if ((c = exp_find_column(rel, e, -2)) && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1304       33900 :                 res += 600;
    1305             : 
    1306             :         /* prefer the shorter var types over the longer ones */
    1307       33900 :         res += sql_class_base_score(v, c, t, is_equality_or_inequality_exp(e->flag)); /* smaller the type, better */
    1308       33900 :         return res;
    1309             : }
    1310             : 
    1311             : static int
    1312       58490 : score_se(visitor *v, sql_rel *rel, sql_exp *e)
    1313             : {
    1314       58490 :         int score = 0;
    1315       58490 :         if (e->type == e_cmp && !is_complex_exp(e->flag)) {
    1316       34501 :                 sql_exp *l = e->l;
    1317             : 
    1318       34501 :                 while (l->type == e_cmp) { /* go through nested comparisons */
    1319           2 :                         sql_exp *ll;
    1320             : 
    1321           2 :                         if (l->flag == cmp_filter || l->flag == cmp_or)
    1322           0 :                                 ll = ((list*)l->l)->h->data;
    1323             :                         else
    1324           2 :                                 ll = l->l;
    1325           2 :                         if (ll->type != e_cmp)
    1326             :                                 break;
    1327             :                         l = ll;
    1328             :                 }
    1329       34501 :                 score += score_se_base(v, rel, l);
    1330             :         }
    1331       58490 :         score += exp_keyvalue(e);
    1332       58490 :         return score;
    1333             : }
    1334             : 
    1335             : static inline sql_rel *
    1336     1243070 : rel_select_order(visitor *v, sql_rel *rel)
    1337             : {
    1338     1243070 :         int *scores = NULL;
    1339     1243070 :         sql_exp **exps = NULL;
    1340             : 
    1341     1243070 :         if (is_select(rel->op) && list_length(rel->exps) > 1) {
    1342       27368 :                 node *n;
    1343       27368 :                 int i, nexps = list_length(rel->exps);
    1344       27368 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, nexps);
    1345       27368 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, nexps);
    1346             : 
    1347       85858 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next) {
    1348       58520 :                         exps[i] = n->data;
    1349       58520 :                         if (find_prop(exps[i]->p, PROP_HASHCOL))
    1350             :                                 return rel;
    1351       58490 :                         scores[i] = score_se(v, rel, n->data);
    1352             :                 }
    1353       27338 :                 GDKqsort(scores, exps, NULL, nexps, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1354             : 
    1355       85828 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next)
    1356       58490 :                         n->data = exps[i];
    1357             :         }
    1358             : 
    1359             :         return rel;
    1360             : }
    1361             : 
    1362             : /* Compute the efficiency of using this expression early in a group by list */
    1363             : static int
    1364       59500 : score_gbe(visitor *v, sql_rel *rel, sql_exp *e)
    1365             : {
    1366       59500 :         int res = 0;
    1367       59500 :         sql_subtype *t = exp_subtype(e);
    1368       59500 :         sql_column *c = exp_find_column(rel, e, -2);
    1369             : 
    1370       59500 :         if (e->card == CARD_ATOM) /* constants are trivial to group */
    1371          38 :                 res += 1000;
    1372             :         /* can we find out if the underlying table is sorted */
    1373       59500 :         if (is_unique(e) || find_prop(e->p, PROP_HASHCOL) || (c && v->storage_based_opt && mvc_is_unique(v->sql, c))) /* distinct columns */
    1374        3585 :                 res += 700;
    1375       38911 :         if (c && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1376        3712 :                 res += 500;
    1377       59500 :         if (find_prop(e->p, PROP_HASHIDX)) /* has hash index */
    1378           0 :                 res += 200;
    1379             : 
    1380             :         /* prefer the shorter var types over the longer ones */
    1381       59500 :         res += sql_class_base_score(v, c, t, true); /* smaller the type, better */
    1382       59500 :         return res;
    1383             : }
    1384             : 
    1385             : /* reorder group by expressions */
    1386             : static inline sql_rel *
    1387     1243070 : rel_groupby_order(visitor *v, sql_rel *rel)
    1388             : {
    1389     1243070 :         int *scores = NULL;
    1390     1243070 :         sql_exp **exps = NULL;
    1391             : 
    1392     1243070 :         if (is_groupby(rel->op) && list_length(rel->r) > 1) {
    1393       25587 :                 node *n;
    1394       25587 :                 list *gbe = rel->r;
    1395       25587 :                 int i, ngbe = list_length(gbe);
    1396       25587 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, ngbe);
    1397       25587 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, ngbe);
    1398             : 
    1399             :                 /* first sorting step, give priority for integers and sorted columns */
    1400       85087 :                 for (i = 0, n = gbe->h; n; i++, n = n->next) {
    1401       59500 :                         exps[i] = n->data;
    1402       59500 :                         scores[i] = score_gbe(v, rel, exps[i]);
    1403             :                 }
    1404       25587 :                 GDKqsort(scores, exps, NULL, ngbe, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1405             : 
    1406             :                 /* second sorting step, give priority to strings with lower number of digits */
    1407       50503 :                 for (i = ngbe - 1; i && !scores[i]; i--); /* find expressions with no score from the first round */
    1408       25587 :                 if (scores[i])
    1409       24589 :                         i++;
    1410       25587 :                 if (ngbe - i > 1) {
    1411        8604 :                         for (int j = i; j < ngbe; j++) {
    1412        6597 :                                 sql_subtype *t = exp_subtype(exps[j]);
    1413        6597 :                                 scores[j] = t ? t->digits : 0;
    1414             :                         }
    1415             :                         /* the less number of digits the better, order ascending */
    1416        2007 :                         GDKqsort(scores + i, exps + i, NULL, ngbe - i, sizeof(int), sizeof(void *), TYPE_int, false, true);
    1417             :                 }
    1418             : 
    1419       85087 :                 for (i = 0, n = gbe->h; n; i++, n = n->next)
    1420       59500 :                         n->data = exps[i];
    1421             :         }
    1422             : 
    1423     1243071 :         return rel;
    1424             : }
    1425             : 
    1426             : /* This optimization loop contains optimizations that can potentially use statistics */
    1427             : static sql_rel *
    1428     1243070 : rel_final_optimization_loop_(visitor *v, sql_rel *rel)
    1429             : {
    1430             :         /* Run rel_push_select_up only once at the end to avoid an infinite optimization loop */
    1431     1243070 :         rel = rel_push_select_up(v, rel);
    1432     1243070 :         rel = rel_select_order(v, rel);
    1433             : 
    1434             :         /* TODO? Maybe later add rel_simplify_count, rel_join2semijoin, rel_simplify_fk_joins,
    1435             :            rel_distinct_project2groupby, rel_simplify_predicates, rel_simplify_math,
    1436             :            rel_distinct_aggregate_on_unique_values */
    1437             : 
    1438     1243070 :         rel = rel_groupby_order(v, rel);
    1439     1243071 :         return rel;
    1440             : }
    1441             : 
    1442             : static sql_rel *
    1443       65961 : rel_final_optimization_loop(visitor *v, global_props *gp, sql_rel *rel)
    1444             : {
    1445       65961 :         (void) gp;
    1446       65961 :         return rel_visitor_bottomup(v, rel, &rel_final_optimization_loop_);
    1447             : }
    1448             : 
    1449             : run_optimizer
    1450      718455 : bind_final_optimization_loop(visitor *v, global_props *gp)
    1451             : {
    1452      718455 :         int flag = v->sql->sql_optimizer;
    1453             :         /* At the moment, this optimizer has dependency on 3 flags */
    1454      549537 :         return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_select]) &&
    1455      784418 :                 (flag & push_select_up) && (flag & optimize_select_and_joins_topdown) && (flag & optimize_projections) ? rel_final_optimization_loop : NULL;
    1456             : }

Generated by: LCOV version 1.14