LCOV - code coverage report
Current view: top level - sql/server - rel_statistics.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 837 877 95.4 %
Date: 2024-04-26 00:35:57 Functions: 26 26 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include "rel_optimizer_private.h"
      15             : #include "rel_statistics.h"
      16             : #include "rel_rewriter.h"
      17             : 
      18             : static sql_exp *
      19     3817953 : comparison_find_column(sql_exp *input, sql_exp *e)
      20             : {
      21     3817953 :         switch (input->type) {
      22       99867 :         case e_convert: {
      23       99867 :                 list *types = (list *)input->r;
      24       99867 :                 sql_class from = ((sql_subtype*)types->h->data)->type->eclass, to = ((sql_subtype*)types->h->next->data)->type->eclass;
      25       99867 :                 if (from == to)
      26      192882 :                         return comparison_find_column(input->l, e) ? input : NULL;
      27             :                 return NULL;
      28             :         }
      29     3163877 :         case e_column:
      30     3163877 :                 return exp_match(e, input) ? input : NULL;
      31             :         default:
      32             :                 return NULL;
      33             :         }
      34             : }
      35             : 
      36             : /* multi lo <= col <= hi, maybe still be false even if lo or hi are NULL, possibly similar for filter on multiple
      37             :  * columns */
      38             : #define comp_single_column(c) (!c->f)
      39             : 
      40             : static sql_exp *
      41     8318293 : rel_propagate_column_ref_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
      42             : {
      43     8402049 :         assert(e->type == e_column);
      44     8402049 :         if (rel) {
      45     8402009 :                 switch(rel->op) {
      46     3972628 :                 case op_left:
      47             :                 case op_right:
      48             :                 case op_full:
      49             :                 case op_join:
      50             :                 case op_select:
      51             :                 case op_anti:
      52             :                 case op_semi: {
      53     3972628 :                         bool found_without_semantics = false, found_left = false, found_right = false, still_unique = false;
      54             : 
      55     3972628 :                         if ((is_innerjoin(rel->op) || is_select(rel->op)) && list_length(rel->exps) == 1 && exp_is_false(rel->exps->h->data))
      56             :                                 return NULL; /* nothing will pass, skip */
      57             : 
      58             :                         /* propagate from the bottom first */
      59     3964680 :                         if (rel_propagate_column_ref_statistics(sql, rel->l, e))
      60             :                                 found_left = true;
      61     2532695 :                         if (!found_left && is_join(rel->op) && rel_propagate_column_ref_statistics(sql, rel->r, e))
      62     3964680 :                                 found_right = true;
      63             : 
      64     3964680 :                         if (!found_left && !found_right)
      65             :                                 return NULL;
      66     1686717 :                         if (!list_empty(rel->exps) && rel->op != op_anti) { /* if there's an or, the MIN and MAX get difficult to propagate */
      67     3594472 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
      68     1991261 :                                         sql_exp *comp = n->data, *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL, *fe = comp->f, *fne = NULL;
      69             : 
      70     1991261 :                                         if (comp->type == e_cmp) {
      71     1990390 :                                                 if (is_theta_exp(comp->flag) && ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) || (fe && (fne = comparison_find_column(fe, e))))) {
      72      134297 :                                                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
      73      134297 :                                                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
      74             : 
      75             :                                                         /* not semantics found or if explicitly filtering not null values from the column */
      76      134297 :                                                         found_without_semantics |= (!is_semantics(comp) && comp_single_column(comp)) || (comp->flag == cmp_equal && lne && is_anti(comp) && exp_is_null(re));
      77      134297 :                                                         still_unique |= comp->flag == cmp_equal && is_unique(le) && is_unique(re); /* unique if only equi-joins on unique columns are there */
      78      134297 :                                                         if (is_full(rel->op) || (is_left(rel->op) && found_left) || (is_right(rel->op) && found_right)) /* on outer joins, min and max cannot be propagated on some cases */
      79       19149 :                                                                 continue;
      80             :                                                         /* if (end2 >= start1 && start2 <= end1) then the 2 intervals are intersected */
      81      115148 :                                                         if (fe && lval_min && lval_max) { /* range case, the middle expression must intersect the other two */
      82        4579 :                                                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
      83        4579 :                                                                 int int1 = rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0,
      84        4579 :                                                                         int2 = fval_min && fval_max && atom_cmp(fval_max, lval_min) >= 0 && atom_cmp(fval_min, lval_max) <= 0,
      85        4579 :                                                                         symmetric = is_symmetric(comp);
      86             : 
      87        4579 :                                                                 if (is_anti(comp) || (!symmetric && fval_min && rval_max && atom_cmp(fval_min, rval_max) < 0)) /* for asymmetric case the re range must be after the fe range */
      88        1814 :                                                                         continue;
      89        2765 :                                                                 if (lne && int1 && int2) {
      90         104 :                                                                         if (symmetric) {
      91           0 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
      92           0 :                                                                                 atom *nmin = statistics_atom_min(sql, rval_min, fval_min), *nmax = statistics_atom_max(sql, rval_max, fval_max);
      93             :                                                                                 /* max is min from le and (max from re and fe max) */
      94           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, nmax, p2->value.pval) : nmax);
      95             :                                                                                 /* min is max from le and (min from re and fe min) */
      96           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, nmin, p1->value.pval) : nmin);
      97             :                                                                         } else {
      98         104 :                                                                                 prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
      99             :                                                                                 /* max is min from le and fe max */
     100         104 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, fval_max, p2->value.pval) : fval_max);
     101             :                                                                                 /* min is max from le and re min */
     102         104 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, rval_min, p1->value.pval) : rval_min);
     103             :                                                                         }
     104        2661 :                                                                 } else if (rne) {
     105         584 :                                                                         if (symmetric && int1 && int2) { /* min is max from le and (min from re and fe min) */
     106           0 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     107           0 :                                                                                 atom *nmin = p ? statistics_atom_min(sql, p->value.pval, fval_min) : fval_min;
     108           0 :                                                                                 set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, nmin, lval_min));
     109         584 :                                                                         } else if (int1) { /* min is max from le and re min */
     110         100 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     111         100 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     112             :                                                                         }
     113        2077 :                                                                 } else if (fne) {
     114         546 :                                                                         if (symmetric && int1 && int2) { /* max is min from le and (max from re and fe max) */
     115           0 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     116           0 :                                                                                 atom *nmax = p ? statistics_atom_max(sql, p->value.pval, rval_max) : rval_max;
     117           0 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, nmax, lval_max) : nmax);
     118         546 :                                                                         } else if (int2) { /* max is min from le and fe max */
     119         262 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     120         262 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     121             :                                                                         }
     122             :                                                                 }
     123      110569 :                                                         } else if (lval_min && lval_max && rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0) {
     124             :                                                                 /* both min and max must be set and the intervals must overlap */
     125       46543 :                                                                 switch (comp->flag) {
     126       32144 :                                                                 case cmp_equal: /* for equality reduce */
     127       32144 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_max(sql, lval_max, rval_max) : statistics_atom_min(sql, lval_max, rval_max));
     128       32144 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_min(sql, lval_min, rval_min) : statistics_atom_max(sql, lval_min, rval_min));
     129       32144 :                                                                         break;
     130        4852 :                                                                 case cmp_notequal: /* for inequality expand */
     131        4852 :                                                                         set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_min(sql, lval_max, rval_max) : statistics_atom_max(sql, lval_max, rval_max));
     132        4852 :                                                                         set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_max(sql, lval_min, rval_min) : statistics_atom_min(sql, lval_min, rval_min));
     133        4852 :                                                                         break;
     134        5575 :                                                                 case cmp_gt:
     135             :                                                                 case cmp_gte:
     136       10219 :                                                                         if (!is_anti(comp) && lne) { /* min is max from both min */
     137        4644 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     138        4644 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min, p->value.pval) : rval_min);
     139         931 :                                                                         } else if (!is_anti(comp)) { /* max is min from both max */
     140         931 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     141         931 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
     142             :                                                                         }
     143             :                                                                         break;
     144        3972 :                                                                 case cmp_lt:
     145             :                                                                 case cmp_lte:
     146        7151 :                                                                         if (!is_anti(comp) && lne) { /* max is min from both max */
     147        3179 :                                                                                 prop *p = find_prop(e->p, PROP_MAX);
     148        3179 :                                                                                 set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max, p->value.pval) : rval_max);
     149         793 :                                                                         } else if (!is_anti(comp)) { /* min is max from both min */
     150         793 :                                                                                 prop *p = find_prop(e->p, PROP_MIN);
     151         793 :                                                                                 set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
     152             :                                                                         }
     153             :                                                                         break;
     154             :                                                                 default: /* Maybe later I can do cmp_in and cmp_notin */
     155             :                                                                         break;
     156             :                                                                 }
     157             :                                                         }
     158             :                                                 }
     159             :                                         }
     160             :                                 }
     161             :                         }
     162     1686717 :                         if (is_full(rel->op) || (is_left(rel->op) && found_right) || (is_right(rel->op) && found_left))
     163       37395 :                                 set_has_nil(e);
     164     1686717 :                         if (!is_outerjoin(rel->op) && found_without_semantics) /* at an outer join, null values pass */
     165      104023 :                                 set_has_no_nil(e);
     166     1686717 :                         if (is_join(rel->op) && is_unique(e) && !still_unique)
     167       99836 :                                 set_not_unique(e);
     168     1686717 :                         return e;
     169             :                 }
     170     4326434 :                 case op_table:
     171             :                 case op_basetable:
     172             :                 case op_union:
     173             :                 case op_except:
     174             :                 case op_inter:
     175             :                 case op_munion:
     176             :                 case op_project:
     177             :                 case op_groupby: {
     178     4326434 :                         sql_exp *found;
     179     4326434 :                         atom *fval;
     180     4326434 :                         prop *est;
     181     4326434 :                         if ((found = rel_find_exp(rel, e))) {
     182     1927520 :                                 if (rel->op != op_table) { /* At the moment don't propagate statistics for table relations */
     183     1889762 :                                         if ((fval = find_prop_and_get(found->p, PROP_MAX)))
     184     1009753 :                                                 set_minmax_property(sql, e, PROP_MAX, fval);
     185     1889760 :                                         if ((fval = find_prop_and_get(found->p, PROP_MIN)))
     186     1015899 :                                                 set_minmax_property(sql, e, PROP_MIN, fval);
     187     1889764 :                                         if (!has_nil(found))
     188      449631 :                                                 set_has_no_nil(e);
     189     1889764 :                                         if (is_unique(found) || (need_distinct(rel) && list_length(rel->exps) == 1) ||
     190     1531137 :                                                 (is_groupby(rel->op) && (list_empty(rel->r) || (list_length(rel->r) == 1 && exps_find_exp(rel->r, e)))))
     191      369379 :                                                 set_unique(e);
     192             :                                         /* propagate unique estimation for known cases */
     193     1889764 :                                         if (is_groupby(rel->op) && list_empty(rel->r) && !find_prop(e->p, PROP_NUNIQUES)) { /* global aggregate case */
     194        6654 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     195        6654 :                                                 p->value.dval = 1;
     196     1883111 :                                         } else if (((is_basetable(rel->op) || is_except(rel->op) || is_inter(rel->op) || is_simple_project(rel->op) ||
     197       64223 :                                                                  (is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
     198     1801897 :                                                                 (est = find_prop(found->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES))) {
     199       93885 :                                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     200       93884 :                                                 p->value.dval = est->value.dval;
     201             :                                         }
     202             :                                 }
     203     1927525 :                                 return e;
     204             :                         }
     205             :                         return NULL;
     206             :                 }
     207       83756 :                 case op_topn:
     208             :                 case op_sample:
     209       83756 :                         return rel_propagate_column_ref_statistics(sql, rel->l, e);
     210             :                 default:
     211             :                         break;
     212             :                 }
     213             :         }
     214             :         return NULL;
     215             : }
     216             : 
     217             : static atom *
     218     4429837 : atom_from_valptr( allocator *sa, sql_subtype *tpe, ValRecord *v)
     219             : {
     220     4429837 :         atom *a = SA_NEW(sa, atom);
     221             : 
     222     4429857 :         assert(!VALisnil(v));
     223     4429909 :         *a = (atom) {.tpe = *tpe,};
     224     4429909 :         SA_VALcopy(sa, &a->data, v);
     225     4430020 :         return a;
     226             : }
     227             : 
     228             : void
     229     4004501 : sql_column_get_statistics(mvc *sql, sql_column *c, sql_exp *e)
     230             : {
     231     4004501 :         bool nonil = false, unique = false;
     232     4004501 :         double unique_est = 0.0;
     233     4004501 :         ValRecord min, max;
     234     4004501 :         int ok = mvc_col_stats(sql, c, &nonil, &unique, &unique_est, &min, &max);
     235             : 
     236     4005464 :         if (has_nil(e) && nonil)
     237      871094 :                 set_has_no_nil(e);
     238     4005464 :         if (!is_unique(e) && unique)
     239     1061404 :                 set_unique(e);
     240     4005464 :         if (unique_est != 0.0) {
     241     2835197 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     242     2834962 :                 p->value.dval = unique_est;
     243             :         }
     244     4005229 :         unsigned int digits = 0;
     245     4005229 :         sql_subtype *et = exp_subtype(e);
     246     4005599 :         if (et->type->eclass == EC_DEC || et->type->eclass == EC_NUM)
     247     2608514 :                 digits = et->digits;
     248     4005599 :         if ((ok & 2) == 2) {
     249     2212558 :                 if (!VALisnil(&max)) {
     250     2212520 :                         prop *p = e->p = prop_create(sql->sa, PROP_MAX, e->p);
     251     2212480 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &max);
     252     2212482 :                         if (digits) {
     253     1649802 :                                 unsigned int nd = atom_digits(p->value.pval);
     254     1649781 :                                 if (nd < digits)
     255             :                                         digits = nd;
     256     1649781 :                                 if (!digits)
     257             :                                         digits = 1;
     258             :                         }
     259             :                 }
     260     2212405 :                 VALclear(&max);
     261             :         }
     262     4005430 :         if ((ok & 1) == 1) {
     263     2217862 :                 if (!VALisnil(&min)) {
     264     2217857 :                         prop *p = e->p = prop_create(sql->sa, PROP_MIN, e->p);
     265     2217891 :                         p->value.pval = atom_from_valptr(sql->sa, &c->type, &min);
     266     2218010 :                         if (digits) {
     267     1655956 :                                 unsigned int nd = atom_digits(p->value.pval);
     268     1655967 :                                 if (nd > digits) /* possibly set to low by max value */
     269             :                                         digits = nd;
     270             :                                 if (!digits)
     271             :                                         digits = 1;
     272             :                         }
     273             :                 }
     274     2218012 :                 VALclear(&min);
     275             :         }
     276     4005595 :         if (digits)
     277     2608456 :                 et->digits = digits;
     278     4005595 :         if (et->type->eclass == EC_DEC && et->digits <= et->scale)
     279         209 :                 et->digits = et->scale + 1;
     280     4005595 : }
     281             : 
     282             : static void
     283      872598 : rel_basetable_column_get_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
     284             : {
     285      872598 :         if (e->p)
     286             :                 return;
     287      272125 :         sql_column *c = NULL;
     288             : 
     289      272125 :         if ((c = name_find_column(rel, exp_relname(e), exp_name(e), -2, NULL))) {
     290      171780 :                 sql_column_get_statistics(sql, c, e);
     291             :         }
     292             : }
     293             : 
     294             : static bool
     295       16651 : rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, sql_exp *e, int i)
     296             : {
     297       16651 :         sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
     298       16651 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     299       16651 :                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     300       16651 :         prop *est;
     301             : 
     302             :         /* for the intersection, if both expresssions don't overlap, it can be pruned */
     303       16651 :         if (is_inter(rel->op) && !has_nil(le) && !has_nil(re) &&
     304         112 :                 ((rval_max && lval_min && atom_cmp(rval_max, lval_min) < 0) || (rval_min && lval_max && atom_cmp(rval_min, lval_max) > 0)))
     305          17 :                 return true;
     306             : 
     307       16634 :         if (lval_max && rval_max) {
     308        5371 :                 if (is_union(rel->op))
     309        2907 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     310        2464 :                 else if (is_inter(rel->op))
     311         387 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval_max, rval_max)); /* for intersect the new max will be the min of the two */
     312             :                 else /* except */
     313        2077 :                         set_minmax_property(sql, e, PROP_MAX, lval_max);
     314             :         }
     315       16634 :         if (lval_min && rval_min) {
     316        5371 :                 if (is_union(rel->op))
     317        2907 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     318        2464 :                 else if (is_inter(rel->op))
     319         387 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval_min, rval_min)); /* for intersect the new min will be the max of the two */
     320             :                 else /* except */
     321        2077 :                         set_minmax_property(sql, e, PROP_MIN, lval_min);
     322             :         }
     323             : 
     324       16634 :         if (is_union(rel->op) || is_munion(rel->op)) {
     325       13924 :                 if (!has_nil(le) && !has_nil(re))
     326        2938 :                         set_has_no_nil(e);
     327       13924 :                 if (need_distinct(rel) && list_length(rel->exps) == 1)
     328          11 :                         set_unique(e);
     329        2710 :         } else if (is_inter(rel->op)) {
     330         496 :                 if (!has_nil(le) || !has_nil(re))
     331         208 :                         set_has_no_nil(e);
     332         496 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     333         344 :                         set_unique(e);
     334             :         } else {
     335        2214 :                 assert(is_except(rel->op));
     336        2214 :                 if (!has_nil(le))
     337         241 :                         set_has_no_nil(e);
     338        2214 :                 if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
     339        1991 :                         set_unique(e);
     340             :         }
     341             :         /* propagate unique estimation for known cases */
     342       16634 :         if (!is_union(rel->op) && (est = find_prop(le->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES)) {
     343         117 :                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     344         117 :                 p->value.dval = est->value.dval;
     345             :         }
     346             :         return false;
     347             : }
     348             : 
     349             : 
     350             : static void
     351      105317 : rel_munion_get_statistics(mvc *sql, sql_rel *rel, list *rels, sql_exp *e, int i)
     352             : {
     353      105317 :         assert(is_munion(rel->op));
     354             : 
     355      105317 :         sql_rel *l = rels->h->data;
     356      105317 :         sql_exp *le = list_fetch(l->exps, i);
     357      105317 :         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX);
     358      105317 :         bool has_nonil = !has_nil(le);
     359             : 
     360      210634 :         for(node *n = rels->h->next; n; n = n->next) {
     361      105317 :                 sql_rel *r = n->data;
     362      105317 :                 sql_exp *re = list_fetch(r->exps, i);
     363      105317 :                 atom *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     364             : 
     365      105317 :                 if (lval_max && rval_max) {
     366       17137 :                         set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
     367       17137 :                         lval_max = find_prop_and_get(e->p, PROP_MAX);
     368             :                 }
     369      105317 :                 if (lval_min && rval_min) {
     370       16917 :                         set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
     371       16917 :                         lval_min = find_prop_and_get(e->p, PROP_MIN);
     372             :                 }
     373      105317 :                 has_nonil &= !has_nil(re);
     374             : 
     375             :         }
     376             : 
     377      105317 :         if (has_nonil)
     378       18863 :                 set_has_no_nil(e);
     379             : 
     380      105317 :         if (need_distinct(rel) && list_length(rel->exps) == 1)
     381        1573 :                 set_unique(e);
     382      105317 : }
     383             : 
     384             : 
     385             : static sql_exp *
     386     3138981 : rel_propagate_statistics(visitor *v, sql_rel *rel, sql_exp *e, int depth)
     387             : {
     388     3138981 :         mvc *sql = v->sql;
     389     3138981 :         atom *lval;
     390             : 
     391     3138981 :         (void) depth;
     392     3138981 :         switch(e->type) {
     393     1932096 :         case e_column:
     394     1932096 :                 switch (rel->op) { /* set relations don't call rel_propagate_statistics */
     395      249721 :                 case op_join:
     396             :                 case op_left:
     397             :                 case op_right:
     398             :                 case op_full:
     399             :                 case op_semi:
     400             :                 case op_anti: {
     401      249721 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e);
     402      249721 :                         if (!found)
     403      125303 :                                 (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     404             :                         break;
     405             :                 }
     406     1682375 :                 case op_select:
     407             :                 case op_project:
     408             :                 case op_groupby: {
     409     1682375 :                         sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e); /* labels may be found on the same projection, ugh */
     410     1682376 :                         if (!found && is_simple_project(rel->op))
     411      118173 :                                 (void) rel_propagate_column_ref_statistics(sql, rel, e);
     412             :                         break;
     413             :                 }
     414           0 :                 case op_insert:
     415             :                 case op_update:
     416             :                 case op_delete:
     417           0 :                         (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
     418           0 :                         break;
     419             :                 default:
     420             :                         break;
     421             :                 }
     422             :                 break;
     423       81741 :         case e_convert: {
     424       81741 :                 sql_subtype *to = exp_totype(e), *from = exp_fromtype(e);
     425       81741 :                 sql_exp *l = e->l;
     426       81741 :                 sql_class fr = from->type->eclass, too = to->type->eclass;
     427       81741 :                 prop *est;
     428             : 
     429       81741 :                 if (fr == too) {
     430       73097 :                         if ((lval = find_prop_and_get(l->p, PROP_MAX))) {
     431       45223 :                                 atom *res = atom_copy(sql->sa, lval);
     432       45223 :                                 if ((res = atom_cast(sql->sa, res, to)))
     433       45202 :                                         set_minmax_property(sql, e, PROP_MAX, res);
     434             :                         }
     435       73096 :                         if ((lval = find_prop_and_get(l->p, PROP_MIN))) {
     436       45697 :                                 atom *res = atom_copy(sql->sa, lval);
     437       45697 :                                 if ((res = atom_cast(sql->sa, res, to)))
     438       45676 :                                         set_minmax_property(sql, e, PROP_MIN, res);
     439             :                         }
     440             :                 }
     441             :                 /* propagating nuniques through conversions is tricky. I am adding just the general cases */
     442       81740 :                 if ((est = find_prop(l->p, PROP_NUNIQUES)) &&
     443       46952 :                         (((fr == EC_TIME || fr == EC_TIME_TZ || fr == EC_DATE) && (too == EC_TIMESTAMP || too == EC_TIMESTAMP_TZ)) ||
     444       46927 :                          ((fr == EC_NUM || fr == EC_BIT) && too == EC_NUM) || (fr == EC_DEC && too == EC_DEC) || (fr != EC_EXTERNAL && too == EC_STRING))) {
     445       43684 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     446       43684 :                         p->value.dval = est->value.dval;
     447             :                 }
     448       81740 :                 if (!has_nil(l))
     449       33166 :                         set_has_no_nil(e);
     450             :                 break;
     451             :         }
     452      333348 :         case e_aggr:
     453             :         case e_func: {
     454      333348 :                 BUN lv;
     455      333348 :                 sql_subfunc *f = e->f;
     456             : 
     457      333348 :                 if (!f->func->s) {
     458      306997 :                         int key = hash_key(f->func->base.name); /* Using hash lookup */
     459      306997 :                         sql_hash_e *he = sql_functions_lookup->buckets[key&(sql_functions_lookup->size-1)];
     460      306997 :                         lookup_function look = NULL;
     461             : 
     462      670900 :                         for (; he && !look; he = he->chain) {
     463      363903 :                                 struct function_properties* fp = (struct function_properties*) he->value;
     464             : 
     465      363903 :                                 if (!strcmp(f->func->base.name, fp->name))
     466      104405 :                                         look = fp->func;
     467             :                         }
     468      306997 :                         if (look)
     469      104407 :                                 look(sql, e);
     470             :                 }
     471             :                 /* for global aggregates with no semantics, if the left relation has values, then the output won't be NULL */
     472      333349 :                 if (!is_semantics(e) && e->l && !have_nil(e->l) &&
     473       40564 :                         (e->type != e_aggr || (is_groupby(rel->op) && list_length(rel->r)) || ((lv = get_rel_count(rel->l)) != BUN_NONE && lv > 0)))
     474       40501 :                         set_has_no_nil(e);
     475             :                 /* set properties for global aggregates */
     476      333349 :                 if (e->type == e_aggr && is_groupby(rel->op) && list_empty(rel->r)) {
     477        6902 :                         if (!find_prop(e->p, PROP_NUNIQUES)) {
     478        6902 :                                 prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     479        6902 :                                 p->value.dval = 1;
     480             :                         }
     481        6902 :                         set_unique(e);
     482             :                 }
     483             :                 break;
     484             :         }
     485      525017 :         case e_atom:
     486      525017 :                 if (e->l) {
     487      511089 :                         atom *a = (atom*) e->l;
     488      511089 :                         if (!a->isnull) {
     489      454110 :                                 set_minmax_property(sql, e, PROP_MAX, a);
     490      454111 :                                 set_minmax_property(sql, e, PROP_MIN, a);
     491             :                         }
     492      511090 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     493      511089 :                         p->value.dval = 1;
     494       13928 :                 } else if (e->f) {
     495        2803 :                         list *vals = (list *) e->f;
     496        2803 :                         sql_exp *first = vals->h ? vals->h->data : NULL;
     497        2803 :                         atom *max = NULL, *min = NULL; /* all child values must have a valid min/max */
     498        2803 :                         int has_nil = 0;
     499             : 
     500        2803 :                         if (first) {
     501        2803 :                                 max = ((lval = find_prop_and_get(first->p, PROP_MAX))) ? lval : NULL;
     502        2803 :                                 min = ((lval = find_prop_and_get(first->p, PROP_MIN))) ? lval : NULL;
     503        2803 :                                 has_nil |= has_nil(first);
     504             :                         }
     505             : 
     506       12773 :                         for (node *n = vals->h ? vals->h->next : NULL ; n ; n = n->next) {
     507        7167 :                                 sql_exp *ee = n->data;
     508             : 
     509        7167 :                                 if (min && max) {
     510        6713 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MAX))) {
     511        6669 :                                                 max = atom_cmp(lval, max) > 0 ? lval : max;
     512             :                                         } else {
     513             :                                                 max = NULL;
     514             :                                         }
     515        6713 :                                         if ((lval = find_prop_and_get(ee->p, PROP_MIN))) {
     516        6669 :                                                 min = atom_cmp(min, lval) > 0 ? lval : min;
     517             :                                         } else {
     518             :                                                 min = NULL;
     519             :                                         }
     520             :                                 }
     521        7167 :                                 has_nil |= has_nil(ee);
     522             :                         }
     523             : 
     524        2803 :                         if (!has_nil)
     525        2444 :                                 set_has_no_nil(e);
     526        2803 :                         if (min && max) {
     527        2420 :                                 set_minmax_property(sql, e, PROP_MAX, max);
     528        2420 :                                 set_minmax_property(sql, e, PROP_MIN, min);
     529             :                         }
     530        2803 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     531        2803 :                         p->value.dval = (dbl) list_length(vals);
     532             :                 } else {
     533       11125 :                         prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
     534       11125 :                         p->value.dval = 1;
     535             :                 }
     536             :                 break;
     537      266779 :         case e_cmp:
     538             :                 /* TODO? propagating min/max/unique of booleans is not very worth it */
     539      266779 :                 if (e->flag == cmp_or || e->flag == cmp_filter) {
     540       18044 :                         if (!have_nil(e->l) && !have_nil(e->r))
     541        2778 :                                 set_has_no_nil(e);
     542      248735 :                 } else if (e->flag == cmp_in || e->flag == cmp_notin) {
     543       19550 :                         sql_exp *le = e->l;
     544       19550 :                         if (!has_nil(le) && !have_nil(e->r))
     545         837 :                                 set_has_no_nil(e);
     546             :                 } else {
     547      229185 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     548      229185 :                         if (!has_nil(le) && !has_nil(re) && (!e->f || !has_nil(fe)))
     549       23310 :                                 set_has_no_nil(e);
     550             :                 }
     551             :                 break;
     552             :         case e_psm:
     553             :                 break;
     554             :         }
     555             : 
     556             : #ifndef NDEBUG
     557             :         {
     558             :                 /* min and max cannot be NULL and min must be <= than max, if it happens the inner relation must be empty! */
     559     3138982 :                 atom *min = find_prop_and_get(e->p, PROP_MIN), *max = find_prop_and_get(e->p, PROP_MAX);
     560             : 
     561     3138981 :                 (void) min;
     562     3138981 :                 (void) max;
     563     3138981 :                 assert(!min || !min->isnull);
     564     3138981 :                 assert(!max || !max->isnull);
     565     3138981 :                 assert(!min || !max || (min && max)/* atom_cmp(min, max) <= 0*/);
     566             :         }
     567             : #endif
     568     3138981 :         return e;
     569             : }
     570             : 
     571             : static list * /* Remove predicates always false from min/max values */
     572      126923 : rel_prune_predicates(visitor *v, sql_rel *rel)
     573             : {
     574      126923 :         if (rel->l) {
     575      126923 :                 sql_rel *l = rel->l;
     576      126923 :                 if (is_single(l))
     577        3545 :                         return rel->exps;
     578             :         }
     579      123378 :         if (!list_empty(rel->attr))
     580        2683 :                 return rel->exps;
     581      258348 :         for (node *n = rel->exps->h ; n ; n = n->next) {
     582      137653 :                 sql_exp *e = n->data;
     583             : 
     584      137653 :                 if (e->type == e_cmp && is_theta_exp(e->flag)) {
     585      113326 :                         sql_exp *le = e->l, *re = e->r, *fe = e->f;
     586      113326 :                         atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
     587      113326 :                                 *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
     588      113326 :                         bool always_false = false, always_true = false;
     589             : 
     590      113326 :                         if (fe && !is_symmetric(e)) {
     591        2964 :                                 atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
     592        2964 :                                 comp_type lower = range2lcompare(e->flag), higher = range2rcompare(e->flag);
     593        3492 :                                 int not_int1 = rval_min && lval_max && /* the middle and left intervals don't overlap */
     594        1515 :                                         (!is_anti(e) && (lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0)),
     595        3904 :                                         not_int2 = lval_min && fval_max && /* the middle and right intervals don't overlap */
     596        2337 :                                         (!is_anti(e) && (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0)),
     597        2964 :                                         not_int3 = rval_min && fval_max && /* the left interval is after the right one */
     598        1211 :                                         (!is_anti(e) && (atom_cmp(rval_min, fval_max) > 0));
     599             : 
     600        2964 :                                 always_false |= not_int1 || not_int2 || not_int3;
     601             :                                 /* for anti the middle must be before the left or after the right or the right after the left, for the other the middle must be always between the left and right intervals */
     602        3438 :                                 always_true |= !has_nil(le) && !has_nil(re) && !has_nil(fe) &&
     603        3282 :                                         lval_min && lval_max && rval_min && rval_max && fval_min && fval_max &&
     604          49 :                                         (is_anti(e) ? ((lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) || (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
     605          35 :                                          ((lower == cmp_gte ? atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0)));
     606             :                         } else if (!fe) {
     607      110344 :                                 if (!is_semantics(e)) /* trival not null cmp null case */
     608      208616 :                                         always_false |= !is_anti(e) && ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)));
     609      110344 :                                 switch (e->flag) {
     610       95761 :                                 case cmp_equal:
     611       95761 :                                         if (lval_min && lval_max && rval_min && rval_max && (!is_semantics(e) || !has_nil(le) || !has_nil(re)))
     612      130920 :                                                 always_false |= (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     613       95761 :                                         if (is_semantics(e)) { /* prune *= NULL cases */
     614        6012 :                                                 always_false |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     615       12024 :                                                 always_true |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     616             :                                         }
     617             :                                         break;
     618        6870 :                                 case cmp_notequal:
     619        6870 :                                         if (lval_min && lval_max && rval_min && rval_max)
     620       11234 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
     621        6870 :                                         if (is_semantics(e)) {
     622          24 :                                                 always_true |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
     623          48 :                                                 always_false |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
     624             :                                         }
     625             :                                         break;
     626        5130 :                                 case cmp_gt:
     627        5130 :                                         if (lval_max && rval_min)
     628        1916 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     629        5130 :                                         if (lval_min && rval_max)
     630        3832 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     631             :                                         break;
     632          94 :                                 case cmp_gte:
     633          94 :                                         if (lval_max && rval_min)
     634          36 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     635          94 :                                         if (lval_min && rval_max)
     636          72 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     637             :                                         break;
     638        2410 :                                 case cmp_lt:
     639        2410 :                                         if (lval_min && rval_max)
     640        1358 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
     641        2410 :                                         if (lval_max && rval_min)
     642        2764 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
     643             :                                         break;
     644          79 :                                 case cmp_lte:
     645          79 :                                         if (lval_min && rval_max)
     646           9 :                                                 always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
     647          79 :                                         if (lval_max && rval_min)
     648          18 :                                                 always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
     649             :                                         break;
     650             :                                 default: /* Maybe later I can do cmp_in and cmp_notin but don't forget to remove is_theta_exp check up there */
     651             :                                         break;
     652             :                                 }
     653             :                         }
     654      113326 :                         assert(!always_false || !always_true);
     655      113326 :                         if (always_false || always_true) {
     656         981 :                                 sql_exp *ne = exp_atom_bool(v->sql->sa, always_true /* if always_true set then true else false */);
     657         981 :                                 if (exp_name(e))
     658           0 :                                         exp_prop_alias(v->sql->sa, ne, e);
     659         981 :                                 n->data = ne;
     660         981 :                                 v->changes++;
     661             :                         }
     662             :                 }
     663             :         }
     664      120695 :         return rel->exps;
     665             : }
     666             : 
     667             : static sql_rel *
     668         120 : set_setop_side(visitor *v, sql_rel *rel, sql_rel *side)
     669             : {
     670         120 :         if (side == rel->r)
     671          75 :                 rel->r = NULL;
     672             :         else
     673          45 :                 rel->l = NULL;
     674         120 :         if (!is_simple_project(side->op) || rel_is_ref(side) || !list_empty(side->r)) {
     675           3 :                 side = rel_project(v->sql->sa, side, rel_projections(v->sql, side, NULL, 0, 1));
     676           3 :                 set_count_prop(v->sql->sa, side, get_rel_count(side->l));
     677           3 :                 side->exps = exps_exp_visitor_bottomup(v, side, side->exps, 0, &rel_propagate_statistics, false);
     678             :         }
     679         471 :         for (node *n = rel->exps->h, *m = side->exps->h ; n && m ; n = n->next, m = m->next) {
     680         351 :                 sql_exp *e1 = n->data, *e2 = m->data;
     681         351 :                 exp_prop_alias(v->sql->sa, e2, e1);
     682             :         }
     683         120 :         list_hash_clear(side->exps);
     684             : 
     685         120 :         if (need_distinct(rel))
     686           2 :                 set_distinct(side);
     687         120 :         side->p = prop_copy(v->sql->sa, rel->p);
     688         120 :         rel_destroy(rel);
     689         120 :         return side;
     690             : }
     691             : 
     692             : static BUN
     693       15114 : trivial_project_exp_card(sql_exp *e)
     694             : {
     695       15132 :         if (e->type == e_convert)
     696          18 :                 return trivial_project_exp_card(e->l);
     697       15114 :         return e->type == e_atom && e->f ? (BUN) list_length(e->f) : 1;
     698             : }
     699             : 
     700             : static BUN
     701       23553 : rel_calc_nuniques(mvc *sql, sql_rel *l, list *exps)
     702             : {
     703       23553 :         BUN lv = get_rel_count(l);
     704             : 
     705       23553 :         if (lv == 0)
     706             :                 return 0;
     707       20970 :         if (!list_empty(exps)) {
     708       20970 :                 BUN nuniques = 0;
     709             :                 /* compute the highest number of unique values */
     710       49462 :                 for (node *n = exps->h ; n && nuniques != BUN_NONE ; n = n->next) {
     711       28492 :                         sql_exp *e = n->data;
     712       28492 :                         sql_rel *bt = NULL;
     713       28492 :                         prop *p = NULL;
     714       28492 :                         BUN euniques = BUN_NONE;
     715       28492 :                         atom *min, *max, *sub = NULL;
     716       28492 :                         sql_subtype *tp = exp_subtype(e);
     717       28492 :                         sql_class ec = tp ? tp->type->eclass : EC_STRING; /* if 'e' has no type (eg parameter), use a non-number type to fail condition */
     718             : 
     719       28492 :                         if ((p = find_prop(e->p, PROP_NUNIQUES))) {
     720       20260 :                                 euniques = (BUN) p->value.dval;
     721        8232 :                         } else if (e->type == e_column && rel_find_exp_and_corresponding_rel(l, e, false, &bt, NULL) && bt && (p = find_prop(bt->p, PROP_COUNT))) {
     722        3882 :                                 euniques = (BUN) p->value.lval;
     723             :                         }
     724             :                         /* use min to max range to compute number of possible values in the domain for number types */
     725       28492 :                         if ((EC_TEMP(ec)||ec==EC_NUM||ec==EC_MONTH||ec==EC_POS) &&
     726       21751 :                                 (min = find_prop_and_get(e->p, PROP_MIN)) && (max = find_prop_and_get(e->p, PROP_MAX))) {
     727             :                                 /* the range includes min and max, so the atom_inc call is needed */
     728             :                                 /* if 'euniques' has number of distinct values, compute min between both */
     729       17398 :                                 if ((sub = atom_sub(sql->sa, max, min)) && (sub = atom_inc(sql->sa, sub)) && (sub = atom_cast(sql->sa, sub, sql_bind_localtype("oid"))))
     730           0 :                                         euniques = MIN(euniques, (BUN) sub->data.val.oval);
     731             :                         }
     732       28492 :                         if (euniques != BUN_NONE)
     733       24142 :                                 nuniques = MAX(nuniques, euniques); /* the highest cardinality sets the estimation */
     734             :                         else
     735             :                                 nuniques = BUN_NONE;
     736             :                 }
     737       20970 :                 if (nuniques != BUN_NONE)
     738             :                         return nuniques;
     739             :         }
     740             :         return lv;
     741             : }
     742             : 
     743             : static sql_rel *
     744     1869801 : rel_get_statistics_(visitor *v, sql_rel *rel)
     745             : {
     746             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
     747     1869801 :         uint8_t has_special_modify = *(uint8_t*) v->data;
     748     1869801 :         bool can_be_pruned = !has_special_modify && v->storage_based_opt;
     749             : 
     750             :         /* Don't look at the same relation twice */
     751     1869801 :         if (are_statistics_gathered(rel->used))
     752             :                 return rel;
     753     1274685 :         rel->used |= statistics_gathered;
     754             : 
     755     1274685 :         switch (rel->op) {
     756      299988 :         case op_basetable: {
     757      299988 :                 sql_table *t = (sql_table *) rel->l;
     758      299988 :                 sqlstore *store = v->sql->session->tr->store;
     759             : 
     760      299988 :                 if (!list_empty(rel->exps)) {
     761     1172827 :                         for (node *n = rel->exps->h ; n ; n = n->next)
     762      872590 :                                 rel_basetable_column_get_statistics(v->sql, rel, n->data);
     763             :                 }
     764             :                 /* Set table row count. TODO? look for remote tables. Don't look at storage for declared tables, because it won't be cleaned */
     765      300250 :                 if (isTable(t) && t->s && !isDeclaredTable(t)) /* count active rows only */
     766      250673 :                         set_count_prop(v->sql->sa, rel, (BUN)store->storage_api.count_col(v->sql->session->tr, ol_first_node(t->columns)->data, 10));
     767             :                 break;
     768             :         }
     769        4834 :         case op_union:
     770             :         case op_inter:
     771             :         case op_except: {
     772        4834 :                 bool empty_cross = false;
     773        4834 :                 int i = 0;
     774        4834 :                 sql_rel *l = rel->l, *pl = rel->l, *r = rel->r, *pr = rel->r;
     775             : 
     776        4880 :                 while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     777          46 :                         pl = pl->l;
     778        4879 :                 while (is_sample(pr->op) || is_topn(pr->op))
     779          45 :                         pr = pr->l;
     780             :                 /* if it's not a projection, then project and propagate statistics */
     781        4834 :                 if (!is_project(pl->op) && !is_base(pl->op)) {
     782           0 :                         pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     783           0 :                         set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     784           0 :                         pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     785             :                 }
     786        4834 :                 if (!is_project(pr->op) && !is_base(pr->op)) {
     787           0 :                         pr = rel_project(v->sql->sa, pr, rel_projections(v->sql, pr, NULL, 0, 1));
     788           0 :                         set_count_prop(v->sql->sa, pr, get_rel_count(pr->l));
     789           0 :                         pr->exps = exps_exp_visitor_bottomup(v, pr, pr->exps, 0, &rel_propagate_statistics, false);
     790             :                 }
     791             : 
     792       21485 :                 for (node *n = rel->exps->h ; n ; n = n->next) {
     793       16651 :                         empty_cross |= rel_setop_get_statistics(v->sql, rel, pl->exps, pr->exps, n->data, i);
     794       16651 :                         i++;
     795             :                 }
     796             : 
     797             :                 /* propagate row count */
     798        4834 :                 if (is_union(rel->op)) {
     799        2378 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     800        2378 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     801             : 
     802        2378 :                         if (lv == 0 && rv == 0) { /* both sides empty */
     803          66 :                                 if (can_be_pruned)
     804             :                                         empty_cross = true;
     805             :                                 else
     806           2 :                                         set_count_prop(v->sql->sa, rel, 0);
     807        2312 :                         } else if (can_be_pruned && lv == 0 && !rel_is_ref(rel)) { /* left side empty */
     808          75 :                                 rel = set_setop_side(v, rel, r);
     809          75 :                                 empty_cross = false; /* don't rewrite again */
     810        2237 :                         } else if (can_be_pruned && rv == 0 && !rel_is_ref(rel)) { /* right side empty */
     811          32 :                                 rel = set_setop_side(v, rel, l);
     812          32 :                                 empty_cross = false; /* don't rewrite again */
     813        2205 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
     814        1467 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
     815             :                         }
     816        2456 :                 } else if (is_inter(rel->op) || is_except(rel->op)) {
     817        2456 :                         BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
     818        2456 :                                 rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     819             : 
     820        2456 :                         if (lv == 0) { /* left side empty */
     821          45 :                                 if (can_be_pruned)
     822             :                                         empty_cross = true;
     823             :                                 else
     824           5 :                                         set_count_prop(v->sql->sa, rel, 0);
     825        2411 :                         } else if (rv == 0) { /* right side empty */
     826          16 :                                 if (is_inter(rel->op)) {
     827           3 :                                         if (can_be_pruned)
     828             :                                                 empty_cross = true;
     829             :                                         else
     830           0 :                                                 set_count_prop(v->sql->sa, rel, 0);
     831          13 :                                 } else if (can_be_pruned && !rel_is_ref(rel)) {
     832          13 :                                         rel = set_setop_side(v, rel, l);
     833          13 :                                         empty_cross = false; /* don't rewrite again */
     834             :                                 } else {
     835           0 :                                         set_count_prop(v->sql->sa, rel, lv);
     836             :                                 }
     837             :                         } else {
     838        2395 :                                 set_count_prop(v->sql->sa, rel, lv);
     839             :                         }
     840             :                 }
     841        4834 :                 if (can_be_pruned && empty_cross) {
     842         118 :                         rel_destroy(rel->l);
     843         118 :                         rel->l = NULL;
     844         118 :                         rel_destroy(rel->r);
     845         118 :                         rel->r = NULL;
     846         527 :                         for (node *n = rel->exps->h ; n ; n = n->next) {
     847         409 :                                 sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     848         409 :                                 exp_prop_alias(v->sql->sa, a, e);
     849         409 :                                 n->data = a;
     850             :                         }
     851         118 :                         list_hash_clear(rel->exps);
     852         118 :                         sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     853         118 :                         set_count_prop(v->sql->sa, l, 1);
     854         118 :                         l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     855         118 :                         set_count_prop(v->sql->sa, l, 0);
     856         118 :                         rel->op = op_project;
     857         118 :                         rel->l = l;
     858         118 :                         rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     859         118 :                         set_count_prop(v->sql->sa, rel, 0);
     860         118 :                         set_nodistinct(rel); /* set relations may have distinct flag set */
     861         118 :                         v->changes++;
     862             :                 }
     863             :                 break;
     864             :         }
     865       19271 :         case op_munion: {
     866       19271 :                 list *l = rel->l, *nrels = sa_list(v->sql->sa);
     867       19271 :                 BUN cnt = 0;
     868       19271 :                 bool needs_pruning = false;
     869             : 
     870       57813 :                 for (node *n = l->h; n; n = n->next) {
     871       38542 :                         sql_rel *r = n->data, *pl = r;
     872             : 
     873       38542 :                         while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
     874           0 :                                         pl = pl->l;
     875             :                         /* if it's not a projection, then project and propagate statistics */
     876       38542 :                         if (!is_project(pl->op) && !is_base(pl->op)) {
     877           0 :                                 pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
     878           0 :                                 set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
     879           0 :                                 pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
     880             :                         }
     881       38542 :                         nrels = append(nrels, pl);
     882             :                         /* we need new munion statistics */
     883             :                         /* propagate row count */
     884       38542 :                         BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     885       38542 :                         if (!rv && can_be_pruned)
     886        7002 :                                 needs_pruning = true;
     887       38542 :                         if (rv > (BUN_MAX - cnt)) /* overflow check */
     888       38542 :                                 rv = BUN_MAX;
     889             :                         else
     890       36698 :                                 cnt += rv;
     891             :                 }
     892       19271 :                 int i = 0;
     893      124588 :                 for (node *n = rel->exps->h ; n ; n = n->next, i++)
     894      105317 :                         rel_munion_get_statistics(v->sql, rel, nrels, n->data, i);
     895             : 
     896       19271 :                 if (needs_pruning) {
     897        5937 :                         v->changes++;
     898        5937 :                         list *nl = sa_list(l->sa);
     899             : 
     900       17811 :                         for (node *n = nrels->h; n; n = n->next) {
     901       11874 :                                 sql_rel *r = n->data;
     902       11874 :                                 BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
     903             : 
     904       11874 :                                 if (!rv) { /* keep last for now */
     905        7002 :                                         rel_destroy(r);
     906        7002 :                                         continue;
     907             :                                 }
     908        4872 :                                 nl = append(nl, r);
     909             :                         }
     910        5937 :                         rel->l = nl;
     911        5937 :                         if (list_length(nl) == 1) {
     912        4872 :                                 sql_rel *l = rel->l = nl->h->data; /* ugh */
     913        4872 :                                 rel->op = op_project;
     914             : 
     915       46563 :                                 for (node *n = rel->exps->h, *m = l->exps->h ; n && m ; n = n->next, m = m->next) {
     916       41691 :                                         sql_exp *pe = n->data, *ie = m->data;
     917       41691 :                                         sql_exp *ne = exp_ref(v->sql, ie);
     918       41691 :                                         exp_setname(v->sql->sa, ne, exp_relname(pe), exp_name(pe));
     919       41691 :                                         n->data = ne;
     920             :                                 }
     921        4872 :                                 list_hash_clear(rel->exps);
     922        1065 :                         } else if (list_empty(nl)) {
     923             :                                 /* empty select (project [ nils ] ) */
     924        3816 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     925        2751 :                                         sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
     926        2751 :                                         exp_prop_alias(v->sql->sa, a, e);
     927        2751 :                                         n->data = a;
     928             :                                 }
     929        1065 :                                 list_hash_clear(rel->exps);
     930        1065 :                                 sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
     931        1065 :                                 set_count_prop(v->sql->sa, l, 1);
     932        1065 :                                 l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
     933        1065 :                                 set_count_prop(v->sql->sa, l, 0);
     934        1065 :                                 rel->op = op_project;
     935        1065 :                                 rel->l = l;
     936        1065 :                                 rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
     937        1065 :                                 set_count_prop(v->sql->sa, rel, 0);
     938        1065 :                                 set_nodistinct(rel); /* set relations may have distinct flag set */
     939             :                         }
     940             :                 } else {
     941       13334 :                         set_count_prop(v->sql->sa, rel, cnt);
     942             :                 }
     943             :                 break;
     944             :         }
     945      478557 :         case op_join:
     946             :         case op_left:
     947             :         case op_right:
     948             :         case op_full:
     949             :         case op_semi:
     950             :         case op_anti:
     951             :         case op_select:
     952             :         case op_project:
     953             :         case op_groupby: {
     954      478557 :                 if (is_groupby(rel->op) && !list_empty(rel->r))
     955       14937 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     956      478557 :                 rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, &rel_propagate_statistics, false);
     957      478552 :                 if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r))
     958       37861 :                         rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
     959             :                 /* The following optimizations can only be applied after propagating the statistics to rel->exps */
     960      478551 :                 if (can_be_pruned && (is_join(rel->op) || is_select(rel->op)) && !list_empty(rel->exps)) {
     961      126923 :                         int changes = v->changes;
     962      126923 :                         rel->exps = rel_prune_predicates(v, rel);
     963      126923 :                         if (v->changes > changes)
     964         961 :                                 rel = rewrite_simplify(v, 0, v->value_based_opt, rel);
     965             :                 }
     966             : 
     967             :                 /* propagate row count */
     968      478551 :                 sql_rel *l = rel->l, *r = rel->r;
     969      478551 :                 switch (rel->op) {
     970      122306 :                 case op_join:
     971             :                 case op_left:
     972             :                 case op_right:
     973             :                 case op_full: {
     974      122306 :                         BUN lv = get_rel_count(l), rv = get_rel_count(r), uniques_estimate = BUN_MAX, join_idx_estimate = BUN_MAX;
     975             : 
     976      122306 :                         if (!list_empty(rel->exps) && !is_single(rel)) {
     977      221703 :                                 for (node *n = rel->exps->h ; n ; n = n->next) {
     978      113453 :                                         sql_exp *e = n->data, *el = e->l, *er = e->r;
     979             : 
     980      113453 :                                         if (find_prop(e->p, PROP_JOINIDX)) {
     981         652 :                                                 join_idx_estimate = lv>rv?lv:rv;
     982      112801 :                                         } else if (e->type == e_cmp && e->flag == cmp_equal) {
     983             :                                                 /* if one of the sides is unique, the cardinality will be that exact number, but look for nulls */
     984      108981 :                                                 if (!is_semantics(e) || !has_nil(el) || !has_nil(er)) {
     985      108443 :                                                         BUN lu = 0, ru = 0;
     986      108443 :                                                         prop *p = NULL;
     987      108443 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES)))
     988       83715 :                                                                 lu = (BUN) p->value.dval;
     989      108443 :                                                         if ((p = find_prop(er->p, PROP_NUNIQUES)))
     990       94081 :                                                                 ru = (BUN) p->value.dval;
     991      108443 :                                                         if (is_unique(el) || lu > lv) {
     992       65615 :                                                                 BUN ncount = (is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv;
     993       65615 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
     994       42828 :                                                         } else if (is_unique(er) || ru > rv) {
     995       26166 :                                                                 BUN ncount = (is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv;
     996       26166 :                                                                 uniques_estimate = MIN(uniques_estimate, ncount);
     997             :                                                         }
     998             :                                                 }
     999             :                                         }
    1000             :                                 }
    1001             :                         }
    1002      122306 :                         if (is_single(rel)) {
    1003        4840 :                                 set_count_prop(v->sql->sa, rel, lv);
    1004      117466 :                         } else if (join_idx_estimate != BUN_MAX) {
    1005         650 :                                 set_count_prop(v->sql->sa, rel, join_idx_estimate);
    1006      116816 :                         } else if (uniques_estimate != BUN_MAX) {
    1007       87587 :                                 set_count_prop(v->sql->sa, rel, uniques_estimate);
    1008       29229 :                         } else if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1009             :                                 /* corner cases for outer joins */
    1010         106 :                                 if (is_left(rel->op)) {
    1011          95 :                                         set_count_prop(v->sql->sa, rel, lv);
    1012          11 :                                 } else if (is_right(rel->op)) {
    1013           3 :                                         set_count_prop(v->sql->sa, rel, rv);
    1014           8 :                                 } else if (is_full(rel->op) && lv != BUN_NONE && rv != BUN_NONE) {
    1015           8 :                                         set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
    1016           0 :                                 } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1017           0 :                                         set_count_prop(v->sql->sa, rel, 0);
    1018             :                                 }
    1019       29123 :                         } else if (lv == 0) {
    1020        1104 :                                 set_count_prop(v->sql->sa, rel, (is_right(rel->op) || is_full(rel->op)) ? rv : 0);
    1021       28558 :                         } else if (rv == 0) {
    1022        1523 :                                 set_count_prop(v->sql->sa, rel, (is_left(rel->op) || is_full(rel->op)) ? lv : 0);
    1023       27511 :                         } else if (lv != BUN_NONE && rv != BUN_NONE) {
    1024       17580 :                                 set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX / lv)) ? BUN_MAX : (lv * rv)); /* overflow check */
    1025             :                         }
    1026             :                         break;
    1027             :                 }
    1028        2408 :                 case op_anti:
    1029        2408 :                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1030        2408 :                         break;
    1031      101016 :                 case op_semi:
    1032             :                 case op_select:
    1033             :                         /* TODO calculate cardinalities using selectivities */
    1034      101016 :                         if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
    1035        1846 :                                 set_count_prop(v->sql->sa, rel, 0);
    1036             :                         } else {
    1037      195471 :                                 if (!list_empty(rel->exps) && !is_single(rel)) {
    1038       96301 :                                         BUN cnt = get_rel_count(l), u = 1;
    1039      159526 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1040      107108 :                                                 sql_exp *e = n->data, *el = e->l, *er = e->r;
    1041             : 
    1042             :                                                 /* simple expressions first */
    1043      107108 :                                                 if (e->type == e_cmp && e->flag == cmp_equal && exp_is_atom(er)) {
    1044             :                                                         /* use selectivity */
    1045       53830 :                                                         prop *p;
    1046       53830 :                                                         if ((p = find_prop(el->p, PROP_NUNIQUES))) {
    1047       43883 :                                                                 u = (BUN) p->value.dval;
    1048       43883 :                                                                 break;
    1049             :                                                         }
    1050             :                                                 }
    1051             :                                         }
    1052             :                                         /* u is an *estimate*, so don't set count_prop to 0 unless cnt is 0 */
    1053       96301 :                                         set_count_prop(v->sql->sa, rel, cnt == 0 ? 0 : u == 0 || u > cnt ? 1 : cnt/u);
    1054             :                                 } else {
    1055        2869 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1056             :                                 }
    1057             :                         }
    1058             :                         break;
    1059      231408 :                 case op_project:
    1060      231408 :                         if (l) {
    1061      221890 :                                 if (need_distinct(rel)) {
    1062           0 :                                         set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->exps));
    1063             :                                 } else {
    1064      221890 :                                         set_count_prop(v->sql->sa, rel, get_rel_count(l));
    1065             :                                 }
    1066             :                         } else {
    1067        9518 :                                 BUN card = 1;
    1068             : 
    1069        9518 :                                 if (!list_empty(rel->exps)) {
    1070       23353 :                                         for (node *n = rel->exps->h ; n ; n = n->next) {
    1071       13835 :                                                 sql_exp *e = n->data;
    1072       13835 :                                                 card = MAX(card, trivial_project_exp_card(e));
    1073             :                                         }
    1074             :                                 }
    1075        9518 :                                 set_count_prop(v->sql->sa, rel, card);
    1076             :                         }
    1077             :                         break;
    1078       21295 :                 case op_groupby:
    1079       21295 :                         if (list_empty(rel->r)) {
    1080        6358 :                                 set_count_prop(v->sql->sa, rel, 1);
    1081             :                         } else {
    1082       14937 :                                 set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->r));
    1083             :                         }
    1084             :                         break;
    1085             :                 default:
    1086             :                         break;
    1087             :                 }
    1088             :                 break;
    1089             :         }
    1090       16887 :         case op_topn: {
    1091       16887 :                 BUN lv = get_rel_count(rel->l);
    1092             : 
    1093       16886 :                 if (lv != BUN_NONE) {
    1094       16869 :                         sql_exp *le = rel->exps->h->data, *oe = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
    1095          94 :                         if (oe && oe->l && !exp_is_null(oe)) { /* no parameters */
    1096          94 :                                 BUN offset = (BUN) ((atom*)oe->l)->data.val.lval;
    1097          94 :                                 lv = offset >= lv ? 0 : lv - offset;
    1098             :                         }
    1099       16870 :                         if (le->l && !exp_is_null(le)) {
    1100       16837 :                                 BUN limit = (BUN) ((atom*)le->l)->data.val.lval;
    1101       16837 :                                 lv = MIN(lv, limit);
    1102             :                         }
    1103       16870 :                         set_count_prop(v->sql->sa, rel, lv);
    1104             :                 }
    1105             :                 break;
    1106             :         }
    1107          22 :         case op_sample: {
    1108          22 :                 BUN lv = get_rel_count(rel->l);
    1109             : 
    1110          22 :                 if (lv != BUN_NONE) {
    1111           4 :                         sql_exp *se = rel->exps->h->data;
    1112           4 :                         sql_subtype *tp = exp_subtype(se);
    1113             : 
    1114           4 :                         if (se->l && tp->type->eclass == EC_NUM) { /* sample is a number of rows */
    1115           4 :                                 BUN sample = (BUN) ((atom*)se->l)->data.val.lval;
    1116           4 :                                 lv = MIN(lv, sample);
    1117           0 :                         } else if (se->l) { /* sample is a percentage of rows */
    1118           0 :                                 dbl percent = ((atom*)se->l)->data.val.dval;
    1119           0 :                                 assert(tp->type->eclass == EC_FLT);
    1120           0 :                                 lv = (BUN) ceil((dbl)lv * percent);
    1121             :                         }
    1122           4 :                         set_count_prop(v->sql->sa, rel, lv);
    1123             :                 }
    1124             :                 break;
    1125             :         }
    1126        5362 :         case op_table: {
    1127        5362 :                 sql_exp *op = rel->r;
    1128        5362 :                 if (rel->flag != TRIGGER_WRAPPER && op) {
    1129        5063 :                         sql_subfunc *f = op->f;
    1130        5063 :                         if (f->func->lang == FUNC_LANG_SQL) {
    1131          95 :                                 set_count_prop(v->sql->sa, rel, 1000 /* just some fallback value */);
    1132        4968 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "storage") == 0) {
    1133         479 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of catalog */);
    1134        4489 :                         } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "db_users") == 0) {
    1135           0 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of users */);
    1136        4489 :                         } else if (f->func->lang == FUNC_LANG_MAL && strncmp(f->func->base.name, "querylog", 8) == 0) {
    1137        1009 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of querylog */);
    1138        3480 :                         } else if (f->func->lang == FUNC_LANG_MAL &&
    1139        3375 :                                            (strcmp(f->func->base.name, "queue") == 0 ||
    1140        3120 :                                                 strcmp(f->func->base.name, "optimizers") == 0 ||
    1141        2845 :                                                 strcmp(f->func->base.name, "env") == 0 ||
    1142        2589 :                                                 strcmp(f->func->base.name, "keywords") == 0 ||
    1143        2589 :                                                 strcmp(f->func->base.name, "statistics") == 0 ||
    1144        1985 :                                                 strcmp(f->func->base.name, "rejects") == 0 ||
    1145        1742 :                                                 strcmp(f->func->base.name, "schemastorage") == 0 ||
    1146        1742 :                                                 strncmp(f->func->base.name, "storage", 7) == 0 ||
    1147        1742 :                                                 strcmp(f->func->base.name, "sessions") == 0) ) {
    1148        1879 :                                 set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of queue */);
    1149             :                         }
    1150             :                         /* else {
    1151             :                                 printf("%%func needs stats : %s\n", f->func->base.name);
    1152             :                         } */
    1153             :                 }
    1154             :                 break;
    1155             :         }
    1156             :         /*These relations are less important for now
    1157             :           TODO later we can tune it */
    1158             :         case op_insert:
    1159             :         case op_update:
    1160             :         case op_delete:
    1161             :         case op_truncate:
    1162             :         case op_ddl:
    1163             :         default:
    1164             :                 break;
    1165             :         }
    1166             : 
    1167             :         return rel;
    1168             : }
    1169             : 
    1170             : static sql_rel *
    1171      530278 : rel_get_statistics(visitor *v, global_props *gp, sql_rel *rel)
    1172             : {
    1173             :         /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
    1174      530278 :         uint8_t has_special_modify = (uint8_t) gp->has_special_modify;
    1175      530278 :         v->data = &has_special_modify;
    1176      530278 :         rel = rel_visitor_bottomup(v, rel, &rel_get_statistics_);
    1177      530754 :         v->data = gp;
    1178      530754 :         return rel;
    1179             : }
    1180             : 
    1181             : run_optimizer
    1182      698405 : bind_get_statistics(visitor *v, global_props *gp)
    1183             : {
    1184      698405 :         (void) v;
    1185      698405 :         return (gp->opt_level == 1 && !gp->cnt[op_insert]) ? rel_get_statistics : NULL;
    1186             : }
    1187             : 
    1188             : 
    1189             : static bool
    1190       97899 : point_select_on_unique_column(sql_rel *rel)
    1191             : {
    1192       97899 :         if (is_select(rel->op) && !list_empty(rel->exps)) {
    1193      130223 :                 for (node *n = rel->exps->h; n ; n = n->next) {
    1194       75002 :                         sql_exp *e = n->data, *el = e->l, *er = e->r, *found = NULL;
    1195             : 
    1196       75002 :                         if (is_compare(e->type) && e->flag == cmp_equal) {
    1197       33312 :                                 if (is_numeric_upcast(el))
    1198           0 :                                         el = el->l;
    1199       33312 :                                 if (is_numeric_upcast(er))
    1200           0 :                                         er = er->l;
    1201       33312 :                                 if (is_alias(el->type) && exp_is_atom(er) && (found = rel_find_exp(rel->l, el)) &&
    1202         738 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(found) || !has_nil(er)))
    1203             :                                         return true;
    1204       32575 :                                 if (is_alias(er->type) && exp_is_atom(el) && (found = rel_find_exp(rel->l, er)) &&
    1205           0 :                                         is_unique(found) && (!is_semantics(e) || !has_nil(el) || !has_nil(found)))
    1206             :                                         return true;
    1207             :                         }
    1208             :                 }
    1209             :         }
    1210             :         return false;
    1211             : }
    1212             : 
    1213             : /*
    1214             :  * A point select on an unique column reduces the number of rows to 1. If the same select is under a
    1215             :  * join, the opposite side's select can be pushed above the join.
    1216             :  */
    1217             : static inline sql_rel *
    1218     1305013 : rel_push_select_up(visitor *v, sql_rel *rel)
    1219             : {
    1220     1305013 :         if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) || is_semi(rel->op)) && !is_single(rel)) {
    1221      257554 :                 sql_rel *l = rel->l, *r = rel->r;
    1222      257554 :                 bool can_pushup_left = is_select(l->op) && !rel_is_ref(l) && !is_single(l) && (is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op)),
    1223      257554 :                         can_pushup_right = is_select(r->op) && !rel_is_ref(r) && !is_single(r) && (is_innerjoin(rel->op) || is_right(rel->op));
    1224             : 
    1225      257554 :                 if (can_pushup_left || can_pushup_right) {
    1226       70045 :                         if (can_pushup_left)
    1227       48167 :                                 can_pushup_left = point_select_on_unique_column(r);
    1228       70045 :                         if (can_pushup_right)
    1229       49732 :                                 can_pushup_right = point_select_on_unique_column(l);
    1230             : 
    1231             :                         /* if both selects retrieve one row each, it's not worth it to push both up */
    1232       70045 :                         if (can_pushup_left && !can_pushup_right) {
    1233         671 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1234         671 :                                 nrel->l = l->l;
    1235         671 :                                 rel = rel_inplace_select(rel, nrel, l->exps);
    1236         671 :                                 assert(is_select(rel->op));
    1237         671 :                                 v->changes++;
    1238       69374 :                         } else if (!can_pushup_left && can_pushup_right) {
    1239          18 :                                 sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
    1240          18 :                                 nrel->r = r->l;
    1241          18 :                                 rel = rel_inplace_select(rel, nrel, r->exps);
    1242          18 :                                 assert(is_select(rel->op));
    1243          18 :                                 v->changes++;
    1244             :                         }
    1245             :                 }
    1246             :         }
    1247     1305013 :         return rel;
    1248             : }
    1249             : 
    1250             : static int
    1251      102359 : sql_class_base_score(visitor *v, sql_column *c, sql_subtype *t, bool equality_based)
    1252             : {
    1253      102359 :         int de;
    1254             : 
    1255      102359 :         if (!t)
    1256             :                 return 0;
    1257      102359 :         switch (ATOMstorage(t->type->localtype)) {
    1258             :         case TYPE_bte:
    1259             :                 return 150 - 8;
    1260        3608 :         case TYPE_sht:
    1261        3608 :                 return 150 - 16;
    1262       39432 :         case TYPE_int:
    1263       39432 :                 return 150 - 32;
    1264         990 :         case TYPE_void:
    1265             :         case TYPE_lng:
    1266         990 :                 return 150 - 64;
    1267             :         case TYPE_uuid:
    1268             : #ifdef HAVE_HGE
    1269             :         case TYPE_hge:
    1270             : #endif
    1271             :                 return 150 - 128;
    1272           5 :         case TYPE_flt:
    1273           5 :                 return 75 - 24;
    1274             :         case TYPE_dbl:
    1275             :                 return 75 - 53;
    1276       31848 :         default:
    1277       31848 :                 if (equality_based && c && v->storage_based_opt && (de = mvc_is_duplicate_eliminated(v->sql, c)))
    1278        1326 :                         return 150 - de * 8;
    1279             :                 /* strings and blobs not duplicate eliminated don't get any points here */
    1280             :                 return 0;
    1281             :         }
    1282             : }
    1283             : 
    1284             : static int
    1285       41397 : score_se_base(visitor *v, sql_rel *rel, sql_exp *e)
    1286             : {
    1287       41397 :         int res = 0;
    1288       41397 :         sql_subtype *t = exp_subtype(e);
    1289       41397 :         sql_column *c = NULL;
    1290             : 
    1291             :         /* can we find out if the underlying table is sorted */
    1292       41397 :         if ((c = exp_find_column(rel, e, -2)) && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1293       41397 :                 res += 600;
    1294             : 
    1295             :         /* prefer the shorter var types over the longer ones */
    1296       41397 :         res += sql_class_base_score(v, c, t, is_equality_or_inequality_exp(e->flag)); /* smaller the type, better */
    1297       41397 :         return res;
    1298             : }
    1299             : 
    1300             : static int
    1301       65859 : score_se(visitor *v, sql_rel *rel, sql_exp *e)
    1302             : {
    1303       65859 :         int score = 0;
    1304       65859 :         if (e->type == e_cmp && !is_complex_exp(e->flag)) {
    1305       41397 :                 sql_exp *l = e->l;
    1306             : 
    1307       41397 :                 while (l->type == e_cmp) { /* go through nested comparisons */
    1308           2 :                         sql_exp *ll;
    1309             : 
    1310           2 :                         if (l->flag == cmp_filter || l->flag == cmp_or)
    1311           0 :                                 ll = ((list*)l->l)->h->data;
    1312             :                         else
    1313           2 :                                 ll = l->l;
    1314           2 :                         if (ll->type != e_cmp)
    1315             :                                 break;
    1316             :                         l = ll;
    1317             :                 }
    1318       41397 :                 score += score_se_base(v, rel, l);
    1319             :         }
    1320       65859 :         score += exp_keyvalue(e);
    1321       65859 :         return score;
    1322             : }
    1323             : 
    1324             : static inline sql_rel *
    1325     1305015 : rel_select_order(visitor *v, sql_rel *rel)
    1326             : {
    1327     1305015 :         int *scores = NULL;
    1328     1305015 :         sql_exp **exps = NULL;
    1329             : 
    1330     1305015 :         if (is_select(rel->op) && list_length(rel->exps) > 1) {
    1331       30744 :                 node *n;
    1332       30744 :                 int i, nexps = list_length(rel->exps);
    1333       30744 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, nexps);
    1334       30744 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, nexps);
    1335             : 
    1336       96603 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next) {
    1337       65911 :                         exps[i] = n->data;
    1338       65911 :                         if (find_prop(exps[i]->p, PROP_HASHCOL))
    1339             :                                 return rel;
    1340       65859 :                         scores[i] = score_se(v, rel, n->data);
    1341             :                 }
    1342       30692 :                 GDKqsort(scores, exps, NULL, nexps, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1343             : 
    1344       96551 :                 for (i = 0, n = rel->exps->h; n; i++, n = n->next)
    1345       65859 :                         n->data = exps[i];
    1346             :         }
    1347             : 
    1348             :         return rel;
    1349             : }
    1350             : 
    1351             : /* Compute the efficiency of using this expression early in a group by list */
    1352             : static int
    1353       60962 : score_gbe(visitor *v, sql_rel *rel, sql_exp *e)
    1354             : {
    1355       60962 :         int res = 0;
    1356       60962 :         sql_subtype *t = exp_subtype(e);
    1357       60962 :         sql_column *c = exp_find_column(rel, e, -2);
    1358             : 
    1359       60962 :         if (e->card == CARD_ATOM) /* constants are trivial to group */
    1360          63 :                 res += 1000;
    1361             :         /* can we find out if the underlying table is sorted */
    1362       60962 :         if (is_unique(e) || find_prop(e->p, PROP_HASHCOL) || (c && v->storage_based_opt && mvc_is_unique(v->sql, c))) /* distinct columns */
    1363        3431 :                 res += 700;
    1364       40639 :         if (c && v->storage_based_opt && mvc_is_sorted(v->sql, c))
    1365        3453 :                 res += 500;
    1366       60962 :         if (find_prop(e->p, PROP_HASHIDX)) /* has hash index */
    1367           0 :                 res += 200;
    1368             : 
    1369             :         /* prefer the shorter var types over the longer ones */
    1370       60962 :         res += sql_class_base_score(v, c, t, true); /* smaller the type, better */
    1371       60962 :         return res;
    1372             : }
    1373             : 
    1374             : /* reorder group by expressions */
    1375             : static inline sql_rel *
    1376     1305016 : rel_groupby_order(visitor *v, sql_rel *rel)
    1377             : {
    1378     1305016 :         int *scores = NULL;
    1379     1305016 :         sql_exp **exps = NULL;
    1380             : 
    1381     1305016 :         if (is_groupby(rel->op) && list_length(rel->r) > 1) {
    1382       25781 :                 node *n;
    1383       25781 :                 list *gbe = rel->r;
    1384       25781 :                 int i, ngbe = list_length(gbe);
    1385       25781 :                 scores = SA_NEW_ARRAY(v->sql->ta, int, ngbe);
    1386       25781 :                 exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, ngbe);
    1387             : 
    1388             :                 /* first sorting step, give priority for integers and sorted columns */
    1389       86743 :                 for (i = 0, n = gbe->h; n; i++, n = n->next) {
    1390       60962 :                         exps[i] = n->data;
    1391       60962 :                         scores[i] = score_gbe(v, rel, exps[i]);
    1392             :                 }
    1393       25781 :                 GDKqsort(scores, exps, NULL, ngbe, sizeof(int), sizeof(void *), TYPE_int, true, true);
    1394             : 
    1395             :                 /* second sorting step, give priority to strings with lower number of digits */
    1396       51102 :                 for (i = ngbe - 1; i && !scores[i]; i--); /* find expressions with no score from the first round */
    1397       25781 :                 if (scores[i])
    1398       24809 :                         i++;
    1399       25781 :                 if (ngbe - i > 1) {
    1400        8483 :                         for (int j = i; j < ngbe; j++) {
    1401        6505 :                                 sql_subtype *t = exp_subtype(exps[j]);
    1402        6505 :                                 scores[j] = t ? t->digits : 0;
    1403             :                         }
    1404             :                         /* the less number of digits the better, order ascending */
    1405        1978 :                         GDKqsort(scores + i, exps + i, NULL, ngbe - i, sizeof(int), sizeof(void *), TYPE_int, false, true);
    1406             :                 }
    1407             : 
    1408       86743 :                 for (i = 0, n = gbe->h; n; i++, n = n->next)
    1409       60962 :                         n->data = exps[i];
    1410             :         }
    1411             : 
    1412     1305016 :         return rel;
    1413             : }
    1414             : 
    1415             : /* This optimization loop contains optimizations that can potentially use statistics */
    1416             : static sql_rel *
    1417     1305014 : rel_final_optimization_loop_(visitor *v, sql_rel *rel)
    1418             : {
    1419             :         /* Run rel_push_select_up only once at the end to avoid an infinite optimization loop */
    1420     1305014 :         rel = rel_push_select_up(v, rel);
    1421     1305015 :         rel = rel_select_order(v, rel);
    1422             : 
    1423             :         /* TODO? Maybe later add rel_simplify_count, rel_join2semijoin, rel_simplify_fk_joins,
    1424             :            rel_distinct_project2groupby, rel_simplify_predicates, rel_simplify_math,
    1425             :            rel_distinct_aggregate_on_unique_values */
    1426             : 
    1427     1305016 :         rel = rel_groupby_order(v, rel);
    1428     1305016 :         return rel;
    1429             : }
    1430             : 
    1431             : static sql_rel *
    1432       54815 : rel_final_optimization_loop(visitor *v, global_props *gp, sql_rel *rel)
    1433             : {
    1434       54815 :         (void) gp;
    1435       54815 :         return rel_visitor_bottomup(v, rel, &rel_final_optimization_loop_);
    1436             : }
    1437             : 
    1438             : run_optimizer
    1439      699111 : bind_final_optimization_loop(visitor *v, global_props *gp)
    1440             : {
    1441      699111 :         int flag = v->sql->sql_optimizer;
    1442             :         /* At the moment, this optimizer has dependency on 3 flags */
    1443      538549 :         return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_select]) &&
    1444      753927 :                 (flag & push_select_up) && (flag & optimize_select_and_joins_topdown) && (flag & optimize_projections) ? rel_final_optimization_loop : NULL;
    1445             : }

Generated by: LCOV version 1.14