Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer_private.h"
15 : #include "rel_statistics.h"
16 : #include "rel_rewriter.h"
17 :
18 : static sql_exp *
19 3817953 : comparison_find_column(sql_exp *input, sql_exp *e)
20 : {
21 3817953 : switch (input->type) {
22 99867 : case e_convert: {
23 99867 : list *types = (list *)input->r;
24 99867 : sql_class from = ((sql_subtype*)types->h->data)->type->eclass, to = ((sql_subtype*)types->h->next->data)->type->eclass;
25 99867 : if (from == to)
26 192882 : return comparison_find_column(input->l, e) ? input : NULL;
27 : return NULL;
28 : }
29 3163877 : case e_column:
30 3163877 : return exp_match(e, input) ? input : NULL;
31 : default:
32 : return NULL;
33 : }
34 : }
35 :
36 : /* multi lo <= col <= hi, maybe still be false even if lo or hi are NULL, possibly similar for filter on multiple
37 : * columns */
38 : #define comp_single_column(c) (!c->f)
39 :
40 : static sql_exp *
41 8318293 : rel_propagate_column_ref_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
42 : {
43 8402049 : assert(e->type == e_column);
44 8402049 : if (rel) {
45 8402009 : switch(rel->op) {
46 3972628 : case op_left:
47 : case op_right:
48 : case op_full:
49 : case op_join:
50 : case op_select:
51 : case op_anti:
52 : case op_semi: {
53 3972628 : bool found_without_semantics = false, found_left = false, found_right = false, still_unique = false;
54 :
55 3972628 : if ((is_innerjoin(rel->op) || is_select(rel->op)) && list_length(rel->exps) == 1 && exp_is_false(rel->exps->h->data))
56 : return NULL; /* nothing will pass, skip */
57 :
58 : /* propagate from the bottom first */
59 3964680 : if (rel_propagate_column_ref_statistics(sql, rel->l, e))
60 : found_left = true;
61 2532695 : if (!found_left && is_join(rel->op) && rel_propagate_column_ref_statistics(sql, rel->r, e))
62 3964680 : found_right = true;
63 :
64 3964680 : if (!found_left && !found_right)
65 : return NULL;
66 1686717 : if (!list_empty(rel->exps) && rel->op != op_anti) { /* if there's an or, the MIN and MAX get difficult to propagate */
67 3594472 : for (node *n = rel->exps->h ; n ; n = n->next) {
68 1991261 : sql_exp *comp = n->data, *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL, *fe = comp->f, *fne = NULL;
69 :
70 1991261 : if (comp->type == e_cmp) {
71 1990390 : if (is_theta_exp(comp->flag) && ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) || (fe && (fne = comparison_find_column(fe, e))))) {
72 134297 : atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
73 134297 : *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
74 :
75 : /* not semantics found or if explicitly filtering not null values from the column */
76 134297 : found_without_semantics |= (!is_semantics(comp) && comp_single_column(comp)) || (comp->flag == cmp_equal && lne && is_anti(comp) && exp_is_null(re));
77 134297 : still_unique |= comp->flag == cmp_equal && is_unique(le) && is_unique(re); /* unique if only equi-joins on unique columns are there */
78 134297 : if (is_full(rel->op) || (is_left(rel->op) && found_left) || (is_right(rel->op) && found_right)) /* on outer joins, min and max cannot be propagated on some cases */
79 19149 : continue;
80 : /* if (end2 >= start1 && start2 <= end1) then the 2 intervals are intersected */
81 115148 : if (fe && lval_min && lval_max) { /* range case, the middle expression must intersect the other two */
82 4579 : atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
83 4579 : int int1 = rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0,
84 4579 : int2 = fval_min && fval_max && atom_cmp(fval_max, lval_min) >= 0 && atom_cmp(fval_min, lval_max) <= 0,
85 4579 : symmetric = is_symmetric(comp);
86 :
87 4579 : if (is_anti(comp) || (!symmetric && fval_min && rval_max && atom_cmp(fval_min, rval_max) < 0)) /* for asymmetric case the re range must be after the fe range */
88 1814 : continue;
89 2765 : if (lne && int1 && int2) {
90 104 : if (symmetric) {
91 0 : prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
92 0 : atom *nmin = statistics_atom_min(sql, rval_min, fval_min), *nmax = statistics_atom_max(sql, rval_max, fval_max);
93 : /* max is min from le and (max from re and fe max) */
94 0 : set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, nmax, p2->value.pval) : nmax);
95 : /* min is max from le and (min from re and fe min) */
96 0 : set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, nmin, p1->value.pval) : nmin);
97 : } else {
98 104 : prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
99 : /* max is min from le and fe max */
100 104 : set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, fval_max, p2->value.pval) : fval_max);
101 : /* min is max from le and re min */
102 104 : set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, rval_min, p1->value.pval) : rval_min);
103 : }
104 2661 : } else if (rne) {
105 584 : if (symmetric && int1 && int2) { /* min is max from le and (min from re and fe min) */
106 0 : prop *p = find_prop(e->p, PROP_MIN);
107 0 : atom *nmin = p ? statistics_atom_min(sql, p->value.pval, fval_min) : fval_min;
108 0 : set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, nmin, lval_min));
109 584 : } else if (int1) { /* min is max from le and re min */
110 100 : prop *p = find_prop(e->p, PROP_MIN);
111 100 : set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
112 : }
113 2077 : } else if (fne) {
114 546 : if (symmetric && int1 && int2) { /* max is min from le and (max from re and fe max) */
115 0 : prop *p = find_prop(e->p, PROP_MAX);
116 0 : atom *nmax = p ? statistics_atom_max(sql, p->value.pval, rval_max) : rval_max;
117 0 : set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, nmax, lval_max) : nmax);
118 546 : } else if (int2) { /* max is min from le and fe max */
119 262 : prop *p = find_prop(e->p, PROP_MAX);
120 262 : set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
121 : }
122 : }
123 110569 : } else if (lval_min && lval_max && rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0) {
124 : /* both min and max must be set and the intervals must overlap */
125 46543 : switch (comp->flag) {
126 32144 : case cmp_equal: /* for equality reduce */
127 32144 : set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_max(sql, lval_max, rval_max) : statistics_atom_min(sql, lval_max, rval_max));
128 32144 : set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_min(sql, lval_min, rval_min) : statistics_atom_max(sql, lval_min, rval_min));
129 32144 : break;
130 4852 : case cmp_notequal: /* for inequality expand */
131 4852 : set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_min(sql, lval_max, rval_max) : statistics_atom_max(sql, lval_max, rval_max));
132 4852 : set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_max(sql, lval_min, rval_min) : statistics_atom_min(sql, lval_min, rval_min));
133 4852 : break;
134 5575 : case cmp_gt:
135 : case cmp_gte:
136 10219 : if (!is_anti(comp) && lne) { /* min is max from both min */
137 4644 : prop *p = find_prop(e->p, PROP_MIN);
138 4644 : set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min, p->value.pval) : rval_min);
139 931 : } else if (!is_anti(comp)) { /* max is min from both max */
140 931 : prop *p = find_prop(e->p, PROP_MAX);
141 931 : set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
142 : }
143 : break;
144 3972 : case cmp_lt:
145 : case cmp_lte:
146 7151 : if (!is_anti(comp) && lne) { /* max is min from both max */
147 3179 : prop *p = find_prop(e->p, PROP_MAX);
148 3179 : set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max, p->value.pval) : rval_max);
149 793 : } else if (!is_anti(comp)) { /* min is max from both min */
150 793 : prop *p = find_prop(e->p, PROP_MIN);
151 793 : set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
152 : }
153 : break;
154 : default: /* Maybe later I can do cmp_in and cmp_notin */
155 : break;
156 : }
157 : }
158 : }
159 : }
160 : }
161 : }
162 1686717 : if (is_full(rel->op) || (is_left(rel->op) && found_right) || (is_right(rel->op) && found_left))
163 37395 : set_has_nil(e);
164 1686717 : if (!is_outerjoin(rel->op) && found_without_semantics) /* at an outer join, null values pass */
165 104023 : set_has_no_nil(e);
166 1686717 : if (is_join(rel->op) && is_unique(e) && !still_unique)
167 99836 : set_not_unique(e);
168 1686717 : return e;
169 : }
170 4326434 : case op_table:
171 : case op_basetable:
172 : case op_union:
173 : case op_except:
174 : case op_inter:
175 : case op_munion:
176 : case op_project:
177 : case op_groupby: {
178 4326434 : sql_exp *found;
179 4326434 : atom *fval;
180 4326434 : prop *est;
181 4326434 : if ((found = rel_find_exp(rel, e))) {
182 1927520 : if (rel->op != op_table) { /* At the moment don't propagate statistics for table relations */
183 1889762 : if ((fval = find_prop_and_get(found->p, PROP_MAX)))
184 1009753 : set_minmax_property(sql, e, PROP_MAX, fval);
185 1889760 : if ((fval = find_prop_and_get(found->p, PROP_MIN)))
186 1015899 : set_minmax_property(sql, e, PROP_MIN, fval);
187 1889764 : if (!has_nil(found))
188 449631 : set_has_no_nil(e);
189 1889764 : if (is_unique(found) || (need_distinct(rel) && list_length(rel->exps) == 1) ||
190 1531137 : (is_groupby(rel->op) && (list_empty(rel->r) || (list_length(rel->r) == 1 && exps_find_exp(rel->r, e)))))
191 369379 : set_unique(e);
192 : /* propagate unique estimation for known cases */
193 1889764 : if (is_groupby(rel->op) && list_empty(rel->r) && !find_prop(e->p, PROP_NUNIQUES)) { /* global aggregate case */
194 6654 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
195 6654 : p->value.dval = 1;
196 1883111 : } else if (((is_basetable(rel->op) || is_except(rel->op) || is_inter(rel->op) || is_simple_project(rel->op) ||
197 64223 : (is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
198 1801897 : (est = find_prop(found->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES))) {
199 93885 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
200 93884 : p->value.dval = est->value.dval;
201 : }
202 : }
203 1927525 : return e;
204 : }
205 : return NULL;
206 : }
207 83756 : case op_topn:
208 : case op_sample:
209 83756 : return rel_propagate_column_ref_statistics(sql, rel->l, e);
210 : default:
211 : break;
212 : }
213 : }
214 : return NULL;
215 : }
216 :
217 : static atom *
218 4429837 : atom_from_valptr( allocator *sa, sql_subtype *tpe, ValRecord *v)
219 : {
220 4429837 : atom *a = SA_NEW(sa, atom);
221 :
222 4429857 : assert(!VALisnil(v));
223 4429909 : *a = (atom) {.tpe = *tpe,};
224 4429909 : SA_VALcopy(sa, &a->data, v);
225 4430020 : return a;
226 : }
227 :
228 : void
229 4004501 : sql_column_get_statistics(mvc *sql, sql_column *c, sql_exp *e)
230 : {
231 4004501 : bool nonil = false, unique = false;
232 4004501 : double unique_est = 0.0;
233 4004501 : ValRecord min, max;
234 4004501 : int ok = mvc_col_stats(sql, c, &nonil, &unique, &unique_est, &min, &max);
235 :
236 4005464 : if (has_nil(e) && nonil)
237 871094 : set_has_no_nil(e);
238 4005464 : if (!is_unique(e) && unique)
239 1061404 : set_unique(e);
240 4005464 : if (unique_est != 0.0) {
241 2835197 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
242 2834962 : p->value.dval = unique_est;
243 : }
244 4005229 : unsigned int digits = 0;
245 4005229 : sql_subtype *et = exp_subtype(e);
246 4005599 : if (et->type->eclass == EC_DEC || et->type->eclass == EC_NUM)
247 2608514 : digits = et->digits;
248 4005599 : if ((ok & 2) == 2) {
249 2212558 : if (!VALisnil(&max)) {
250 2212520 : prop *p = e->p = prop_create(sql->sa, PROP_MAX, e->p);
251 2212480 : p->value.pval = atom_from_valptr(sql->sa, &c->type, &max);
252 2212482 : if (digits) {
253 1649802 : unsigned int nd = atom_digits(p->value.pval);
254 1649781 : if (nd < digits)
255 : digits = nd;
256 1649781 : if (!digits)
257 : digits = 1;
258 : }
259 : }
260 2212405 : VALclear(&max);
261 : }
262 4005430 : if ((ok & 1) == 1) {
263 2217862 : if (!VALisnil(&min)) {
264 2217857 : prop *p = e->p = prop_create(sql->sa, PROP_MIN, e->p);
265 2217891 : p->value.pval = atom_from_valptr(sql->sa, &c->type, &min);
266 2218010 : if (digits) {
267 1655956 : unsigned int nd = atom_digits(p->value.pval);
268 1655967 : if (nd > digits) /* possibly set to low by max value */
269 : digits = nd;
270 : if (!digits)
271 : digits = 1;
272 : }
273 : }
274 2218012 : VALclear(&min);
275 : }
276 4005595 : if (digits)
277 2608456 : et->digits = digits;
278 4005595 : if (et->type->eclass == EC_DEC && et->digits <= et->scale)
279 209 : et->digits = et->scale + 1;
280 4005595 : }
281 :
282 : static void
283 872598 : rel_basetable_column_get_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
284 : {
285 872598 : if (e->p)
286 : return;
287 272125 : sql_column *c = NULL;
288 :
289 272125 : if ((c = name_find_column(rel, exp_relname(e), exp_name(e), -2, NULL))) {
290 171780 : sql_column_get_statistics(sql, c, e);
291 : }
292 : }
293 :
294 : static bool
295 16651 : rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, sql_exp *e, int i)
296 : {
297 16651 : sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
298 16651 : atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
299 16651 : *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
300 16651 : prop *est;
301 :
302 : /* for the intersection, if both expresssions don't overlap, it can be pruned */
303 16651 : if (is_inter(rel->op) && !has_nil(le) && !has_nil(re) &&
304 112 : ((rval_max && lval_min && atom_cmp(rval_max, lval_min) < 0) || (rval_min && lval_max && atom_cmp(rval_min, lval_max) > 0)))
305 17 : return true;
306 :
307 16634 : if (lval_max && rval_max) {
308 5371 : if (is_union(rel->op))
309 2907 : set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
310 2464 : else if (is_inter(rel->op))
311 387 : set_minmax_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval_max, rval_max)); /* for intersect the new max will be the min of the two */
312 : else /* except */
313 2077 : set_minmax_property(sql, e, PROP_MAX, lval_max);
314 : }
315 16634 : if (lval_min && rval_min) {
316 5371 : if (is_union(rel->op))
317 2907 : set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
318 2464 : else if (is_inter(rel->op))
319 387 : set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval_min, rval_min)); /* for intersect the new min will be the max of the two */
320 : else /* except */
321 2077 : set_minmax_property(sql, e, PROP_MIN, lval_min);
322 : }
323 :
324 16634 : if (is_union(rel->op) || is_munion(rel->op)) {
325 13924 : if (!has_nil(le) && !has_nil(re))
326 2938 : set_has_no_nil(e);
327 13924 : if (need_distinct(rel) && list_length(rel->exps) == 1)
328 11 : set_unique(e);
329 2710 : } else if (is_inter(rel->op)) {
330 496 : if (!has_nil(le) || !has_nil(re))
331 208 : set_has_no_nil(e);
332 496 : if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
333 344 : set_unique(e);
334 : } else {
335 2214 : assert(is_except(rel->op));
336 2214 : if (!has_nil(le))
337 241 : set_has_no_nil(e);
338 2214 : if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
339 1991 : set_unique(e);
340 : }
341 : /* propagate unique estimation for known cases */
342 16634 : if (!is_union(rel->op) && (est = find_prop(le->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES)) {
343 117 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
344 117 : p->value.dval = est->value.dval;
345 : }
346 : return false;
347 : }
348 :
349 :
350 : static void
351 105317 : rel_munion_get_statistics(mvc *sql, sql_rel *rel, list *rels, sql_exp *e, int i)
352 : {
353 105317 : assert(is_munion(rel->op));
354 :
355 105317 : sql_rel *l = rels->h->data;
356 105317 : sql_exp *le = list_fetch(l->exps, i);
357 105317 : atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX);
358 105317 : bool has_nonil = !has_nil(le);
359 :
360 210634 : for(node *n = rels->h->next; n; n = n->next) {
361 105317 : sql_rel *r = n->data;
362 105317 : sql_exp *re = list_fetch(r->exps, i);
363 105317 : atom *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
364 :
365 105317 : if (lval_max && rval_max) {
366 17137 : set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
367 17137 : lval_max = find_prop_and_get(e->p, PROP_MAX);
368 : }
369 105317 : if (lval_min && rval_min) {
370 16917 : set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
371 16917 : lval_min = find_prop_and_get(e->p, PROP_MIN);
372 : }
373 105317 : has_nonil &= !has_nil(re);
374 :
375 : }
376 :
377 105317 : if (has_nonil)
378 18863 : set_has_no_nil(e);
379 :
380 105317 : if (need_distinct(rel) && list_length(rel->exps) == 1)
381 1573 : set_unique(e);
382 105317 : }
383 :
384 :
385 : static sql_exp *
386 3138981 : rel_propagate_statistics(visitor *v, sql_rel *rel, sql_exp *e, int depth)
387 : {
388 3138981 : mvc *sql = v->sql;
389 3138981 : atom *lval;
390 :
391 3138981 : (void) depth;
392 3138981 : switch(e->type) {
393 1932096 : case e_column:
394 1932096 : switch (rel->op) { /* set relations don't call rel_propagate_statistics */
395 249721 : case op_join:
396 : case op_left:
397 : case op_right:
398 : case op_full:
399 : case op_semi:
400 : case op_anti: {
401 249721 : sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e);
402 249721 : if (!found)
403 125303 : (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
404 : break;
405 : }
406 1682375 : case op_select:
407 : case op_project:
408 : case op_groupby: {
409 1682375 : sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e); /* labels may be found on the same projection, ugh */
410 1682376 : if (!found && is_simple_project(rel->op))
411 118173 : (void) rel_propagate_column_ref_statistics(sql, rel, e);
412 : break;
413 : }
414 0 : case op_insert:
415 : case op_update:
416 : case op_delete:
417 0 : (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
418 0 : break;
419 : default:
420 : break;
421 : }
422 : break;
423 81741 : case e_convert: {
424 81741 : sql_subtype *to = exp_totype(e), *from = exp_fromtype(e);
425 81741 : sql_exp *l = e->l;
426 81741 : sql_class fr = from->type->eclass, too = to->type->eclass;
427 81741 : prop *est;
428 :
429 81741 : if (fr == too) {
430 73097 : if ((lval = find_prop_and_get(l->p, PROP_MAX))) {
431 45223 : atom *res = atom_copy(sql->sa, lval);
432 45223 : if ((res = atom_cast(sql->sa, res, to)))
433 45202 : set_minmax_property(sql, e, PROP_MAX, res);
434 : }
435 73096 : if ((lval = find_prop_and_get(l->p, PROP_MIN))) {
436 45697 : atom *res = atom_copy(sql->sa, lval);
437 45697 : if ((res = atom_cast(sql->sa, res, to)))
438 45676 : set_minmax_property(sql, e, PROP_MIN, res);
439 : }
440 : }
441 : /* propagating nuniques through conversions is tricky. I am adding just the general cases */
442 81740 : if ((est = find_prop(l->p, PROP_NUNIQUES)) &&
443 46952 : (((fr == EC_TIME || fr == EC_TIME_TZ || fr == EC_DATE) && (too == EC_TIMESTAMP || too == EC_TIMESTAMP_TZ)) ||
444 46927 : ((fr == EC_NUM || fr == EC_BIT) && too == EC_NUM) || (fr == EC_DEC && too == EC_DEC) || (fr != EC_EXTERNAL && too == EC_STRING))) {
445 43684 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
446 43684 : p->value.dval = est->value.dval;
447 : }
448 81740 : if (!has_nil(l))
449 33166 : set_has_no_nil(e);
450 : break;
451 : }
452 333348 : case e_aggr:
453 : case e_func: {
454 333348 : BUN lv;
455 333348 : sql_subfunc *f = e->f;
456 :
457 333348 : if (!f->func->s) {
458 306997 : int key = hash_key(f->func->base.name); /* Using hash lookup */
459 306997 : sql_hash_e *he = sql_functions_lookup->buckets[key&(sql_functions_lookup->size-1)];
460 306997 : lookup_function look = NULL;
461 :
462 670900 : for (; he && !look; he = he->chain) {
463 363903 : struct function_properties* fp = (struct function_properties*) he->value;
464 :
465 363903 : if (!strcmp(f->func->base.name, fp->name))
466 104405 : look = fp->func;
467 : }
468 306997 : if (look)
469 104407 : look(sql, e);
470 : }
471 : /* for global aggregates with no semantics, if the left relation has values, then the output won't be NULL */
472 333349 : if (!is_semantics(e) && e->l && !have_nil(e->l) &&
473 40564 : (e->type != e_aggr || (is_groupby(rel->op) && list_length(rel->r)) || ((lv = get_rel_count(rel->l)) != BUN_NONE && lv > 0)))
474 40501 : set_has_no_nil(e);
475 : /* set properties for global aggregates */
476 333349 : if (e->type == e_aggr && is_groupby(rel->op) && list_empty(rel->r)) {
477 6902 : if (!find_prop(e->p, PROP_NUNIQUES)) {
478 6902 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
479 6902 : p->value.dval = 1;
480 : }
481 6902 : set_unique(e);
482 : }
483 : break;
484 : }
485 525017 : case e_atom:
486 525017 : if (e->l) {
487 511089 : atom *a = (atom*) e->l;
488 511089 : if (!a->isnull) {
489 454110 : set_minmax_property(sql, e, PROP_MAX, a);
490 454111 : set_minmax_property(sql, e, PROP_MIN, a);
491 : }
492 511090 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
493 511089 : p->value.dval = 1;
494 13928 : } else if (e->f) {
495 2803 : list *vals = (list *) e->f;
496 2803 : sql_exp *first = vals->h ? vals->h->data : NULL;
497 2803 : atom *max = NULL, *min = NULL; /* all child values must have a valid min/max */
498 2803 : int has_nil = 0;
499 :
500 2803 : if (first) {
501 2803 : max = ((lval = find_prop_and_get(first->p, PROP_MAX))) ? lval : NULL;
502 2803 : min = ((lval = find_prop_and_get(first->p, PROP_MIN))) ? lval : NULL;
503 2803 : has_nil |= has_nil(first);
504 : }
505 :
506 12773 : for (node *n = vals->h ? vals->h->next : NULL ; n ; n = n->next) {
507 7167 : sql_exp *ee = n->data;
508 :
509 7167 : if (min && max) {
510 6713 : if ((lval = find_prop_and_get(ee->p, PROP_MAX))) {
511 6669 : max = atom_cmp(lval, max) > 0 ? lval : max;
512 : } else {
513 : max = NULL;
514 : }
515 6713 : if ((lval = find_prop_and_get(ee->p, PROP_MIN))) {
516 6669 : min = atom_cmp(min, lval) > 0 ? lval : min;
517 : } else {
518 : min = NULL;
519 : }
520 : }
521 7167 : has_nil |= has_nil(ee);
522 : }
523 :
524 2803 : if (!has_nil)
525 2444 : set_has_no_nil(e);
526 2803 : if (min && max) {
527 2420 : set_minmax_property(sql, e, PROP_MAX, max);
528 2420 : set_minmax_property(sql, e, PROP_MIN, min);
529 : }
530 2803 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
531 2803 : p->value.dval = (dbl) list_length(vals);
532 : } else {
533 11125 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
534 11125 : p->value.dval = 1;
535 : }
536 : break;
537 266779 : case e_cmp:
538 : /* TODO? propagating min/max/unique of booleans is not very worth it */
539 266779 : if (e->flag == cmp_or || e->flag == cmp_filter) {
540 18044 : if (!have_nil(e->l) && !have_nil(e->r))
541 2778 : set_has_no_nil(e);
542 248735 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
543 19550 : sql_exp *le = e->l;
544 19550 : if (!has_nil(le) && !have_nil(e->r))
545 837 : set_has_no_nil(e);
546 : } else {
547 229185 : sql_exp *le = e->l, *re = e->r, *fe = e->f;
548 229185 : if (!has_nil(le) && !has_nil(re) && (!e->f || !has_nil(fe)))
549 23310 : set_has_no_nil(e);
550 : }
551 : break;
552 : case e_psm:
553 : break;
554 : }
555 :
556 : #ifndef NDEBUG
557 : {
558 : /* min and max cannot be NULL and min must be <= than max, if it happens the inner relation must be empty! */
559 3138982 : atom *min = find_prop_and_get(e->p, PROP_MIN), *max = find_prop_and_get(e->p, PROP_MAX);
560 :
561 3138981 : (void) min;
562 3138981 : (void) max;
563 3138981 : assert(!min || !min->isnull);
564 3138981 : assert(!max || !max->isnull);
565 3138981 : assert(!min || !max || (min && max)/* atom_cmp(min, max) <= 0*/);
566 : }
567 : #endif
568 3138981 : return e;
569 : }
570 :
571 : static list * /* Remove predicates always false from min/max values */
572 126923 : rel_prune_predicates(visitor *v, sql_rel *rel)
573 : {
574 126923 : if (rel->l) {
575 126923 : sql_rel *l = rel->l;
576 126923 : if (is_single(l))
577 3545 : return rel->exps;
578 : }
579 123378 : if (!list_empty(rel->attr))
580 2683 : return rel->exps;
581 258348 : for (node *n = rel->exps->h ; n ; n = n->next) {
582 137653 : sql_exp *e = n->data;
583 :
584 137653 : if (e->type == e_cmp && is_theta_exp(e->flag)) {
585 113326 : sql_exp *le = e->l, *re = e->r, *fe = e->f;
586 113326 : atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
587 113326 : *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
588 113326 : bool always_false = false, always_true = false;
589 :
590 113326 : if (fe && !is_symmetric(e)) {
591 2964 : atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
592 2964 : comp_type lower = range2lcompare(e->flag), higher = range2rcompare(e->flag);
593 3492 : int not_int1 = rval_min && lval_max && /* the middle and left intervals don't overlap */
594 1515 : (!is_anti(e) && (lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0)),
595 3904 : not_int2 = lval_min && fval_max && /* the middle and right intervals don't overlap */
596 2337 : (!is_anti(e) && (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0)),
597 2964 : not_int3 = rval_min && fval_max && /* the left interval is after the right one */
598 1211 : (!is_anti(e) && (atom_cmp(rval_min, fval_max) > 0));
599 :
600 2964 : always_false |= not_int1 || not_int2 || not_int3;
601 : /* for anti the middle must be before the left or after the right or the right after the left, for the other the middle must be always between the left and right intervals */
602 3438 : always_true |= !has_nil(le) && !has_nil(re) && !has_nil(fe) &&
603 3282 : lval_min && lval_max && rval_min && rval_max && fval_min && fval_max &&
604 49 : (is_anti(e) ? ((lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) || (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
605 35 : ((lower == cmp_gte ? atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0)));
606 : } else if (!fe) {
607 110344 : if (!is_semantics(e)) /* trival not null cmp null case */
608 208616 : always_false |= !is_anti(e) && ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)));
609 110344 : switch (e->flag) {
610 95761 : case cmp_equal:
611 95761 : if (lval_min && lval_max && rval_min && rval_max && (!is_semantics(e) || !has_nil(le) || !has_nil(re)))
612 130920 : always_false |= (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
613 95761 : if (is_semantics(e)) { /* prune *= NULL cases */
614 6012 : always_false |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
615 12024 : always_true |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
616 : }
617 : break;
618 6870 : case cmp_notequal:
619 6870 : if (lval_min && lval_max && rval_min && rval_max)
620 11234 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
621 6870 : if (is_semantics(e)) {
622 24 : always_true |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
623 48 : always_false |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
624 : }
625 : break;
626 5130 : case cmp_gt:
627 5130 : if (lval_max && rval_min)
628 1916 : always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
629 5130 : if (lval_min && rval_max)
630 3832 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
631 : break;
632 94 : case cmp_gte:
633 94 : if (lval_max && rval_min)
634 36 : always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
635 94 : if (lval_min && rval_max)
636 72 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
637 : break;
638 2410 : case cmp_lt:
639 2410 : if (lval_min && rval_max)
640 1358 : always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
641 2410 : if (lval_max && rval_min)
642 2764 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
643 : break;
644 79 : case cmp_lte:
645 79 : if (lval_min && rval_max)
646 9 : always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
647 79 : if (lval_max && rval_min)
648 18 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
649 : break;
650 : default: /* Maybe later I can do cmp_in and cmp_notin but don't forget to remove is_theta_exp check up there */
651 : break;
652 : }
653 : }
654 113326 : assert(!always_false || !always_true);
655 113326 : if (always_false || always_true) {
656 981 : sql_exp *ne = exp_atom_bool(v->sql->sa, always_true /* if always_true set then true else false */);
657 981 : if (exp_name(e))
658 0 : exp_prop_alias(v->sql->sa, ne, e);
659 981 : n->data = ne;
660 981 : v->changes++;
661 : }
662 : }
663 : }
664 120695 : return rel->exps;
665 : }
666 :
667 : static sql_rel *
668 120 : set_setop_side(visitor *v, sql_rel *rel, sql_rel *side)
669 : {
670 120 : if (side == rel->r)
671 75 : rel->r = NULL;
672 : else
673 45 : rel->l = NULL;
674 120 : if (!is_simple_project(side->op) || rel_is_ref(side) || !list_empty(side->r)) {
675 3 : side = rel_project(v->sql->sa, side, rel_projections(v->sql, side, NULL, 0, 1));
676 3 : set_count_prop(v->sql->sa, side, get_rel_count(side->l));
677 3 : side->exps = exps_exp_visitor_bottomup(v, side, side->exps, 0, &rel_propagate_statistics, false);
678 : }
679 471 : for (node *n = rel->exps->h, *m = side->exps->h ; n && m ; n = n->next, m = m->next) {
680 351 : sql_exp *e1 = n->data, *e2 = m->data;
681 351 : exp_prop_alias(v->sql->sa, e2, e1);
682 : }
683 120 : list_hash_clear(side->exps);
684 :
685 120 : if (need_distinct(rel))
686 2 : set_distinct(side);
687 120 : side->p = prop_copy(v->sql->sa, rel->p);
688 120 : rel_destroy(rel);
689 120 : return side;
690 : }
691 :
692 : static BUN
693 15114 : trivial_project_exp_card(sql_exp *e)
694 : {
695 15132 : if (e->type == e_convert)
696 18 : return trivial_project_exp_card(e->l);
697 15114 : return e->type == e_atom && e->f ? (BUN) list_length(e->f) : 1;
698 : }
699 :
700 : static BUN
701 23553 : rel_calc_nuniques(mvc *sql, sql_rel *l, list *exps)
702 : {
703 23553 : BUN lv = get_rel_count(l);
704 :
705 23553 : if (lv == 0)
706 : return 0;
707 20970 : if (!list_empty(exps)) {
708 20970 : BUN nuniques = 0;
709 : /* compute the highest number of unique values */
710 49462 : for (node *n = exps->h ; n && nuniques != BUN_NONE ; n = n->next) {
711 28492 : sql_exp *e = n->data;
712 28492 : sql_rel *bt = NULL;
713 28492 : prop *p = NULL;
714 28492 : BUN euniques = BUN_NONE;
715 28492 : atom *min, *max, *sub = NULL;
716 28492 : sql_subtype *tp = exp_subtype(e);
717 28492 : sql_class ec = tp ? tp->type->eclass : EC_STRING; /* if 'e' has no type (eg parameter), use a non-number type to fail condition */
718 :
719 28492 : if ((p = find_prop(e->p, PROP_NUNIQUES))) {
720 20260 : euniques = (BUN) p->value.dval;
721 8232 : } else if (e->type == e_column && rel_find_exp_and_corresponding_rel(l, e, false, &bt, NULL) && bt && (p = find_prop(bt->p, PROP_COUNT))) {
722 3882 : euniques = (BUN) p->value.lval;
723 : }
724 : /* use min to max range to compute number of possible values in the domain for number types */
725 28492 : if ((EC_TEMP(ec)||ec==EC_NUM||ec==EC_MONTH||ec==EC_POS) &&
726 21751 : (min = find_prop_and_get(e->p, PROP_MIN)) && (max = find_prop_and_get(e->p, PROP_MAX))) {
727 : /* the range includes min and max, so the atom_inc call is needed */
728 : /* if 'euniques' has number of distinct values, compute min between both */
729 17398 : if ((sub = atom_sub(sql->sa, max, min)) && (sub = atom_inc(sql->sa, sub)) && (sub = atom_cast(sql->sa, sub, sql_bind_localtype("oid"))))
730 0 : euniques = MIN(euniques, (BUN) sub->data.val.oval);
731 : }
732 28492 : if (euniques != BUN_NONE)
733 24142 : nuniques = MAX(nuniques, euniques); /* the highest cardinality sets the estimation */
734 : else
735 : nuniques = BUN_NONE;
736 : }
737 20970 : if (nuniques != BUN_NONE)
738 : return nuniques;
739 : }
740 : return lv;
741 : }
742 :
743 : static sql_rel *
744 1869801 : rel_get_statistics_(visitor *v, sql_rel *rel)
745 : {
746 : /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
747 1869801 : uint8_t has_special_modify = *(uint8_t*) v->data;
748 1869801 : bool can_be_pruned = !has_special_modify && v->storage_based_opt;
749 :
750 : /* Don't look at the same relation twice */
751 1869801 : if (are_statistics_gathered(rel->used))
752 : return rel;
753 1274685 : rel->used |= statistics_gathered;
754 :
755 1274685 : switch (rel->op) {
756 299988 : case op_basetable: {
757 299988 : sql_table *t = (sql_table *) rel->l;
758 299988 : sqlstore *store = v->sql->session->tr->store;
759 :
760 299988 : if (!list_empty(rel->exps)) {
761 1172827 : for (node *n = rel->exps->h ; n ; n = n->next)
762 872590 : rel_basetable_column_get_statistics(v->sql, rel, n->data);
763 : }
764 : /* Set table row count. TODO? look for remote tables. Don't look at storage for declared tables, because it won't be cleaned */
765 300250 : if (isTable(t) && t->s && !isDeclaredTable(t)) /* count active rows only */
766 250673 : set_count_prop(v->sql->sa, rel, (BUN)store->storage_api.count_col(v->sql->session->tr, ol_first_node(t->columns)->data, 10));
767 : break;
768 : }
769 4834 : case op_union:
770 : case op_inter:
771 : case op_except: {
772 4834 : bool empty_cross = false;
773 4834 : int i = 0;
774 4834 : sql_rel *l = rel->l, *pl = rel->l, *r = rel->r, *pr = rel->r;
775 :
776 4880 : while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
777 46 : pl = pl->l;
778 4879 : while (is_sample(pr->op) || is_topn(pr->op))
779 45 : pr = pr->l;
780 : /* if it's not a projection, then project and propagate statistics */
781 4834 : if (!is_project(pl->op) && !is_base(pl->op)) {
782 0 : pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
783 0 : set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
784 0 : pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
785 : }
786 4834 : if (!is_project(pr->op) && !is_base(pr->op)) {
787 0 : pr = rel_project(v->sql->sa, pr, rel_projections(v->sql, pr, NULL, 0, 1));
788 0 : set_count_prop(v->sql->sa, pr, get_rel_count(pr->l));
789 0 : pr->exps = exps_exp_visitor_bottomup(v, pr, pr->exps, 0, &rel_propagate_statistics, false);
790 : }
791 :
792 21485 : for (node *n = rel->exps->h ; n ; n = n->next) {
793 16651 : empty_cross |= rel_setop_get_statistics(v->sql, rel, pl->exps, pr->exps, n->data, i);
794 16651 : i++;
795 : }
796 :
797 : /* propagate row count */
798 4834 : if (is_union(rel->op)) {
799 2378 : BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
800 2378 : rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
801 :
802 2378 : if (lv == 0 && rv == 0) { /* both sides empty */
803 66 : if (can_be_pruned)
804 : empty_cross = true;
805 : else
806 2 : set_count_prop(v->sql->sa, rel, 0);
807 2312 : } else if (can_be_pruned && lv == 0 && !rel_is_ref(rel)) { /* left side empty */
808 75 : rel = set_setop_side(v, rel, r);
809 75 : empty_cross = false; /* don't rewrite again */
810 2237 : } else if (can_be_pruned && rv == 0 && !rel_is_ref(rel)) { /* right side empty */
811 32 : rel = set_setop_side(v, rel, l);
812 32 : empty_cross = false; /* don't rewrite again */
813 2205 : } else if (lv != BUN_NONE && rv != BUN_NONE) {
814 1467 : set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
815 : }
816 2456 : } else if (is_inter(rel->op) || is_except(rel->op)) {
817 2456 : BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
818 2456 : rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
819 :
820 2456 : if (lv == 0) { /* left side empty */
821 45 : if (can_be_pruned)
822 : empty_cross = true;
823 : else
824 5 : set_count_prop(v->sql->sa, rel, 0);
825 2411 : } else if (rv == 0) { /* right side empty */
826 16 : if (is_inter(rel->op)) {
827 3 : if (can_be_pruned)
828 : empty_cross = true;
829 : else
830 0 : set_count_prop(v->sql->sa, rel, 0);
831 13 : } else if (can_be_pruned && !rel_is_ref(rel)) {
832 13 : rel = set_setop_side(v, rel, l);
833 13 : empty_cross = false; /* don't rewrite again */
834 : } else {
835 0 : set_count_prop(v->sql->sa, rel, lv);
836 : }
837 : } else {
838 2395 : set_count_prop(v->sql->sa, rel, lv);
839 : }
840 : }
841 4834 : if (can_be_pruned && empty_cross) {
842 118 : rel_destroy(rel->l);
843 118 : rel->l = NULL;
844 118 : rel_destroy(rel->r);
845 118 : rel->r = NULL;
846 527 : for (node *n = rel->exps->h ; n ; n = n->next) {
847 409 : sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
848 409 : exp_prop_alias(v->sql->sa, a, e);
849 409 : n->data = a;
850 : }
851 118 : list_hash_clear(rel->exps);
852 118 : sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
853 118 : set_count_prop(v->sql->sa, l, 1);
854 118 : l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
855 118 : set_count_prop(v->sql->sa, l, 0);
856 118 : rel->op = op_project;
857 118 : rel->l = l;
858 118 : rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
859 118 : set_count_prop(v->sql->sa, rel, 0);
860 118 : set_nodistinct(rel); /* set relations may have distinct flag set */
861 118 : v->changes++;
862 : }
863 : break;
864 : }
865 19271 : case op_munion: {
866 19271 : list *l = rel->l, *nrels = sa_list(v->sql->sa);
867 19271 : BUN cnt = 0;
868 19271 : bool needs_pruning = false;
869 :
870 57813 : for (node *n = l->h; n; n = n->next) {
871 38542 : sql_rel *r = n->data, *pl = r;
872 :
873 38542 : while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
874 0 : pl = pl->l;
875 : /* if it's not a projection, then project and propagate statistics */
876 38542 : if (!is_project(pl->op) && !is_base(pl->op)) {
877 0 : pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
878 0 : set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
879 0 : pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
880 : }
881 38542 : nrels = append(nrels, pl);
882 : /* we need new munion statistics */
883 : /* propagate row count */
884 38542 : BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
885 38542 : if (!rv && can_be_pruned)
886 7002 : needs_pruning = true;
887 38542 : if (rv > (BUN_MAX - cnt)) /* overflow check */
888 38542 : rv = BUN_MAX;
889 : else
890 36698 : cnt += rv;
891 : }
892 19271 : int i = 0;
893 124588 : for (node *n = rel->exps->h ; n ; n = n->next, i++)
894 105317 : rel_munion_get_statistics(v->sql, rel, nrels, n->data, i);
895 :
896 19271 : if (needs_pruning) {
897 5937 : v->changes++;
898 5937 : list *nl = sa_list(l->sa);
899 :
900 17811 : for (node *n = nrels->h; n; n = n->next) {
901 11874 : sql_rel *r = n->data;
902 11874 : BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
903 :
904 11874 : if (!rv) { /* keep last for now */
905 7002 : rel_destroy(r);
906 7002 : continue;
907 : }
908 4872 : nl = append(nl, r);
909 : }
910 5937 : rel->l = nl;
911 5937 : if (list_length(nl) == 1) {
912 4872 : sql_rel *l = rel->l = nl->h->data; /* ugh */
913 4872 : rel->op = op_project;
914 :
915 46563 : for (node *n = rel->exps->h, *m = l->exps->h ; n && m ; n = n->next, m = m->next) {
916 41691 : sql_exp *pe = n->data, *ie = m->data;
917 41691 : sql_exp *ne = exp_ref(v->sql, ie);
918 41691 : exp_setname(v->sql->sa, ne, exp_relname(pe), exp_name(pe));
919 41691 : n->data = ne;
920 : }
921 4872 : list_hash_clear(rel->exps);
922 1065 : } else if (list_empty(nl)) {
923 : /* empty select (project [ nils ] ) */
924 3816 : for (node *n = rel->exps->h ; n ; n = n->next) {
925 2751 : sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
926 2751 : exp_prop_alias(v->sql->sa, a, e);
927 2751 : n->data = a;
928 : }
929 1065 : list_hash_clear(rel->exps);
930 1065 : sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
931 1065 : set_count_prop(v->sql->sa, l, 1);
932 1065 : l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
933 1065 : set_count_prop(v->sql->sa, l, 0);
934 1065 : rel->op = op_project;
935 1065 : rel->l = l;
936 1065 : rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
937 1065 : set_count_prop(v->sql->sa, rel, 0);
938 1065 : set_nodistinct(rel); /* set relations may have distinct flag set */
939 : }
940 : } else {
941 13334 : set_count_prop(v->sql->sa, rel, cnt);
942 : }
943 : break;
944 : }
945 478557 : case op_join:
946 : case op_left:
947 : case op_right:
948 : case op_full:
949 : case op_semi:
950 : case op_anti:
951 : case op_select:
952 : case op_project:
953 : case op_groupby: {
954 478557 : if (is_groupby(rel->op) && !list_empty(rel->r))
955 14937 : rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
956 478557 : rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, &rel_propagate_statistics, false);
957 478552 : if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r))
958 37861 : rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
959 : /* The following optimizations can only be applied after propagating the statistics to rel->exps */
960 478551 : if (can_be_pruned && (is_join(rel->op) || is_select(rel->op)) && !list_empty(rel->exps)) {
961 126923 : int changes = v->changes;
962 126923 : rel->exps = rel_prune_predicates(v, rel);
963 126923 : if (v->changes > changes)
964 961 : rel = rewrite_simplify(v, 0, v->value_based_opt, rel);
965 : }
966 :
967 : /* propagate row count */
968 478551 : sql_rel *l = rel->l, *r = rel->r;
969 478551 : switch (rel->op) {
970 122306 : case op_join:
971 : case op_left:
972 : case op_right:
973 : case op_full: {
974 122306 : BUN lv = get_rel_count(l), rv = get_rel_count(r), uniques_estimate = BUN_MAX, join_idx_estimate = BUN_MAX;
975 :
976 122306 : if (!list_empty(rel->exps) && !is_single(rel)) {
977 221703 : for (node *n = rel->exps->h ; n ; n = n->next) {
978 113453 : sql_exp *e = n->data, *el = e->l, *er = e->r;
979 :
980 113453 : if (find_prop(e->p, PROP_JOINIDX)) {
981 652 : join_idx_estimate = lv>rv?lv:rv;
982 112801 : } else if (e->type == e_cmp && e->flag == cmp_equal) {
983 : /* if one of the sides is unique, the cardinality will be that exact number, but look for nulls */
984 108981 : if (!is_semantics(e) || !has_nil(el) || !has_nil(er)) {
985 108443 : BUN lu = 0, ru = 0;
986 108443 : prop *p = NULL;
987 108443 : if ((p = find_prop(el->p, PROP_NUNIQUES)))
988 83715 : lu = (BUN) p->value.dval;
989 108443 : if ((p = find_prop(er->p, PROP_NUNIQUES)))
990 94081 : ru = (BUN) p->value.dval;
991 108443 : if (is_unique(el) || lu > lv) {
992 65615 : BUN ncount = (is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv;
993 65615 : uniques_estimate = MIN(uniques_estimate, ncount);
994 42828 : } else if (is_unique(er) || ru > rv) {
995 26166 : BUN ncount = (is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv;
996 26166 : uniques_estimate = MIN(uniques_estimate, ncount);
997 : }
998 : }
999 : }
1000 : }
1001 : }
1002 122306 : if (is_single(rel)) {
1003 4840 : set_count_prop(v->sql->sa, rel, lv);
1004 117466 : } else if (join_idx_estimate != BUN_MAX) {
1005 650 : set_count_prop(v->sql->sa, rel, join_idx_estimate);
1006 116816 : } else if (uniques_estimate != BUN_MAX) {
1007 87587 : set_count_prop(v->sql->sa, rel, uniques_estimate);
1008 29229 : } else if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
1009 : /* corner cases for outer joins */
1010 106 : if (is_left(rel->op)) {
1011 95 : set_count_prop(v->sql->sa, rel, lv);
1012 11 : } else if (is_right(rel->op)) {
1013 3 : set_count_prop(v->sql->sa, rel, rv);
1014 8 : } else if (is_full(rel->op) && lv != BUN_NONE && rv != BUN_NONE) {
1015 8 : set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
1016 0 : } else if (lv != BUN_NONE && rv != BUN_NONE) {
1017 0 : set_count_prop(v->sql->sa, rel, 0);
1018 : }
1019 29123 : } else if (lv == 0) {
1020 1104 : set_count_prop(v->sql->sa, rel, (is_right(rel->op) || is_full(rel->op)) ? rv : 0);
1021 28558 : } else if (rv == 0) {
1022 1523 : set_count_prop(v->sql->sa, rel, (is_left(rel->op) || is_full(rel->op)) ? lv : 0);
1023 27511 : } else if (lv != BUN_NONE && rv != BUN_NONE) {
1024 17580 : set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX / lv)) ? BUN_MAX : (lv * rv)); /* overflow check */
1025 : }
1026 : break;
1027 : }
1028 2408 : case op_anti:
1029 2408 : set_count_prop(v->sql->sa, rel, get_rel_count(l));
1030 2408 : break;
1031 101016 : case op_semi:
1032 : case op_select:
1033 : /* TODO calculate cardinalities using selectivities */
1034 101016 : if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
1035 1846 : set_count_prop(v->sql->sa, rel, 0);
1036 : } else {
1037 195471 : if (!list_empty(rel->exps) && !is_single(rel)) {
1038 96301 : BUN cnt = get_rel_count(l), u = 1;
1039 159526 : for (node *n = rel->exps->h ; n ; n = n->next) {
1040 107108 : sql_exp *e = n->data, *el = e->l, *er = e->r;
1041 :
1042 : /* simple expressions first */
1043 107108 : if (e->type == e_cmp && e->flag == cmp_equal && exp_is_atom(er)) {
1044 : /* use selectivity */
1045 53830 : prop *p;
1046 53830 : if ((p = find_prop(el->p, PROP_NUNIQUES))) {
1047 43883 : u = (BUN) p->value.dval;
1048 43883 : break;
1049 : }
1050 : }
1051 : }
1052 : /* u is an *estimate*, so don't set count_prop to 0 unless cnt is 0 */
1053 96301 : set_count_prop(v->sql->sa, rel, cnt == 0 ? 0 : u == 0 || u > cnt ? 1 : cnt/u);
1054 : } else {
1055 2869 : set_count_prop(v->sql->sa, rel, get_rel_count(l));
1056 : }
1057 : }
1058 : break;
1059 231408 : case op_project:
1060 231408 : if (l) {
1061 221890 : if (need_distinct(rel)) {
1062 0 : set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->exps));
1063 : } else {
1064 221890 : set_count_prop(v->sql->sa, rel, get_rel_count(l));
1065 : }
1066 : } else {
1067 9518 : BUN card = 1;
1068 :
1069 9518 : if (!list_empty(rel->exps)) {
1070 23353 : for (node *n = rel->exps->h ; n ; n = n->next) {
1071 13835 : sql_exp *e = n->data;
1072 13835 : card = MAX(card, trivial_project_exp_card(e));
1073 : }
1074 : }
1075 9518 : set_count_prop(v->sql->sa, rel, card);
1076 : }
1077 : break;
1078 21295 : case op_groupby:
1079 21295 : if (list_empty(rel->r)) {
1080 6358 : set_count_prop(v->sql->sa, rel, 1);
1081 : } else {
1082 14937 : set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->r));
1083 : }
1084 : break;
1085 : default:
1086 : break;
1087 : }
1088 : break;
1089 : }
1090 16887 : case op_topn: {
1091 16887 : BUN lv = get_rel_count(rel->l);
1092 :
1093 16886 : if (lv != BUN_NONE) {
1094 16869 : sql_exp *le = rel->exps->h->data, *oe = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
1095 94 : if (oe && oe->l && !exp_is_null(oe)) { /* no parameters */
1096 94 : BUN offset = (BUN) ((atom*)oe->l)->data.val.lval;
1097 94 : lv = offset >= lv ? 0 : lv - offset;
1098 : }
1099 16870 : if (le->l && !exp_is_null(le)) {
1100 16837 : BUN limit = (BUN) ((atom*)le->l)->data.val.lval;
1101 16837 : lv = MIN(lv, limit);
1102 : }
1103 16870 : set_count_prop(v->sql->sa, rel, lv);
1104 : }
1105 : break;
1106 : }
1107 22 : case op_sample: {
1108 22 : BUN lv = get_rel_count(rel->l);
1109 :
1110 22 : if (lv != BUN_NONE) {
1111 4 : sql_exp *se = rel->exps->h->data;
1112 4 : sql_subtype *tp = exp_subtype(se);
1113 :
1114 4 : if (se->l && tp->type->eclass == EC_NUM) { /* sample is a number of rows */
1115 4 : BUN sample = (BUN) ((atom*)se->l)->data.val.lval;
1116 4 : lv = MIN(lv, sample);
1117 0 : } else if (se->l) { /* sample is a percentage of rows */
1118 0 : dbl percent = ((atom*)se->l)->data.val.dval;
1119 0 : assert(tp->type->eclass == EC_FLT);
1120 0 : lv = (BUN) ceil((dbl)lv * percent);
1121 : }
1122 4 : set_count_prop(v->sql->sa, rel, lv);
1123 : }
1124 : break;
1125 : }
1126 5362 : case op_table: {
1127 5362 : sql_exp *op = rel->r;
1128 5362 : if (rel->flag != TRIGGER_WRAPPER && op) {
1129 5063 : sql_subfunc *f = op->f;
1130 5063 : if (f->func->lang == FUNC_LANG_SQL) {
1131 95 : set_count_prop(v->sql->sa, rel, 1000 /* just some fallback value */);
1132 4968 : } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "storage") == 0) {
1133 479 : set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of catalog */);
1134 4489 : } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "db_users") == 0) {
1135 0 : set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of users */);
1136 4489 : } else if (f->func->lang == FUNC_LANG_MAL && strncmp(f->func->base.name, "querylog", 8) == 0) {
1137 1009 : set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of querylog */);
1138 3480 : } else if (f->func->lang == FUNC_LANG_MAL &&
1139 3375 : (strcmp(f->func->base.name, "queue") == 0 ||
1140 3120 : strcmp(f->func->base.name, "optimizers") == 0 ||
1141 2845 : strcmp(f->func->base.name, "env") == 0 ||
1142 2589 : strcmp(f->func->base.name, "keywords") == 0 ||
1143 2589 : strcmp(f->func->base.name, "statistics") == 0 ||
1144 1985 : strcmp(f->func->base.name, "rejects") == 0 ||
1145 1742 : strcmp(f->func->base.name, "schemastorage") == 0 ||
1146 1742 : strncmp(f->func->base.name, "storage", 7) == 0 ||
1147 1742 : strcmp(f->func->base.name, "sessions") == 0) ) {
1148 1879 : set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of queue */);
1149 : }
1150 : /* else {
1151 : printf("%%func needs stats : %s\n", f->func->base.name);
1152 : } */
1153 : }
1154 : break;
1155 : }
1156 : /*These relations are less important for now
1157 : TODO later we can tune it */
1158 : case op_insert:
1159 : case op_update:
1160 : case op_delete:
1161 : case op_truncate:
1162 : case op_ddl:
1163 : default:
1164 : break;
1165 : }
1166 :
1167 : return rel;
1168 : }
1169 :
1170 : static sql_rel *
1171 530278 : rel_get_statistics(visitor *v, global_props *gp, sql_rel *rel)
1172 : {
1173 : /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
1174 530278 : uint8_t has_special_modify = (uint8_t) gp->has_special_modify;
1175 530278 : v->data = &has_special_modify;
1176 530278 : rel = rel_visitor_bottomup(v, rel, &rel_get_statistics_);
1177 530754 : v->data = gp;
1178 530754 : return rel;
1179 : }
1180 :
1181 : run_optimizer
1182 698405 : bind_get_statistics(visitor *v, global_props *gp)
1183 : {
1184 698405 : (void) v;
1185 698405 : return (gp->opt_level == 1 && !gp->cnt[op_insert]) ? rel_get_statistics : NULL;
1186 : }
1187 :
1188 :
1189 : static bool
1190 97899 : point_select_on_unique_column(sql_rel *rel)
1191 : {
1192 97899 : if (is_select(rel->op) && !list_empty(rel->exps)) {
1193 130223 : for (node *n = rel->exps->h; n ; n = n->next) {
1194 75002 : sql_exp *e = n->data, *el = e->l, *er = e->r, *found = NULL;
1195 :
1196 75002 : if (is_compare(e->type) && e->flag == cmp_equal) {
1197 33312 : if (is_numeric_upcast(el))
1198 0 : el = el->l;
1199 33312 : if (is_numeric_upcast(er))
1200 0 : er = er->l;
1201 33312 : if (is_alias(el->type) && exp_is_atom(er) && (found = rel_find_exp(rel->l, el)) &&
1202 738 : is_unique(found) && (!is_semantics(e) || !has_nil(found) || !has_nil(er)))
1203 : return true;
1204 32575 : if (is_alias(er->type) && exp_is_atom(el) && (found = rel_find_exp(rel->l, er)) &&
1205 0 : is_unique(found) && (!is_semantics(e) || !has_nil(el) || !has_nil(found)))
1206 : return true;
1207 : }
1208 : }
1209 : }
1210 : return false;
1211 : }
1212 :
1213 : /*
1214 : * A point select on an unique column reduces the number of rows to 1. If the same select is under a
1215 : * join, the opposite side's select can be pushed above the join.
1216 : */
1217 : static inline sql_rel *
1218 1305013 : rel_push_select_up(visitor *v, sql_rel *rel)
1219 : {
1220 1305013 : if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) || is_semi(rel->op)) && !is_single(rel)) {
1221 257554 : sql_rel *l = rel->l, *r = rel->r;
1222 257554 : bool can_pushup_left = is_select(l->op) && !rel_is_ref(l) && !is_single(l) && (is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op)),
1223 257554 : can_pushup_right = is_select(r->op) && !rel_is_ref(r) && !is_single(r) && (is_innerjoin(rel->op) || is_right(rel->op));
1224 :
1225 257554 : if (can_pushup_left || can_pushup_right) {
1226 70045 : if (can_pushup_left)
1227 48167 : can_pushup_left = point_select_on_unique_column(r);
1228 70045 : if (can_pushup_right)
1229 49732 : can_pushup_right = point_select_on_unique_column(l);
1230 :
1231 : /* if both selects retrieve one row each, it's not worth it to push both up */
1232 70045 : if (can_pushup_left && !can_pushup_right) {
1233 671 : sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
1234 671 : nrel->l = l->l;
1235 671 : rel = rel_inplace_select(rel, nrel, l->exps);
1236 671 : assert(is_select(rel->op));
1237 671 : v->changes++;
1238 69374 : } else if (!can_pushup_left && can_pushup_right) {
1239 18 : sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
1240 18 : nrel->r = r->l;
1241 18 : rel = rel_inplace_select(rel, nrel, r->exps);
1242 18 : assert(is_select(rel->op));
1243 18 : v->changes++;
1244 : }
1245 : }
1246 : }
1247 1305013 : return rel;
1248 : }
1249 :
1250 : static int
1251 102359 : sql_class_base_score(visitor *v, sql_column *c, sql_subtype *t, bool equality_based)
1252 : {
1253 102359 : int de;
1254 :
1255 102359 : if (!t)
1256 : return 0;
1257 102359 : switch (ATOMstorage(t->type->localtype)) {
1258 : case TYPE_bte:
1259 : return 150 - 8;
1260 3608 : case TYPE_sht:
1261 3608 : return 150 - 16;
1262 39432 : case TYPE_int:
1263 39432 : return 150 - 32;
1264 990 : case TYPE_void:
1265 : case TYPE_lng:
1266 990 : return 150 - 64;
1267 : case TYPE_uuid:
1268 : #ifdef HAVE_HGE
1269 : case TYPE_hge:
1270 : #endif
1271 : return 150 - 128;
1272 5 : case TYPE_flt:
1273 5 : return 75 - 24;
1274 : case TYPE_dbl:
1275 : return 75 - 53;
1276 31848 : default:
1277 31848 : if (equality_based && c && v->storage_based_opt && (de = mvc_is_duplicate_eliminated(v->sql, c)))
1278 1326 : return 150 - de * 8;
1279 : /* strings and blobs not duplicate eliminated don't get any points here */
1280 : return 0;
1281 : }
1282 : }
1283 :
1284 : static int
1285 41397 : score_se_base(visitor *v, sql_rel *rel, sql_exp *e)
1286 : {
1287 41397 : int res = 0;
1288 41397 : sql_subtype *t = exp_subtype(e);
1289 41397 : sql_column *c = NULL;
1290 :
1291 : /* can we find out if the underlying table is sorted */
1292 41397 : if ((c = exp_find_column(rel, e, -2)) && v->storage_based_opt && mvc_is_sorted(v->sql, c))
1293 41397 : res += 600;
1294 :
1295 : /* prefer the shorter var types over the longer ones */
1296 41397 : res += sql_class_base_score(v, c, t, is_equality_or_inequality_exp(e->flag)); /* smaller the type, better */
1297 41397 : return res;
1298 : }
1299 :
1300 : static int
1301 65859 : score_se(visitor *v, sql_rel *rel, sql_exp *e)
1302 : {
1303 65859 : int score = 0;
1304 65859 : if (e->type == e_cmp && !is_complex_exp(e->flag)) {
1305 41397 : sql_exp *l = e->l;
1306 :
1307 41397 : while (l->type == e_cmp) { /* go through nested comparisons */
1308 2 : sql_exp *ll;
1309 :
1310 2 : if (l->flag == cmp_filter || l->flag == cmp_or)
1311 0 : ll = ((list*)l->l)->h->data;
1312 : else
1313 2 : ll = l->l;
1314 2 : if (ll->type != e_cmp)
1315 : break;
1316 : l = ll;
1317 : }
1318 41397 : score += score_se_base(v, rel, l);
1319 : }
1320 65859 : score += exp_keyvalue(e);
1321 65859 : return score;
1322 : }
1323 :
1324 : static inline sql_rel *
1325 1305015 : rel_select_order(visitor *v, sql_rel *rel)
1326 : {
1327 1305015 : int *scores = NULL;
1328 1305015 : sql_exp **exps = NULL;
1329 :
1330 1305015 : if (is_select(rel->op) && list_length(rel->exps) > 1) {
1331 30744 : node *n;
1332 30744 : int i, nexps = list_length(rel->exps);
1333 30744 : scores = SA_NEW_ARRAY(v->sql->ta, int, nexps);
1334 30744 : exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, nexps);
1335 :
1336 96603 : for (i = 0, n = rel->exps->h; n; i++, n = n->next) {
1337 65911 : exps[i] = n->data;
1338 65911 : if (find_prop(exps[i]->p, PROP_HASHCOL))
1339 : return rel;
1340 65859 : scores[i] = score_se(v, rel, n->data);
1341 : }
1342 30692 : GDKqsort(scores, exps, NULL, nexps, sizeof(int), sizeof(void *), TYPE_int, true, true);
1343 :
1344 96551 : for (i = 0, n = rel->exps->h; n; i++, n = n->next)
1345 65859 : n->data = exps[i];
1346 : }
1347 :
1348 : return rel;
1349 : }
1350 :
1351 : /* Compute the efficiency of using this expression early in a group by list */
1352 : static int
1353 60962 : score_gbe(visitor *v, sql_rel *rel, sql_exp *e)
1354 : {
1355 60962 : int res = 0;
1356 60962 : sql_subtype *t = exp_subtype(e);
1357 60962 : sql_column *c = exp_find_column(rel, e, -2);
1358 :
1359 60962 : if (e->card == CARD_ATOM) /* constants are trivial to group */
1360 63 : res += 1000;
1361 : /* can we find out if the underlying table is sorted */
1362 60962 : if (is_unique(e) || find_prop(e->p, PROP_HASHCOL) || (c && v->storage_based_opt && mvc_is_unique(v->sql, c))) /* distinct columns */
1363 3431 : res += 700;
1364 40639 : if (c && v->storage_based_opt && mvc_is_sorted(v->sql, c))
1365 3453 : res += 500;
1366 60962 : if (find_prop(e->p, PROP_HASHIDX)) /* has hash index */
1367 0 : res += 200;
1368 :
1369 : /* prefer the shorter var types over the longer ones */
1370 60962 : res += sql_class_base_score(v, c, t, true); /* smaller the type, better */
1371 60962 : return res;
1372 : }
1373 :
1374 : /* reorder group by expressions */
1375 : static inline sql_rel *
1376 1305016 : rel_groupby_order(visitor *v, sql_rel *rel)
1377 : {
1378 1305016 : int *scores = NULL;
1379 1305016 : sql_exp **exps = NULL;
1380 :
1381 1305016 : if (is_groupby(rel->op) && list_length(rel->r) > 1) {
1382 25781 : node *n;
1383 25781 : list *gbe = rel->r;
1384 25781 : int i, ngbe = list_length(gbe);
1385 25781 : scores = SA_NEW_ARRAY(v->sql->ta, int, ngbe);
1386 25781 : exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, ngbe);
1387 :
1388 : /* first sorting step, give priority for integers and sorted columns */
1389 86743 : for (i = 0, n = gbe->h; n; i++, n = n->next) {
1390 60962 : exps[i] = n->data;
1391 60962 : scores[i] = score_gbe(v, rel, exps[i]);
1392 : }
1393 25781 : GDKqsort(scores, exps, NULL, ngbe, sizeof(int), sizeof(void *), TYPE_int, true, true);
1394 :
1395 : /* second sorting step, give priority to strings with lower number of digits */
1396 51102 : for (i = ngbe - 1; i && !scores[i]; i--); /* find expressions with no score from the first round */
1397 25781 : if (scores[i])
1398 24809 : i++;
1399 25781 : if (ngbe - i > 1) {
1400 8483 : for (int j = i; j < ngbe; j++) {
1401 6505 : sql_subtype *t = exp_subtype(exps[j]);
1402 6505 : scores[j] = t ? t->digits : 0;
1403 : }
1404 : /* the less number of digits the better, order ascending */
1405 1978 : GDKqsort(scores + i, exps + i, NULL, ngbe - i, sizeof(int), sizeof(void *), TYPE_int, false, true);
1406 : }
1407 :
1408 86743 : for (i = 0, n = gbe->h; n; i++, n = n->next)
1409 60962 : n->data = exps[i];
1410 : }
1411 :
1412 1305016 : return rel;
1413 : }
1414 :
1415 : /* This optimization loop contains optimizations that can potentially use statistics */
1416 : static sql_rel *
1417 1305014 : rel_final_optimization_loop_(visitor *v, sql_rel *rel)
1418 : {
1419 : /* Run rel_push_select_up only once at the end to avoid an infinite optimization loop */
1420 1305014 : rel = rel_push_select_up(v, rel);
1421 1305015 : rel = rel_select_order(v, rel);
1422 :
1423 : /* TODO? Maybe later add rel_simplify_count, rel_join2semijoin, rel_simplify_fk_joins,
1424 : rel_distinct_project2groupby, rel_simplify_predicates, rel_simplify_math,
1425 : rel_distinct_aggregate_on_unique_values */
1426 :
1427 1305016 : rel = rel_groupby_order(v, rel);
1428 1305016 : return rel;
1429 : }
1430 :
1431 : static sql_rel *
1432 54815 : rel_final_optimization_loop(visitor *v, global_props *gp, sql_rel *rel)
1433 : {
1434 54815 : (void) gp;
1435 54815 : return rel_visitor_bottomup(v, rel, &rel_final_optimization_loop_);
1436 : }
1437 :
1438 : run_optimizer
1439 699111 : bind_final_optimization_loop(visitor *v, global_props *gp)
1440 : {
1441 699111 : int flag = v->sql->sql_optimizer;
1442 : /* At the moment, this optimizer has dependency on 3 flags */
1443 538549 : return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_select]) &&
1444 753927 : (flag & push_select_up) && (flag & optimize_select_and_joins_topdown) && (flag & optimize_projections) ? rel_final_optimization_loop : NULL;
1445 : }
|