Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer_private.h"
15 : #include "rel_statistics.h"
16 : #include "rel_basetable.h"
17 : #include "rel_rewriter.h"
18 :
19 : static sql_exp *
20 3244857 : comparison_find_column(sql_exp *input, sql_exp *e)
21 : {
22 3244857 : switch (input->type) {
23 92278 : case e_convert: {
24 92278 : list *types = (list *)input->r;
25 92278 : sql_class from = ((sql_subtype*)types->h->data)->type->eclass, to = ((sql_subtype*)types->h->next->data)->type->eclass;
26 92278 : if (from == to)
27 177800 : return comparison_find_column(input->l, e) ? input : NULL;
28 : return NULL;
29 : }
30 2871894 : case e_column:
31 2871894 : return exp_match(e, input) ? input : NULL;
32 : default:
33 : return NULL;
34 : }
35 : }
36 :
37 : /* multi lo <= col <= hi, maybe still be false even if lo or hi are NULL, possibly similar for filter on multiple
38 : * columns */
39 : #define comp_single_column(c) (!c->f)
40 :
41 : static sql_exp *
42 8290810 : rel_propagate_column_ref_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
43 : {
44 8375036 : assert(e->type == e_column);
45 8375036 : if (rel) {
46 8374957 : switch(rel->op) {
47 3894597 : case op_left:
48 : case op_right:
49 : case op_full:
50 : case op_join:
51 : case op_select:
52 : case op_anti:
53 : case op_semi: {
54 3894597 : bool found_without_semantics = false, found_left = false, found_right = false, still_unique = false;
55 :
56 3894597 : if ((is_innerjoin(rel->op) || is_select(rel->op)) && list_length(rel->exps) == 1 && exp_is_false(rel->exps->h->data))
57 : return NULL; /* nothing will pass, skip */
58 :
59 : /* propagate from the bottom first */
60 3886834 : if (rel_propagate_column_ref_statistics(sql, rel->l, e))
61 : found_left = true;
62 2498956 : if (!found_left && is_join(rel->op) && rel_propagate_column_ref_statistics(sql, rel->r, e))
63 3886835 : found_right = true;
64 :
65 3886835 : if (!found_left && !found_right)
66 : return NULL;
67 1661126 : if (!list_empty(rel->exps) && rel->op != op_anti) { /* if there's an or, the MIN and MAX get difficult to propagate */
68 3273095 : for (node *n = rel->exps->h ; n ; n = n->next) {
69 1693430 : sql_exp *comp = n->data, *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL, *fe = comp->f, *fne = NULL;
70 :
71 1693430 : if (comp->type == e_cmp) {
72 1692734 : if (is_theta_exp(comp->flag) && ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) || (fe && (fne = comparison_find_column(fe, e))))) {
73 113435 : atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
74 113435 : *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
75 :
76 : /* not semantics found or if explicitly filtering not null values from the column */
77 113435 : found_without_semantics |= (!is_semantics(comp) && comp_single_column(comp)) || (comp->flag == cmp_equal && lne && is_anti(comp) && exp_is_null(re));
78 113435 : still_unique |= comp->flag == cmp_equal && is_unique(le) && is_unique(re); /* unique if only equi-joins on unique columns are there */
79 113435 : if (is_full(rel->op) || (is_left(rel->op) && found_left) || (is_right(rel->op) && found_right)) /* on outer joins, min and max cannot be propagated on some cases */
80 17946 : continue;
81 : /* if (end2 >= start1 && start2 <= end1) then the 2 intervals are intersected */
82 95489 : if (fe && lval_min && lval_max) { /* range case, the middle expression must intersect the other two */
83 4622 : atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
84 4622 : int int1 = rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0,
85 4622 : int2 = fval_min && fval_max && atom_cmp(fval_max, lval_min) >= 0 && atom_cmp(fval_min, lval_max) <= 0,
86 4622 : symmetric = is_symmetric(comp);
87 :
88 4622 : if (is_anti(comp) || (!symmetric && fval_min && rval_max && atom_cmp(fval_min, rval_max) < 0)) /* for asymmetric case the re range must be after the fe range */
89 1817 : continue;
90 2805 : if (lne && int1 && int2) {
91 104 : if (symmetric) {
92 0 : prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
93 0 : atom *nmin = statistics_atom_min(sql, rval_min, fval_min), *nmax = statistics_atom_max(sql, rval_max, fval_max);
94 : /* max is min from le and (max from re and fe max) */
95 0 : set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, nmax, p2->value.pval) : nmax);
96 : /* min is max from le and (min from re and fe min) */
97 0 : set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, nmin, p1->value.pval) : nmin);
98 : } else {
99 104 : prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
100 : /* max is min from le and fe max */
101 104 : set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, fval_max, p2->value.pval) : fval_max);
102 : /* min is max from le and re min */
103 104 : set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, rval_min, p1->value.pval) : rval_min);
104 : }
105 2701 : } else if (rne) {
106 590 : if (symmetric && int1 && int2) { /* min is max from le and (min from re and fe min) */
107 0 : prop *p = find_prop(e->p, PROP_MIN);
108 0 : atom *nmin = p ? statistics_atom_min(sql, p->value.pval, fval_min) : fval_min;
109 0 : set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, nmin, lval_min));
110 590 : } else if (int1) { /* min is max from le and re min */
111 100 : prop *p = find_prop(e->p, PROP_MIN);
112 100 : set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
113 : }
114 2111 : } else if (fne) {
115 549 : if (symmetric && int1 && int2) { /* max is min from le and (max from re and fe max) */
116 0 : prop *p = find_prop(e->p, PROP_MAX);
117 0 : atom *nmax = p ? statistics_atom_max(sql, p->value.pval, rval_max) : rval_max;
118 0 : set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, nmax, lval_max) : nmax);
119 549 : } else if (int2) { /* max is min from le and fe max */
120 266 : prop *p = find_prop(e->p, PROP_MAX);
121 266 : set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
122 : }
123 : }
124 90867 : } else if (lval_min && lval_max && rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0) {
125 : /* both min and max must be set and the intervals must overlap */
126 39794 : switch (comp->flag) {
127 25916 : case cmp_equal: /* for equality reduce */
128 25916 : set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_max(sql, lval_max, rval_max) : statistics_atom_min(sql, lval_max, rval_max));
129 25916 : set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_min(sql, lval_min, rval_min) : statistics_atom_max(sql, lval_min, rval_min));
130 25916 : break;
131 4586 : case cmp_notequal: /* for inequality expand */
132 4586 : set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_min(sql, lval_max, rval_max) : statistics_atom_max(sql, lval_max, rval_max));
133 4586 : set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_max(sql, lval_min, rval_min) : statistics_atom_min(sql, lval_min, rval_min));
134 4586 : break;
135 5286 : case cmp_gt:
136 : case cmp_gte:
137 9634 : if (!is_anti(comp) && lne) { /* min is max from both min */
138 4348 : prop *p = find_prop(e->p, PROP_MIN);
139 4348 : set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min, p->value.pval) : rval_min);
140 938 : } else if (!is_anti(comp)) { /* max is min from both max */
141 938 : prop *p = find_prop(e->p, PROP_MAX);
142 938 : set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
143 : }
144 : break;
145 4006 : case cmp_lt:
146 : case cmp_lte:
147 7205 : if (!is_anti(comp) && lne) { /* max is min from both max */
148 3199 : prop *p = find_prop(e->p, PROP_MAX);
149 3199 : set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max, p->value.pval) : rval_max);
150 807 : } else if (!is_anti(comp)) { /* min is max from both min */
151 807 : prop *p = find_prop(e->p, PROP_MIN);
152 807 : set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
153 : }
154 : break;
155 : default: /* Maybe later I can do cmp_in and cmp_notin */
156 : break;
157 : }
158 : }
159 : }
160 : }
161 : }
162 : }
163 1661126 : if (is_full(rel->op) || (is_left(rel->op) && found_right) || (is_right(rel->op) && found_left))
164 32645 : set_has_nil(e);
165 1661126 : if (!is_outerjoin(rel->op) && found_without_semantics) /* at an outer join, null values pass */
166 85991 : set_has_no_nil(e);
167 1661126 : if (is_join(rel->op) && is_unique(e) && !still_unique)
168 111462 : set_not_unique(e);
169 1661126 : return e;
170 : }
171 4376773 : case op_table:
172 : case op_basetable:
173 : case op_union:
174 : case op_except:
175 : case op_inter:
176 : case op_munion:
177 : case op_project:
178 : case op_groupby: {
179 4376773 : sql_exp *found;
180 4376773 : atom *fval;
181 4376773 : prop *est;
182 4376773 : if ((found = rel_find_exp(rel, e))) {
183 1982687 : if (rel->op != op_table) { /* At the moment don't propagate statistics for table relations */
184 1946118 : if ((fval = find_prop_and_get(found->p, PROP_MAX)))
185 1048990 : set_minmax_property(sql, e, PROP_MAX, fval);
186 1946119 : if ((fval = find_prop_and_get(found->p, PROP_MIN)))
187 1056938 : set_minmax_property(sql, e, PROP_MIN, fval);
188 1946115 : if (!has_nil(found))
189 1273822 : set_has_no_nil(e);
190 1946115 : if (is_unique(found) || (need_distinct(rel) && list_length(rel->exps) == 1) ||
191 1568526 : (is_groupby(rel->op) && (list_empty(rel->r) || (list_length(rel->r) == 1 && exps_find_exp(rel->r, e)))))
192 388442 : set_unique(e);
193 : /* propagate unique estimation for known cases */
194 1946115 : if (is_groupby(rel->op) && list_empty(rel->r) && !find_prop(e->p, PROP_NUNIQUES)) { /* global aggregate case */
195 7535 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
196 7535 : p->value.dval = 1;
197 1938580 : } else if (((is_basetable(rel->op) || is_except(rel->op) || is_inter(rel->op) || is_simple_project(rel->op) ||
198 67963 : (is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
199 1884839 : (est = find_prop(found->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES))) {
200 166725 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
201 166725 : p->value.dval = est->value.dval;
202 : }
203 : }
204 1982685 : return e;
205 : }
206 : return NULL;
207 : }
208 84226 : case op_topn:
209 : case op_sample:
210 84226 : return rel_propagate_column_ref_statistics(sql, rel->l, e);
211 : default:
212 : break;
213 : }
214 : }
215 : return NULL;
216 : }
217 :
218 : static atom *
219 4086149 : atom_from_valptr( allocator *sa, sql_subtype *tpe, ValRecord *v)
220 : {
221 4086149 : atom *a = SA_NEW(sa, atom);
222 :
223 4086087 : assert(!VALisnil(v));
224 4085994 : *a = (atom) {.tpe = *tpe,};
225 4085994 : SA_VALcopy(sa, &a->data, v);
226 4085944 : return a;
227 : }
228 :
229 : void
230 3596267 : sql_column_get_statistics(mvc *sql, sql_column *c, sql_exp *e)
231 : {
232 3596267 : bool nonil = false, unique = false;
233 3596267 : double unique_est = 0.0;
234 3596267 : ValRecord min, max;
235 3596267 : int ok = mvc_col_stats(sql, c, &nonil, &unique, &unique_est, &min, &max);
236 :
237 3596269 : if (has_nil(e) && nonil)
238 2453932 : set_has_no_nil(e);
239 3596269 : if (!is_unique(e) && unique)
240 1055296 : set_unique(e);
241 3596269 : if (unique_est != 0.0) {
242 2398661 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
243 2398653 : p->value.dval = unique_est;
244 : }
245 3596261 : unsigned int digits = 0;
246 3596261 : sql_subtype *et = exp_subtype(e);
247 3596277 : if (et->type->eclass == EC_DEC || et->type->eclass == EC_NUM)
248 2364880 : digits = et->digits;
249 3596277 : if ((ok & 2) == 2) {
250 2039554 : if (!VALisnil(&max)) {
251 2039538 : prop *p = e->p = prop_create(sql->sa, PROP_MAX, e->p);
252 2039529 : p->value.pval = atom_from_valptr(sql->sa, &c->type, &max);
253 2039480 : if (digits) {
254 1530169 : unsigned int nd = atom_digits(p->value.pval);
255 1530213 : if (nd < digits)
256 : digits = nd;
257 1530213 : if (!digits)
258 : digits = 1;
259 : }
260 : }
261 2039527 : VALclear(&max);
262 : }
263 3596232 : if ((ok & 1) == 1) {
264 2046890 : if (!VALisnil(&min)) {
265 2046878 : prop *p = e->p = prop_create(sql->sa, PROP_MIN, e->p);
266 2046884 : p->value.pval = atom_from_valptr(sql->sa, &c->type, &min);
267 2046847 : if (digits) {
268 1538621 : unsigned int nd = atom_digits(p->value.pval);
269 1538617 : if (nd > digits) /* possibly set to low by max value */
270 : digits = nd;
271 : if (!digits)
272 : digits = 1;
273 : }
274 : }
275 2046838 : VALclear(&min);
276 : }
277 3596167 : if (digits)
278 2364771 : et->digits = digits;
279 3596167 : if (et->type->eclass == EC_DEC && et->digits <= et->scale)
280 216 : et->digits = et->scale + 1;
281 3596167 : }
282 :
283 : static void
284 876141 : rel_basetable_column_get_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
285 : {
286 876141 : if (e->p)
287 : return;
288 292010 : sql_column *c = NULL;
289 :
290 292010 : if ((c = rel_base_find_column(rel, e->nid))) {
291 195835 : sql_column_get_statistics(sql, c, e);
292 : }
293 : }
294 :
295 : static bool
296 8872 : rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, sql_exp *e, int i)
297 : {
298 8872 : sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
299 8872 : atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
300 8872 : *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
301 8872 : prop *est;
302 :
303 : /* for the intersection, if both expressions don't overlap, it can be pruned */
304 8872 : if (is_inter(rel->op) && !has_nil(le) && !has_nil(re) &&
305 511 : ((rval_max && lval_min && atom_cmp(rval_max, lval_min) < 0) || (rval_min && lval_max && atom_cmp(rval_min, lval_max) > 0)))
306 26 : return true;
307 :
308 8846 : if (lval_max && rval_max) {
309 2557 : if (is_union(rel->op))
310 3 : set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
311 2554 : else if (is_inter(rel->op))
312 399 : set_minmax_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval_max, rval_max)); /* for intersect the new max will be the min of the two */
313 : else /* except */
314 2155 : set_minmax_property(sql, e, PROP_MAX, lval_max);
315 : }
316 8846 : if (lval_min && rval_min) {
317 2557 : if (is_union(rel->op))
318 3 : set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
319 2554 : else if (is_inter(rel->op))
320 399 : set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval_min, rval_min)); /* for intersect the new min will be the max of the two */
321 : else /* except */
322 2155 : set_minmax_property(sql, e, PROP_MIN, lval_min);
323 : }
324 :
325 8846 : if (is_union(rel->op) || is_munion(rel->op)) {
326 5986 : if (!has_nil(le) && !has_nil(re))
327 879 : set_has_no_nil(e);
328 5986 : if (need_distinct(rel) && list_length(rel->exps) == 1)
329 0 : set_unique(e);
330 2860 : } else if (is_inter(rel->op)) {
331 564 : if (!has_nil(le) || !has_nil(re))
332 509 : set_has_no_nil(e);
333 564 : if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
334 345 : set_unique(e);
335 : } else {
336 2296 : assert(is_except(rel->op));
337 2296 : if (!has_nil(le))
338 2233 : set_has_no_nil(e);
339 2296 : if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
340 2009 : set_unique(e);
341 : }
342 : /* propagate unique estimation for known cases */
343 8846 : if (!is_union(rel->op) && (est = find_prop(le->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES)) {
344 205 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
345 205 : p->value.dval = est->value.dval;
346 : }
347 : return false;
348 : }
349 :
350 :
351 : static void
352 56927 : rel_munion_get_statistics(mvc *sql, sql_rel *rel, list *rels, sql_exp *e, int i)
353 : {
354 56927 : assert(is_munion(rel->op));
355 :
356 56927 : sql_rel *l = rels->h->data;
357 56927 : sql_exp *le = list_fetch(l->exps, i);
358 56927 : atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX);
359 56927 : bool has_nonil = !has_nil(le);
360 :
361 160702 : for(node *n = rels->h->next; n; n = n->next) {
362 103775 : sql_rel *r = n->data;
363 103775 : sql_exp *re = list_fetch(r->exps, i);
364 103775 : atom *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
365 :
366 103775 : if (lval_max && rval_max) {
367 39696 : set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
368 39696 : lval_max = find_prop_and_get(e->p, PROP_MAX);
369 : }
370 103775 : if (lval_min && rval_min) {
371 39263 : set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
372 39263 : lval_min = find_prop_and_get(e->p, PROP_MIN);
373 : }
374 103775 : has_nonil &= !has_nil(re);
375 :
376 : }
377 :
378 56927 : if (has_nonil)
379 40429 : set_has_no_nil(e);
380 :
381 56927 : if (need_distinct(rel) && list_length(rel->exps) == 1)
382 1594 : set_unique(e);
383 56927 : }
384 :
385 :
386 : static sql_exp *
387 3201170 : rel_propagate_statistics(visitor *v, sql_rel *rel, sql_exp *e, int depth)
388 : {
389 3201170 : mvc *sql = v->sql;
390 3201170 : atom *lval;
391 :
392 3201170 : (void) depth;
393 3201170 : switch(e->type) {
394 1987358 : case e_column:
395 1987358 : switch (rel->op) { /* set relations don't call rel_propagate_statistics */
396 259459 : case op_join:
397 : case op_left:
398 : case op_right:
399 : case op_full:
400 : case op_semi:
401 : case op_anti: {
402 259459 : sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e);
403 259459 : if (!found)
404 130122 : (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
405 : break;
406 : }
407 1727899 : case op_select:
408 : case op_project:
409 : case op_groupby: {
410 1727899 : sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e); /* labels may be found on the same projection, ugh */
411 1727899 : if (!found && is_simple_project(rel->op))
412 117872 : (void) rel_propagate_column_ref_statistics(sql, rel, e);
413 : break;
414 : }
415 0 : case op_insert:
416 : case op_update:
417 : case op_delete:
418 0 : (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
419 0 : break;
420 : default:
421 : break;
422 : }
423 : break;
424 91799 : case e_convert: {
425 91799 : sql_subtype *to = exp_totype(e), *from = exp_fromtype(e);
426 91799 : sql_exp *l = e->l;
427 91799 : sql_class fr = from->type->eclass, too = to->type->eclass;
428 91799 : prop *est;
429 :
430 91799 : if (fr == too) {
431 82893 : if ((lval = find_prop_and_get(l->p, PROP_MAX))) {
432 54791 : atom *res = atom_copy(sql->sa, lval);
433 54791 : if ((res = atom_cast(sql->sa, res, to)))
434 54768 : set_minmax_property(sql, e, PROP_MAX, res);
435 : }
436 82893 : if ((lval = find_prop_and_get(l->p, PROP_MIN))) {
437 55408 : atom *res = atom_copy(sql->sa, lval);
438 55408 : if ((res = atom_cast(sql->sa, res, to)))
439 55385 : set_minmax_property(sql, e, PROP_MIN, res);
440 : }
441 : }
442 : /* propagating nuniques through conversions is tricky. I am adding just the general cases */
443 91799 : if ((est = find_prop(l->p, PROP_NUNIQUES)) &&
444 56543 : (((fr == EC_TIME || fr == EC_TIME_TZ || fr == EC_DATE) && (too == EC_TIMESTAMP || too == EC_TIMESTAMP_TZ)) ||
445 56518 : ((fr == EC_NUM || fr == EC_BIT) && too == EC_NUM) || (fr == EC_DEC && too == EC_DEC) || (fr != EC_EXTERNAL && too == EC_STRING))) {
446 53121 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
447 53121 : p->value.dval = est->value.dval;
448 : }
449 91799 : if (!has_nil(l))
450 50907 : set_has_no_nil(e);
451 : break;
452 : }
453 326637 : case e_aggr:
454 : case e_func: {
455 326637 : BUN lv;
456 326637 : sql_subfunc *f = e->f;
457 :
458 326637 : if (!f->func->s) {
459 301185 : int key = hash_key(f->func->base.name); /* Using hash lookup */
460 301185 : sql_hash_e *he = sql_functions_lookup->buckets[key&(sql_functions_lookup->size-1)];
461 301185 : lookup_function look = NULL;
462 :
463 660119 : for (; he && !look; he = he->chain) {
464 358934 : struct function_properties* fp = (struct function_properties*) he->value;
465 :
466 358934 : if (!strcmp(f->func->base.name, fp->name))
467 104855 : look = fp->func;
468 : }
469 301185 : if (look)
470 104855 : look(sql, e);
471 : }
472 : /* for global aggregates with no semantics, if the left relation has values, then the output won't be NULL */
473 326637 : if (!is_semantics(e) && e->l && !have_nil(e->l) &&
474 86062 : (e->type != e_aggr || (is_groupby(rel->op) && list_length(rel->r)) || ((lv = get_rel_count(rel->l)) != BUN_NONE && lv > 0)))
475 85735 : set_has_no_nil(e);
476 : /* set properties for global aggregates */
477 326637 : if (e->type == e_aggr && is_groupby(rel->op) && list_empty(rel->r)) {
478 7868 : if (!find_prop(e->p, PROP_NUNIQUES)) {
479 7868 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
480 7868 : p->value.dval = 1;
481 : }
482 7868 : set_unique(e);
483 : }
484 : break;
485 : }
486 531712 : case e_atom:
487 531712 : if (e->l) {
488 514867 : atom *a = (atom*) e->l;
489 514867 : if (!a->isnull) {
490 459785 : set_minmax_property(sql, e, PROP_MAX, a);
491 459785 : set_minmax_property(sql, e, PROP_MIN, a);
492 : }
493 514867 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
494 514867 : p->value.dval = 1;
495 16845 : } else if (e->f) {
496 3520 : list *vals = (list *) e->f;
497 3520 : sql_exp *first = vals->h ? vals->h->data : NULL;
498 3520 : atom *max = NULL, *min = NULL; /* all child values must have a valid min/max */
499 3520 : int has_nil = 0;
500 :
501 3520 : if (first) {
502 3520 : max = ((lval = find_prop_and_get(first->p, PROP_MAX))) ? lval : NULL;
503 3520 : min = ((lval = find_prop_and_get(first->p, PROP_MIN))) ? lval : NULL;
504 3520 : has_nil |= has_nil(first);
505 : }
506 :
507 15389 : for (node *n = vals->h ? vals->h->next : NULL ; n ; n = n->next) {
508 8349 : sql_exp *ee = n->data;
509 :
510 8349 : if (min && max) {
511 7856 : if ((lval = find_prop_and_get(ee->p, PROP_MAX))) {
512 7810 : max = atom_cmp(lval, max) > 0 ? lval : max;
513 : } else {
514 : max = NULL;
515 : }
516 7856 : if ((lval = find_prop_and_get(ee->p, PROP_MIN))) {
517 7810 : min = atom_cmp(min, lval) > 0 ? lval : min;
518 : } else {
519 : min = NULL;
520 : }
521 : }
522 8349 : has_nil |= has_nil(ee);
523 : }
524 :
525 3520 : if (!has_nil)
526 3150 : set_has_no_nil(e);
527 3520 : if (min && max) {
528 3108 : set_minmax_property(sql, e, PROP_MAX, max);
529 3108 : set_minmax_property(sql, e, PROP_MIN, min);
530 : }
531 3520 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
532 3520 : p->value.dval = (dbl) list_length(vals);
533 : } else {
534 13325 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
535 13325 : p->value.dval = 1;
536 : }
537 : break;
538 263664 : case e_cmp:
539 : /* TODO? propagating min/max/unique of booleans is not very worth it */
540 263664 : if (e->flag == cmp_or || e->flag == cmp_filter) {
541 17012 : if (!have_nil(e->l) && !have_nil(e->r))
542 12832 : set_has_no_nil(e);
543 246652 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
544 21114 : sql_exp *le = e->l;
545 21114 : if (!has_nil(le) && !have_nil(e->r))
546 18440 : set_has_no_nil(e);
547 : } else {
548 225538 : sql_exp *le = e->l, *re = e->r, *fe = e->f;
549 225538 : if (!has_nil(le) && !has_nil(re) && (!e->f || !has_nil(fe)))
550 168477 : set_has_no_nil(e);
551 : }
552 : break;
553 : case e_psm:
554 : break;
555 : }
556 :
557 : #ifndef NDEBUG
558 : {
559 : /* min and max cannot be NULL and min must be <= than max, if it happens the inner relation must be empty! */
560 3201170 : atom *min = find_prop_and_get(e->p, PROP_MIN), *max = find_prop_and_get(e->p, PROP_MAX);
561 :
562 3201170 : (void) min;
563 3201170 : (void) max;
564 3201170 : assert(!min || !min->isnull);
565 3201170 : assert(!max || !max->isnull);
566 3201170 : assert(!min || !max || (min && max)/* atom_cmp(min, max) <= 0*/);
567 : }
568 : #endif
569 3201170 : return e;
570 : }
571 :
572 : static list * /* Remove predicates always false from min/max values */
573 120948 : rel_prune_predicates(visitor *v, sql_rel *rel)
574 : {
575 120948 : if (rel->l) {
576 120948 : sql_rel *l = rel->l;
577 120948 : if (is_single(l))
578 2173 : return rel->exps;
579 : }
580 118775 : if (!list_empty(rel->attr))
581 2963 : return rel->exps;
582 246190 : for (node *n = rel->exps->h ; n ; n = n->next) {
583 130378 : sql_exp *e = n->data;
584 :
585 130378 : if (e->type == e_cmp && is_theta_exp(e->flag)) {
586 105956 : sql_exp *le = e->l, *re = e->r, *fe = e->f;
587 105956 : atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
588 105956 : *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
589 105956 : bool always_false = false, always_true = false;
590 :
591 105956 : if (fe && !is_symmetric(e)) {
592 3058 : atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
593 3058 : comp_type lower = range2lcompare(e->flag), higher = range2rcompare(e->flag);
594 3660 : int not_int1 = rval_min && lval_max && /* the middle and left intervals don't overlap */
595 1663 : (!is_anti(e) && (lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0)),
596 4072 : not_int2 = lval_min && fval_max && /* the middle and right intervals don't overlap */
597 2485 : (!is_anti(e) && (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0)),
598 3058 : not_int3 = rval_min && fval_max && /* the left interval is after the right one */
599 1287 : (!is_anti(e) && (atom_cmp(rval_min, fval_max) > 0));
600 :
601 3058 : always_false |= not_int1 || not_int2 || not_int3;
602 : /* for anti the middle must be before the left or after the right or the right after the left, for the other the middle must be always between the left and right intervals */
603 4113 : always_true |= !has_nil(le) && !has_nil(re) && !has_nil(fe) &&
604 3957 : lval_min && lval_max && rval_min && rval_max && fval_min && fval_max &&
605 575 : (is_anti(e) ? ((lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) || (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
606 488 : ((lower == cmp_gte ? atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0)));
607 : } else if (!fe) {
608 102880 : if (!is_semantics(e)) /* trivial not null cmp null case */
609 196110 : always_false |= !is_anti(e) && ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)));
610 102880 : switch (e->flag) {
611 88983 : case cmp_equal:
612 88983 : if (lval_min && lval_max && rval_min && rval_max && (!is_semantics(e) || !has_nil(le) || !has_nil(re)))
613 117424 : always_false |= (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
614 88983 : if (is_semantics(e)) { /* prune *= NULL cases */
615 4796 : always_false |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
616 9592 : always_true |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
617 : }
618 : break;
619 6464 : case cmp_notequal:
620 6464 : if (lval_min && lval_max && rval_min && rval_max)
621 10730 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
622 6464 : if (is_semantics(e)) {
623 29 : always_true |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
624 58 : always_false |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
625 : }
626 : break;
627 4805 : case cmp_gt:
628 4805 : if (lval_max && rval_min)
629 1789 : always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
630 4805 : if (lval_min && rval_max)
631 3578 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
632 : break;
633 103 : case cmp_gte:
634 103 : if (lval_max && rval_min)
635 41 : always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
636 103 : if (lval_min && rval_max)
637 82 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
638 : break;
639 2441 : case cmp_lt:
640 2441 : if (lval_min && rval_max)
641 1383 : always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
642 2441 : if (lval_max && rval_min)
643 2814 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
644 : break;
645 84 : case cmp_lte:
646 84 : if (lval_min && rval_max)
647 11 : always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
648 84 : if (lval_max && rval_min)
649 22 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
650 : break;
651 : default: /* Maybe later I can do cmp_in and cmp_notin but don't forget to remove is_theta_exp check up there */
652 : break;
653 : }
654 : }
655 105956 : assert(!always_false || !always_true);
656 105956 : if (always_false || always_true) {
657 3392 : sql_exp *ne = exp_atom_bool(v->sql->sa, always_true /* if always_true set then true else false */);
658 3392 : if (exp_name(e))
659 0 : exp_prop_alias(v->sql->sa, ne, e);
660 3392 : n->data = ne;
661 3392 : v->changes++;
662 : }
663 : }
664 : }
665 115812 : return rel->exps;
666 : }
667 :
668 : static sql_rel *
669 14 : set_setop_side(visitor *v, sql_rel *rel, sql_rel *side)
670 : {
671 14 : if (side == rel->r)
672 0 : rel->r = NULL;
673 : else
674 14 : rel->l = NULL;
675 14 : if (!is_simple_project(side->op) || rel_is_ref(side) || !list_empty(side->r)) {
676 0 : side = rel_project(v->sql->sa, side, rel_projections(v->sql, side, NULL, 0, 1));
677 0 : set_count_prop(v->sql->sa, side, get_rel_count(side->l));
678 0 : side->exps = exps_exp_visitor_bottomup(v, side, side->exps, 0, &rel_propagate_statistics, false);
679 : }
680 35 : for (node *n = rel->exps->h, *m = side->exps->h ; n && m ; n = n->next, m = m->next) {
681 21 : sql_exp *e1 = n->data, *e2 = m->data;
682 21 : exp_prop_alias(v->sql->sa, e2, e1);
683 : }
684 14 : list_hash_clear(side->exps);
685 :
686 14 : if (need_distinct(rel))
687 0 : set_distinct(side);
688 14 : side->p = prop_copy(v->sql->sa, rel->p);
689 14 : rel_destroy(rel);
690 14 : return side;
691 : }
692 :
693 : static BUN
694 14897 : trivial_project_exp_card(sql_exp *e)
695 : {
696 15104 : if (e->type == e_convert)
697 207 : return trivial_project_exp_card(e->l);
698 14897 : return e->type == e_atom && e->f ? (BUN) list_length(e->f) : 1;
699 : }
700 :
701 : static BUN
702 24956 : rel_calc_nuniques(mvc *sql, sql_rel *l, list *exps)
703 : {
704 24956 : BUN lv = get_rel_count(l);
705 :
706 24956 : if (lv == 0)
707 : return 0;
708 22407 : if (!list_empty(exps)) {
709 22407 : BUN nuniques = 0;
710 : /* compute the highest number of unique values */
711 55862 : for (node *n = exps->h ; n && nuniques != BUN_NONE ; n = n->next) {
712 33454 : sql_exp *e = n->data;
713 33454 : sql_rel *bt = NULL;
714 33454 : prop *p = NULL;
715 33454 : BUN euniques = BUN_NONE;
716 33454 : atom *min, *max, *sub = NULL;
717 33454 : sql_subtype *tp = exp_subtype(e);
718 33454 : sql_class ec = tp ? tp->type->eclass : EC_STRING; /* if 'e' has no type (eg parameter), use a non-number type to fail condition */
719 :
720 33454 : if ((p = find_prop(e->p, PROP_NUNIQUES))) {
721 24110 : euniques = (BUN) p->value.dval;
722 9344 : } else if (e->type == e_column && e->nid && rel_find_exp_and_corresponding_rel(l, e, false, &bt, NULL) && bt && (p = find_prop(bt->p, PROP_COUNT))) {
723 7329 : euniques = (BUN) p->value.lval;
724 : }
725 : /* use min to max range to compute number of possible values in the domain for number types */
726 33454 : if ((EC_TEMP(ec)||ec==EC_NUM||ec==EC_MONTH||ec==EC_POS) &&
727 23407 : (min = find_prop_and_get(e->p, PROP_MIN)) && (max = find_prop_and_get(e->p, PROP_MAX))) {
728 : /* the range includes min and max, so the atom_inc call is needed */
729 : /* if 'euniques' has number of distinct values, compute min between both */
730 18955 : if ((sub = atom_sub(sql->sa, max, min)) && (sub = atom_inc(sql->sa, sub)) && (sub = atom_cast(sql->sa, sub, sql_bind_localtype("oid"))))
731 0 : euniques = MIN(euniques, (BUN) sub->data.val.oval);
732 : }
733 33455 : if (euniques != BUN_NONE)
734 31440 : nuniques = MAX(nuniques, euniques); /* the highest cardinality sets the estimation */
735 : else
736 : nuniques = BUN_NONE;
737 : }
738 22408 : if (nuniques != BUN_NONE)
739 : return nuniques;
740 : }
741 : return lv;
742 : }
743 :
744 : static sql_rel *
745 2007191 : rel_get_statistics_(visitor *v, sql_rel *rel)
746 : {
747 : /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
748 2007191 : uint8_t has_special_modify = *(uint8_t*) v->data;
749 2007191 : bool can_be_pruned = !has_special_modify && v->storage_based_opt;
750 :
751 : /* Don't look at the same relation twice */
752 2007191 : if (are_statistics_gathered(rel->used))
753 : return rel;
754 1270875 : rel->used |= statistics_gathered;
755 :
756 1270875 : switch (rel->op) {
757 297259 : case op_basetable: {
758 297259 : sql_table *t = (sql_table *) rel->l;
759 297259 : sqlstore *store = v->sql->session->tr->store;
760 :
761 297259 : if (!list_empty(rel->exps)) {
762 1173351 : for (node *n = rel->exps->h ; n ; n = n->next)
763 876141 : rel_basetable_column_get_statistics(v->sql, rel, n->data);
764 : }
765 : /* Set table row count. TODO? look for remote tables. Don't look at storage for declared tables, because it won't be cleaned */
766 297259 : if (isTable(t) && t->s && !isDeclaredTable(t)) /* count active rows only */
767 243903 : set_count_prop(v->sql->sa, rel, (BUN)store->storage_api.count_col(v->sql->session->tr, ol_first_node(t->columns)->data, 10));
768 : break;
769 : }
770 2790 : case op_union:
771 : case op_inter:
772 : case op_except: {
773 2790 : bool empty_cross = false;
774 2790 : int i = 0;
775 2790 : sql_rel *l = rel->l, *pl = rel->l, *r = rel->r, *pr = rel->r;
776 :
777 2790 : while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
778 0 : pl = pl->l;
779 2790 : while (is_sample(pr->op) || is_topn(pr->op))
780 0 : pr = pr->l;
781 : /* if it's not a projection, then project and propagate statistics */
782 2790 : if (!is_project(pl->op) && !is_base(pl->op)) {
783 0 : pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
784 0 : set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
785 0 : pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
786 : }
787 2790 : if (!is_project(pr->op) && !is_base(pr->op)) {
788 0 : pr = rel_project(v->sql->sa, pr, rel_projections(v->sql, pr, NULL, 0, 1));
789 0 : set_count_prop(v->sql->sa, pr, get_rel_count(pr->l));
790 0 : pr->exps = exps_exp_visitor_bottomup(v, pr, pr->exps, 0, &rel_propagate_statistics, false);
791 : }
792 :
793 11662 : for (node *n = rel->exps->h ; n ; n = n->next) {
794 8872 : empty_cross |= rel_setop_get_statistics(v->sql, rel, pl->exps, pr->exps, n->data, i);
795 8872 : i++;
796 : }
797 :
798 : /* propagate row count */
799 2790 : if (is_union(rel->op)) {
800 277 : BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
801 277 : rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
802 :
803 277 : if (lv == 0 && rv == 0) { /* both sides empty */
804 2 : if (can_be_pruned)
805 : empty_cross = true;
806 : else
807 2 : set_count_prop(v->sql->sa, rel, 0);
808 275 : } else if (can_be_pruned && lv == 0 && !rel_is_ref(rel)) { /* left side empty */
809 0 : rel = set_setop_side(v, rel, r);
810 0 : empty_cross = false; /* don't rewrite again */
811 275 : } else if (can_be_pruned && rv == 0 && !rel_is_ref(rel)) { /* right side empty */
812 0 : rel = set_setop_side(v, rel, l);
813 0 : empty_cross = false; /* don't rewrite again */
814 275 : } else if (lv != BUN_NONE && rv != BUN_NONE) {
815 7 : set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
816 : }
817 2513 : } else if (is_inter(rel->op) || is_except(rel->op)) {
818 2513 : BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
819 2513 : rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
820 :
821 2513 : if (lv == 0) { /* left side empty */
822 62 : if (can_be_pruned)
823 : empty_cross = true;
824 : else
825 5 : set_count_prop(v->sql->sa, rel, 0);
826 2451 : } else if (rv == 0) { /* right side empty */
827 17 : if (is_inter(rel->op)) {
828 3 : if (can_be_pruned)
829 : empty_cross = true;
830 : else
831 0 : set_count_prop(v->sql->sa, rel, 0);
832 14 : } else if (can_be_pruned && !rel_is_ref(rel)) {
833 14 : rel = set_setop_side(v, rel, l);
834 14 : empty_cross = false; /* don't rewrite again */
835 : } else {
836 0 : set_count_prop(v->sql->sa, rel, lv);
837 : }
838 : } else {
839 2434 : set_count_prop(v->sql->sa, rel, lv);
840 : }
841 : }
842 2790 : if (can_be_pruned && empty_cross) {
843 80 : rel_destroy(rel->l);
844 80 : rel->l = NULL;
845 80 : rel_destroy(rel->r);
846 80 : rel->r = NULL;
847 244 : for (node *n = rel->exps->h ; n ; n = n->next) {
848 164 : sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
849 164 : exp_prop_alias(v->sql->sa, a, e);
850 164 : n->data = a;
851 : }
852 80 : list_hash_clear(rel->exps);
853 80 : sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
854 80 : set_count_prop(v->sql->sa, l, 1);
855 80 : l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
856 80 : set_count_prop(v->sql->sa, l, 0);
857 80 : rel->op = op_project;
858 80 : rel->l = l;
859 80 : rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
860 80 : set_count_prop(v->sql->sa, rel, 0);
861 80 : set_nodistinct(rel); /* set relations may have distinct flag set */
862 80 : v->changes++;
863 : }
864 : break;
865 : }
866 12221 : case op_munion: {
867 12221 : list *l = rel->l, *nrels = sa_list(v->sql->sa);
868 12221 : BUN cnt = 0;
869 12221 : bool needs_pruning = false;
870 :
871 46040 : for (node *n = l->h; n; n = n->next) {
872 33819 : sql_rel *r = n->data, *pl = r;
873 :
874 33819 : while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
875 0 : pl = pl->l;
876 : /* if it's not a projection, then project and propagate statistics */
877 33819 : if (!is_project(pl->op) && !is_base(pl->op)) {
878 0 : pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
879 0 : set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
880 0 : pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
881 : }
882 33819 : nrels = append(nrels, pl);
883 : /* we need new munion statistics */
884 : /* propagate row count */
885 33819 : BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
886 : /* if PROP_COUNT does not exist we assume at least a row (see get_rel_count def) */
887 33819 : if (rv == BUN_NONE) {
888 1076 : cnt++;
889 1076 : continue;
890 : }
891 32743 : if (!rv && can_be_pruned)
892 5613 : needs_pruning = true;
893 : /* overflow check */
894 32743 : if (rv > (BUN_MAX - cnt))
895 33819 : rv = BUN_MAX;
896 : else
897 32743 : cnt += rv;
898 : }
899 12221 : int i = 0;
900 69148 : for (node *n = rel->exps->h ; n ; n = n->next, i++)
901 56927 : rel_munion_get_statistics(v->sql, rel, nrels, n->data, i);
902 :
903 12221 : if (needs_pruning && !rel_is_ref(rel)) {
904 3828 : v->changes++;
905 3828 : list *nl = sa_list(l->sa);
906 :
907 13517 : for (node *n = nrels->h; n; n = n->next) {
908 9689 : sql_rel *r = n->data;
909 9689 : BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
910 :
911 9689 : if (!rv) { /* keep last for now */
912 5159 : rel_destroy(r);
913 5159 : continue;
914 : }
915 4530 : nl = append(nl, r);
916 : }
917 3828 : rel->l = nl;
918 3828 : if (list_length(nl) == 1) {
919 3606 : sql_rel *l = rel->l = nl->h->data; /* ugh */
920 3606 : rel->r = NULL;
921 3606 : rel->op = op_project;
922 :
923 17555 : for (node *n = rel->exps->h, *m = l->exps->h ; n && m ; n = n->next, m = m->next) {
924 13949 : sql_exp *pe = n->data, *ie = m->data;
925 13949 : sql_exp *ne = exp_ref(v->sql, ie);
926 13949 : exp_prop_alias(v->sql->sa, ne, pe);
927 13949 : n->data = ne;
928 : }
929 3606 : list_hash_clear(rel->exps);
930 222 : } else if (list_empty(nl)) {
931 : /* empty select (project [ nils ] ) */
932 454 : for (node *n = rel->exps->h ; n ; n = n->next) {
933 354 : sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
934 354 : exp_prop_alias(v->sql->sa, a, e);
935 354 : n->data = a;
936 : }
937 100 : list_hash_clear(rel->exps);
938 100 : sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
939 100 : set_count_prop(v->sql->sa, l, 1);
940 100 : l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
941 100 : set_count_prop(v->sql->sa, l, 0);
942 100 : rel->op = op_project;
943 100 : rel->r = NULL;
944 100 : rel->l = l;
945 100 : rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
946 100 : set_count_prop(v->sql->sa, rel, 0);
947 100 : set_nodistinct(rel); /* set relations may have distinct flag set */
948 : }
949 : } else {
950 8393 : set_count_prop(v->sql->sa, rel, cnt);
951 : }
952 : break;
953 : }
954 488049 : case op_join:
955 : case op_left:
956 : case op_right:
957 : case op_full:
958 : case op_semi:
959 : case op_anti:
960 : case op_select:
961 : case op_project:
962 : case op_groupby: {
963 488049 : if (is_groupby(rel->op) && !list_empty(rel->r))
964 15762 : rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
965 488049 : rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, &rel_propagate_statistics, false);
966 488049 : if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r))
967 35962 : rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
968 : /* The following optimizations can only be applied after propagating the statistics to rel->exps */
969 488049 : if (can_be_pruned && (is_join(rel->op) || is_select(rel->op)) && !list_empty(rel->exps)) {
970 120948 : int changes = v->changes;
971 120948 : rel->exps = rel_prune_predicates(v, rel);
972 120948 : if (v->changes > changes)
973 3359 : rel = rewrite_simplify(v, 0, v->value_based_opt, rel);
974 : }
975 :
976 : /* propagate row count */
977 488049 : sql_rel *l = rel->l, *r = rel->r;
978 488049 : switch (rel->op) {
979 125258 : case op_join:
980 : case op_left:
981 : case op_right:
982 : case op_full: {
983 125258 : BUN lv = get_rel_count(l), rv = get_rel_count(r), uniques_estimate = BUN_MAX, join_idx_estimate = BUN_MAX;
984 :
985 125258 : if (!list_empty(rel->exps) && !is_single(rel)) {
986 229546 : for (node *n = rel->exps->h ; n ; n = n->next) {
987 117126 : sql_exp *e = n->data, *el = e->l, *er = e->r;
988 :
989 117126 : if (find_prop(e->p, PROP_JOINIDX)) {
990 667 : join_idx_estimate = lv>rv?lv:rv;
991 116459 : } else if (e->type == e_cmp && e->flag == cmp_equal) {
992 : /* if one of the sides is unique, the cardinality will be that exact number, but look for nulls */
993 112633 : if (!is_semantics(e) || !has_nil(el) || !has_nil(er)) {
994 112484 : BUN lu = 0, ru = 0;
995 112484 : prop *p = NULL;
996 112484 : if ((p = find_prop(el->p, PROP_NUNIQUES)))
997 82336 : lu = (BUN) p->value.dval;
998 112484 : if ((p = find_prop(er->p, PROP_NUNIQUES)))
999 96330 : ru = (BUN) p->value.dval;
1000 112484 : if (is_unique(el) || lu > lv) {
1001 67856 : BUN ncount = (is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv;
1002 67856 : uniques_estimate = MIN(uniques_estimate, ncount);
1003 44628 : } else if (is_unique(er) || ru > rv) {
1004 27437 : BUN ncount = (is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv;
1005 27437 : uniques_estimate = MIN(uniques_estimate, ncount);
1006 : }
1007 : }
1008 : }
1009 : }
1010 : }
1011 125258 : if (is_single(rel)) {
1012 3513 : set_count_prop(v->sql->sa, rel, lv);
1013 121745 : } else if (join_idx_estimate != BUN_MAX) {
1014 665 : set_count_prop(v->sql->sa, rel, join_idx_estimate);
1015 121080 : } else if (uniques_estimate != BUN_MAX) {
1016 89117 : set_count_prop(v->sql->sa, rel, uniques_estimate);
1017 31963 : } else if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
1018 : /* corner cases for outer joins */
1019 84 : if (is_left(rel->op)) {
1020 72 : set_count_prop(v->sql->sa, rel, lv);
1021 12 : } else if (is_right(rel->op)) {
1022 3 : set_count_prop(v->sql->sa, rel, rv);
1023 9 : } else if (is_full(rel->op) && lv != BUN_NONE && rv != BUN_NONE) {
1024 9 : set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
1025 0 : } else if (lv != BUN_NONE && rv != BUN_NONE) {
1026 0 : set_count_prop(v->sql->sa, rel, 0);
1027 : }
1028 31879 : } else if (lv == 0) {
1029 1166 : set_count_prop(v->sql->sa, rel, (is_right(rel->op) || is_full(rel->op)) ? rv : 0);
1030 31282 : } else if (rv == 0) {
1031 1568 : set_count_prop(v->sql->sa, rel, (is_left(rel->op) || is_full(rel->op)) ? lv : 0);
1032 30208 : } else if (lv != BUN_NONE && rv != BUN_NONE) {
1033 17870 : set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX / lv)) ? BUN_MAX : (lv * rv)); /* overflow check */
1034 : }
1035 : break;
1036 : }
1037 2054 : case op_anti:
1038 2054 : set_count_prop(v->sql->sa, rel, get_rel_count(l));
1039 2054 : break;
1040 97157 : case op_semi:
1041 : case op_select:
1042 : /* TODO calculate cardinalities using selectivities */
1043 97157 : if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
1044 1739 : set_count_prop(v->sql->sa, rel, 0);
1045 : } else {
1046 187596 : if (!list_empty(rel->exps) && !is_single(rel)) {
1047 92178 : BUN cnt = get_rel_count(l), u = 1;
1048 155466 : for (node *n = rel->exps->h ; n ; n = n->next) {
1049 101154 : sql_exp *e = n->data, *el = e->l, *er = e->r;
1050 :
1051 : /* simple expressions first */
1052 101154 : if (e->type == e_cmp && e->flag == cmp_equal && exp_is_atom(er)) {
1053 : /* use selectivity */
1054 50387 : prop *p;
1055 50387 : if ((p = find_prop(el->p, PROP_NUNIQUES))) {
1056 37866 : u = (BUN) p->value.dval;
1057 37866 : break;
1058 : }
1059 : }
1060 : }
1061 : /* u is an *estimate*, so don't set count_prop to 0 unless cnt is 0 */
1062 92178 : set_count_prop(v->sql->sa, rel, cnt == 0 ? 0 : u == 0 || u > cnt ? 1 : cnt/u);
1063 : } else {
1064 3240 : set_count_prop(v->sql->sa, rel, get_rel_count(l));
1065 : }
1066 : }
1067 : break;
1068 240036 : case op_project:
1069 240036 : if (l) {
1070 231213 : if (need_distinct(rel)) {
1071 0 : set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->exps));
1072 : } else {
1073 231213 : set_count_prop(v->sql->sa, rel, get_rel_count(l));
1074 : }
1075 : } else {
1076 8823 : BUN card = 1;
1077 :
1078 8823 : if (!list_empty(rel->exps)) {
1079 22671 : for (node *n = rel->exps->h ; n ; n = n->next) {
1080 13848 : sql_exp *e = n->data;
1081 13848 : card = MAX(card, trivial_project_exp_card(e));
1082 : }
1083 : }
1084 8823 : set_count_prop(v->sql->sa, rel, card);
1085 : }
1086 : break;
1087 23180 : case op_groupby:
1088 23180 : if (list_empty(rel->r)) {
1089 7417 : set_count_prop(v->sql->sa, rel, 1);
1090 : } else {
1091 15762 : set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->r));
1092 : }
1093 : break;
1094 : default:
1095 : break;
1096 : }
1097 : break;
1098 : }
1099 17016 : case op_topn: {
1100 17016 : BUN lv = get_rel_count(rel->l);
1101 :
1102 17016 : if (lv != BUN_NONE) {
1103 16999 : sql_exp *le = rel->exps->h->data, *oe = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
1104 96 : if (oe && oe->l && exp_is_not_null(oe)) { /* no parameters */
1105 96 : BUN offset = (BUN) ((atom*)oe->l)->data.val.lval;
1106 96 : lv = offset >= lv ? 0 : lv - offset;
1107 : }
1108 16999 : if (le->l && exp_is_not_null(le)) {
1109 16961 : BUN limit = (BUN) ((atom*)le->l)->data.val.lval;
1110 16961 : lv = MIN(lv, limit);
1111 : }
1112 16999 : set_count_prop(v->sql->sa, rel, lv);
1113 : }
1114 : break;
1115 : }
1116 22 : case op_sample: {
1117 22 : BUN lv = get_rel_count(rel->l);
1118 :
1119 22 : if (lv != BUN_NONE) {
1120 4 : sql_exp *se = rel->exps->h->data;
1121 4 : sql_subtype *tp = exp_subtype(se);
1122 :
1123 4 : if (se->l && tp->type->eclass == EC_NUM) { /* sample is a number of rows */
1124 4 : BUN sample = (BUN) ((atom*)se->l)->data.val.lval;
1125 4 : lv = MIN(lv, sample);
1126 0 : } else if (se->l) { /* sample is a percentage of rows */
1127 0 : dbl percent = ((atom*)se->l)->data.val.dval;
1128 0 : assert(tp->type->eclass == EC_FLT);
1129 0 : lv = (BUN) ceil((dbl)lv * percent);
1130 : }
1131 4 : set_count_prop(v->sql->sa, rel, lv);
1132 : }
1133 : break;
1134 : }
1135 5357 : case op_table: {
1136 5357 : sql_exp *op = rel->r;
1137 5357 : if (rel->flag != TRIGGER_WRAPPER && op) {
1138 5045 : sql_subfunc *f = op->f;
1139 5045 : if (f->func->lang == FUNC_LANG_SQL) {
1140 97 : set_count_prop(v->sql->sa, rel, 1000 /* just some fallback value */);
1141 4948 : } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "storage") == 0) {
1142 597 : set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of catalog */);
1143 4351 : } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "db_users") == 0) {
1144 0 : set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of users */);
1145 4351 : } else if (f->func->lang == FUNC_LANG_MAL && strncmp(f->func->base.name, "querylog", 8) == 0) {
1146 965 : set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of querylog */);
1147 3386 : } else if (f->func->lang == FUNC_LANG_MAL &&
1148 3276 : (strcmp(f->func->base.name, "queue") == 0 ||
1149 3020 : strcmp(f->func->base.name, "optimizers") == 0 ||
1150 2772 : strcmp(f->func->base.name, "env") == 0 ||
1151 2488 : strcmp(f->func->base.name, "keywords") == 0 ||
1152 2488 : strcmp(f->func->base.name, "statistics") == 0 ||
1153 1941 : strcmp(f->func->base.name, "rejects") == 0 ||
1154 1695 : strcmp(f->func->base.name, "schemastorage") == 0 ||
1155 1695 : strncmp(f->func->base.name, "storage", 7) == 0 ||
1156 1695 : strcmp(f->func->base.name, "sessions") == 0) ) {
1157 1839 : set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of queue */);
1158 : }
1159 : /* else {
1160 : printf("%%func needs stats : %s\n", f->func->base.name);
1161 : } */
1162 : }
1163 : break;
1164 : }
1165 : /*These relations are less important for now
1166 : TODO later we can tune it */
1167 : case op_insert:
1168 : case op_update:
1169 : case op_delete:
1170 : case op_truncate:
1171 : case op_ddl:
1172 : default:
1173 : break;
1174 : }
1175 :
1176 : return rel;
1177 : }
1178 :
1179 : static sql_rel *
1180 534110 : rel_get_statistics(visitor *v, global_props *gp, sql_rel *rel)
1181 : {
1182 : /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
1183 534110 : uint8_t has_special_modify = (uint8_t) gp->has_special_modify;
1184 534110 : v->data = &has_special_modify;
1185 534110 : rel = rel_visitor_bottomup(v, rel, &rel_get_statistics_);
1186 534114 : v->data = gp;
1187 534114 : return rel;
1188 : }
1189 :
1190 : run_optimizer
1191 668215 : bind_get_statistics(visitor *v, global_props *gp)
1192 : {
1193 668215 : (void) v;
1194 668215 : return (gp->opt_level == 1 && !gp->cnt[op_insert]) ? rel_get_statistics : NULL;
1195 : }
1196 :
1197 :
1198 : static bool
1199 90736 : point_select_on_unique_column(sql_rel *rel)
1200 : {
1201 90736 : if (is_select(rel->op) && !list_empty(rel->exps)) {
1202 126015 : for (node *n = rel->exps->h; n ; n = n->next) {
1203 72939 : sql_exp *e = n->data, *el = e->l, *er = e->r, *found = NULL;
1204 :
1205 72939 : if (is_compare(e->type) && e->flag == cmp_equal) {
1206 30990 : if (is_numeric_upcast(el))
1207 0 : el = el->l;
1208 30990 : if (is_numeric_upcast(er))
1209 0 : er = er->l;
1210 30990 : if (is_alias(el->type) && exp_is_atom(er) && (found = rel_find_exp(rel->l, el)) &&
1211 749 : is_unique(found) && (!is_semantics(e) || !has_nil(found) || !has_nil(er)))
1212 : return true;
1213 30241 : if (is_alias(er->type) && exp_is_atom(el) && (found = rel_find_exp(rel->l, er)) &&
1214 0 : is_unique(found) && (!is_semantics(e) || !has_nil(el) || !has_nil(found)))
1215 : return true;
1216 : }
1217 : }
1218 : }
1219 : return false;
1220 : }
1221 :
1222 : /*
1223 : * A point select on an unique column reduces the number of rows to 1. If the same select is under a
1224 : * join, the opposite side's select can be pushed above the join.
1225 : */
1226 : static inline sql_rel *
1227 1188360 : rel_push_select_up(visitor *v, sql_rel *rel)
1228 : {
1229 1188360 : if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) || is_semi(rel->op)) && !is_single(rel)) {
1230 245920 : sql_rel *l = rel->l, *r = rel->r;
1231 245920 : bool can_pushup_left = is_select(l->op) && !rel_is_ref(l) && !is_single(l) && (is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op)),
1232 245920 : can_pushup_right = is_select(r->op) && !rel_is_ref(r) && !is_single(r) && (is_innerjoin(rel->op) || is_right(rel->op));
1233 :
1234 245920 : if (can_pushup_left || can_pushup_right) {
1235 63939 : if (can_pushup_left)
1236 43434 : can_pushup_left = point_select_on_unique_column(r);
1237 63939 : if (can_pushup_right)
1238 47302 : can_pushup_right = point_select_on_unique_column(l);
1239 :
1240 : /* if both selects retrieve one row each, it's not worth it to push both up */
1241 63939 : if (can_pushup_left && !can_pushup_right) {
1242 683 : sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
1243 683 : nrel->l = l->l;
1244 683 : rel = rel_inplace_select(rel, nrel, l->exps);
1245 683 : assert(is_select(rel->op));
1246 683 : v->changes++;
1247 63256 : } else if (!can_pushup_left && can_pushup_right) {
1248 14 : sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
1249 14 : nrel->r = r->l;
1250 14 : rel = rel_inplace_select(rel, nrel, r->exps);
1251 14 : assert(is_select(rel->op));
1252 14 : v->changes++;
1253 : }
1254 : }
1255 : }
1256 1188360 : return rel;
1257 : }
1258 :
1259 : static int
1260 94236 : sql_class_base_score(visitor *v, sql_column *c, sql_subtype *t, bool equality_based)
1261 : {
1262 94236 : int de;
1263 :
1264 94236 : if (!t)
1265 : return 0;
1266 94236 : switch (ATOMstorage(t->type->localtype)) {
1267 : case TYPE_bte:
1268 : return 150 - 8;
1269 1307 : case TYPE_sht:
1270 1307 : return 150 - 16;
1271 37399 : case TYPE_int:
1272 37399 : return 150 - 32;
1273 518 : case TYPE_void:
1274 : case TYPE_lng:
1275 518 : return 150 - 64;
1276 : case TYPE_uuid:
1277 : #ifdef HAVE_HGE
1278 : case TYPE_hge:
1279 : #endif
1280 : return 150 - 128;
1281 2 : case TYPE_flt:
1282 2 : return 75 - 24;
1283 : case TYPE_dbl:
1284 : return 75 - 53;
1285 32104 : default:
1286 32104 : if (equality_based && c && v->storage_based_opt && (de = mvc_is_duplicate_eliminated(v->sql, c)))
1287 1478 : return 150 - de * 8;
1288 : /* strings and blobs not duplicate eliminated don't get any points here */
1289 : return 0;
1290 : }
1291 : }
1292 :
1293 : static int
1294 32731 : score_se_base(visitor *v, sql_rel *rel, sql_exp *e)
1295 : {
1296 32731 : int res = 0;
1297 32731 : sql_subtype *t = exp_subtype(e);
1298 32731 : sql_column *c = NULL;
1299 :
1300 32731 : if (e->type == e_convert) /* keep unsafes at the end (TODO improve) */
1301 : return -1000;
1302 : /* can we find out if the underlying table is sorted */
1303 32130 : if ((c = exp_find_column(rel, e, -2)) && v->storage_based_opt && mvc_is_sorted(v->sql, c))
1304 32130 : res += 600;
1305 :
1306 : /* prefer the shorter var types over the longer ones */
1307 32130 : res += sql_class_base_score(v, c, t, is_equality_or_inequality_exp(e->flag)); /* smaller the type, better */
1308 32130 : return res;
1309 : }
1310 :
1311 : static int
1312 56433 : score_se(visitor *v, sql_rel *rel, sql_exp *e)
1313 : {
1314 56433 : int score = 0;
1315 56433 : if (e->type == e_cmp && !is_complex_exp(e->flag)) {
1316 32731 : sql_exp *l = e->l;
1317 :
1318 32731 : while (l->type == e_cmp) { /* go through nested comparisons */
1319 2 : sql_exp *ll;
1320 :
1321 2 : if (l->flag == cmp_filter || l->flag == cmp_or)
1322 0 : ll = ((list*)l->l)->h->data;
1323 : else
1324 2 : ll = l->l;
1325 2 : if (ll->type != e_cmp)
1326 : break;
1327 : l = ll;
1328 : }
1329 32731 : score += score_se_base(v, rel, l);
1330 : }
1331 56433 : score += exp_keyvalue(e);
1332 56433 : return score;
1333 : }
1334 :
1335 : static inline sql_rel *
1336 1188360 : rel_select_order(visitor *v, sql_rel *rel)
1337 : {
1338 1188360 : int *scores = NULL;
1339 1188360 : sql_exp **exps = NULL;
1340 :
1341 1188360 : if (is_select(rel->op) && list_length(rel->exps) > 1) {
1342 26503 : node *n;
1343 26503 : int i, nexps = list_length(rel->exps);
1344 26503 : scores = SA_NEW_ARRAY(v->sql->ta, int, nexps);
1345 26503 : exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, nexps);
1346 :
1347 82936 : for (i = 0, n = rel->exps->h; n; i++, n = n->next) {
1348 56463 : exps[i] = n->data;
1349 56463 : if (find_prop(exps[i]->p, PROP_HASHCOL))
1350 : return rel;
1351 56433 : scores[i] = score_se(v, rel, n->data);
1352 : }
1353 26473 : GDKqsort(scores, exps, NULL, nexps, sizeof(int), sizeof(void *), TYPE_int, true, true);
1354 :
1355 82906 : for (i = 0, n = rel->exps->h; n; i++, n = n->next)
1356 56433 : n->data = exps[i];
1357 : }
1358 :
1359 : return rel;
1360 : }
1361 :
1362 : /* Compute the efficiency of using this expression early in a group by list */
1363 : static int
1364 62106 : score_gbe(visitor *v, sql_rel *rel, sql_exp *e)
1365 : {
1366 62106 : int res = 0;
1367 62106 : sql_subtype *t = exp_subtype(e);
1368 62106 : sql_column *c = exp_find_column(rel, e, -2);
1369 :
1370 62106 : if (e->card == CARD_ATOM) /* constants are trivial to group */
1371 38 : res += 1000;
1372 : /* can we find out if the underlying table is sorted */
1373 62106 : if (is_unique(e) || find_prop(e->p, PROP_HASHCOL) || (c && v->storage_based_opt && mvc_is_unique(v->sql, c))) /* distinct columns */
1374 3396 : res += 700;
1375 41123 : if (c && v->storage_based_opt && mvc_is_sorted(v->sql, c))
1376 3436 : res += 500;
1377 62106 : if (find_prop(e->p, PROP_HASHIDX)) /* has hash index */
1378 0 : res += 200;
1379 :
1380 : /* prefer the shorter var types over the longer ones */
1381 62106 : res += sql_class_base_score(v, c, t, true); /* smaller the type, better */
1382 62106 : return res;
1383 : }
1384 :
1385 : /* reorder group by expressions */
1386 : static inline sql_rel *
1387 1188360 : rel_groupby_order(visitor *v, sql_rel *rel)
1388 : {
1389 1188360 : int *scores = NULL;
1390 1188360 : sql_exp **exps = NULL;
1391 :
1392 1188360 : if (is_groupby(rel->op) && list_length(rel->r) > 1) {
1393 26255 : node *n;
1394 26255 : list *gbe = rel->r;
1395 26255 : int i, ngbe = list_length(gbe);
1396 26255 : scores = SA_NEW_ARRAY(v->sql->ta, int, ngbe);
1397 26255 : exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, ngbe);
1398 :
1399 : /* first sorting step, give priority for integers and sorted columns */
1400 88361 : for (i = 0, n = gbe->h; n; i++, n = n->next) {
1401 62106 : exps[i] = n->data;
1402 62106 : scores[i] = score_gbe(v, rel, exps[i]);
1403 : }
1404 26255 : GDKqsort(scores, exps, NULL, ngbe, sizeof(int), sizeof(void *), TYPE_int, true, true);
1405 :
1406 : /* second sorting step, give priority to strings with lower number of digits */
1407 53405 : for (i = ngbe - 1; i && !scores[i]; i--); /* find expressions with no score from the first round */
1408 26255 : if (scores[i])
1409 25289 : i++;
1410 26255 : if (ngbe - i > 1) {
1411 11678 : for (int j = i; j < ngbe; j++) {
1412 8795 : sql_subtype *t = exp_subtype(exps[j]);
1413 8795 : scores[j] = t ? t->digits : 0;
1414 : }
1415 : /* the less number of digits the better, order ascending */
1416 2883 : GDKqsort(scores + i, exps + i, NULL, ngbe - i, sizeof(int), sizeof(void *), TYPE_int, false, true);
1417 : }
1418 :
1419 88361 : for (i = 0, n = gbe->h; n; i++, n = n->next)
1420 62106 : n->data = exps[i];
1421 : }
1422 :
1423 1188360 : return rel;
1424 : }
1425 :
1426 : /* This optimization loop contains optimizations that can potentially use statistics */
1427 : static sql_rel *
1428 1188360 : rel_final_optimization_loop_(visitor *v, sql_rel *rel)
1429 : {
1430 : /* Run rel_push_select_up only once at the end to avoid an infinite optimization loop */
1431 1188360 : rel = rel_push_select_up(v, rel);
1432 1188360 : rel = rel_select_order(v, rel);
1433 :
1434 : /* TODO? Maybe later add rel_simplify_count, rel_join2semijoin, rel_simplify_fk_joins,
1435 : rel_distinct_project2groupby, rel_simplify_predicates, rel_simplify_math,
1436 : rel_distinct_aggregate_on_unique_values */
1437 :
1438 1188360 : rel = rel_groupby_order(v, rel);
1439 1188359 : return rel;
1440 : }
1441 :
1442 : static sql_rel *
1443 60681 : rel_final_optimization_loop(visitor *v, global_props *gp, sql_rel *rel)
1444 : {
1445 60681 : (void) gp;
1446 60681 : return rel_visitor_bottomup(v, rel, &rel_final_optimization_loop_);
1447 : }
1448 :
1449 : run_optimizer
1450 668214 : bind_final_optimization_loop(visitor *v, global_props *gp)
1451 : {
1452 668214 : int flag = v->sql->sql_optimizer;
1453 : /* At the moment, this optimizer has dependency on 3 flags */
1454 541546 : return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_select]) &&
1455 728897 : (flag & push_select_up) && (flag & optimize_select_and_joins_topdown) && (flag & optimize_projections) ? rel_final_optimization_loop : NULL;
1456 : }
|