Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer_private.h"
15 : #include "rel_statistics.h"
16 : #include "rel_basetable.h"
17 : #include "rel_rewriter.h"
18 :
19 : static sql_exp *
20 3462441 : comparison_find_column(sql_exp *input, sql_exp *e)
21 : {
22 3462441 : switch (input->type) {
23 98657 : case e_convert: {
24 98657 : list *types = (list *)input->r;
25 98657 : sql_class from = ((sql_subtype*)types->h->data)->type->eclass, to = ((sql_subtype*)types->h->next->data)->type->eclass;
26 98657 : if (from == to)
27 190249 : return comparison_find_column(input->l, e) ? input : NULL;
28 : return NULL;
29 : }
30 3054183 : case e_column:
31 3054183 : return exp_match(e, input) ? input : NULL;
32 : default:
33 : return NULL;
34 : }
35 : }
36 :
37 : /* multi lo <= col <= hi, maybe still be false even if lo or hi are NULL, possibly similar for filter on multiple
38 : * columns */
39 : #define comp_single_column(c) (!c->f)
40 :
41 : static sql_exp *
42 8755228 : rel_propagate_column_ref_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
43 : {
44 8838849 : assert(e->type == e_column);
45 8838849 : if (rel) {
46 8838770 : switch(rel->op) {
47 4087909 : case op_left:
48 : case op_right:
49 : case op_full:
50 : case op_join:
51 : case op_select:
52 : case op_anti:
53 : case op_semi: {
54 4087909 : bool found_without_semantics = false, found_left = false, found_right = false, still_unique = false;
55 :
56 4087909 : if ((is_innerjoin(rel->op) || is_select(rel->op)) && list_length(rel->exps) == 1 && exp_is_false(rel->exps->h->data))
57 : return NULL; /* nothing will pass, skip */
58 :
59 : /* propagate from the bottom first */
60 4072354 : if (rel_propagate_column_ref_statistics(sql, rel->l, e))
61 : found_left = true;
62 2592158 : if (!found_left && is_join(rel->op) && rel_propagate_column_ref_statistics(sql, rel->r, e))
63 4072354 : found_right = true;
64 :
65 4072354 : if (!found_left && !found_right)
66 : return NULL;
67 1776344 : if (!list_empty(rel->exps) && rel->op != op_anti) { /* if there's an or, the MIN and MAX get difficult to propagate */
68 3494160 : for (node *n = rel->exps->h ; n ; n = n->next) {
69 1809020 : sql_exp *comp = n->data, *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL, *fe = comp->f, *fne = NULL;
70 :
71 1809020 : if (comp->type == e_cmp) {
72 1808018 : if (is_theta_exp(comp->flag) && ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) || (fe && (fne = comparison_find_column(fe, e))))) {
73 124979 : atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
74 124979 : *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
75 :
76 : /* not semantics found or if explicitly filtering not null values from the column */
77 124979 : found_without_semantics |= (!is_semantics(comp) && comp_single_column(comp)) || (comp->flag == cmp_equal && lne && is_anti(comp) && exp_is_null(re));
78 124979 : still_unique |= comp->flag == cmp_equal && is_unique(le) && is_unique(re); /* unique if only equi-joins on unique columns are there */
79 124979 : if (is_full(rel->op) || (is_left(rel->op) && found_left) || (is_right(rel->op) && found_right)) /* on outer joins, min and max cannot be propagated on some cases */
80 19789 : continue;
81 : /* if (end2 >= start1 && start2 <= end1) then the 2 intervals are intersected */
82 105190 : if (fe && lval_min && lval_max) { /* range case, the middle expression must intersect the other two */
83 4622 : atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
84 4622 : int int1 = rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0,
85 4622 : int2 = fval_min && fval_max && atom_cmp(fval_max, lval_min) >= 0 && atom_cmp(fval_min, lval_max) <= 0,
86 4622 : symmetric = is_symmetric(comp);
87 :
88 4622 : if (is_anti(comp) || (!symmetric && fval_min && rval_max && atom_cmp(fval_min, rval_max) < 0)) /* for asymmetric case the re range must be after the fe range */
89 1817 : continue;
90 2805 : if (lne && int1 && int2) {
91 104 : if (symmetric) {
92 0 : prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
93 0 : atom *nmin = statistics_atom_min(sql, rval_min, fval_min), *nmax = statistics_atom_max(sql, rval_max, fval_max);
94 : /* max is min from le and (max from re and fe max) */
95 0 : set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, nmax, p2->value.pval) : nmax);
96 : /* min is max from le and (min from re and fe min) */
97 0 : set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, nmin, p1->value.pval) : nmin);
98 : } else {
99 104 : prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
100 : /* max is min from le and fe max */
101 104 : set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, fval_max, p2->value.pval) : fval_max);
102 : /* min is max from le and re min */
103 104 : set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, rval_min, p1->value.pval) : rval_min);
104 : }
105 2701 : } else if (rne) {
106 590 : if (symmetric && int1 && int2) { /* min is max from le and (min from re and fe min) */
107 0 : prop *p = find_prop(e->p, PROP_MIN);
108 0 : atom *nmin = p ? statistics_atom_min(sql, p->value.pval, fval_min) : fval_min;
109 0 : set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, nmin, lval_min));
110 590 : } else if (int1) { /* min is max from le and re min */
111 100 : prop *p = find_prop(e->p, PROP_MIN);
112 100 : set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
113 : }
114 2111 : } else if (fne) {
115 549 : if (symmetric && int1 && int2) { /* max is min from le and (max from re and fe max) */
116 0 : prop *p = find_prop(e->p, PROP_MAX);
117 0 : atom *nmax = p ? statistics_atom_max(sql, p->value.pval, rval_max) : rval_max;
118 0 : set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, nmax, lval_max) : nmax);
119 549 : } else if (int2) { /* max is min from le and fe max */
120 266 : prop *p = find_prop(e->p, PROP_MAX);
121 266 : set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
122 : }
123 : }
124 100568 : } else if (lval_min && lval_max && rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0) {
125 : /* both min and max must be set and the intervals must overlap */
126 42997 : switch (comp->flag) {
127 29001 : case cmp_equal: /* for equality reduce */
128 29001 : set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_max(sql, lval_max, rval_max) : statistics_atom_min(sql, lval_max, rval_max));
129 29001 : set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_min(sql, lval_min, rval_min) : statistics_atom_max(sql, lval_min, rval_min));
130 29001 : break;
131 4680 : case cmp_notequal: /* for inequality expand */
132 4680 : set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_min(sql, lval_max, rval_max) : statistics_atom_max(sql, lval_max, rval_max));
133 4680 : set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_max(sql, lval_min, rval_min) : statistics_atom_min(sql, lval_min, rval_min));
134 4680 : break;
135 5310 : case cmp_gt:
136 : case cmp_gte:
137 9682 : if (!is_anti(comp) && lne) { /* min is max from both min */
138 4372 : prop *p = find_prop(e->p, PROP_MIN);
139 4372 : set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min, p->value.pval) : rval_min);
140 938 : } else if (!is_anti(comp)) { /* max is min from both max */
141 938 : prop *p = find_prop(e->p, PROP_MAX);
142 938 : set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
143 : }
144 : break;
145 4006 : case cmp_lt:
146 : case cmp_lte:
147 7205 : if (!is_anti(comp) && lne) { /* max is min from both max */
148 3199 : prop *p = find_prop(e->p, PROP_MAX);
149 3199 : set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max, p->value.pval) : rval_max);
150 807 : } else if (!is_anti(comp)) { /* min is max from both min */
151 807 : prop *p = find_prop(e->p, PROP_MIN);
152 807 : set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
153 : }
154 : break;
155 : default: /* Maybe later I can do cmp_in and cmp_notin */
156 : break;
157 : }
158 : }
159 : }
160 : }
161 : }
162 : }
163 1776344 : if (is_full(rel->op) || (is_left(rel->op) && found_right) || (is_right(rel->op) && found_left))
164 38046 : set_has_nil(e);
165 1776344 : if (!is_outerjoin(rel->op) && found_without_semantics) /* at an outer join, null values pass */
166 94131 : set_has_no_nil(e);
167 1776344 : if (is_join(rel->op) && is_unique(e) && !still_unique)
168 119293 : set_not_unique(e);
169 1776344 : return e;
170 : }
171 4647879 : case op_table:
172 : case op_basetable:
173 : case op_union:
174 : case op_except:
175 : case op_inter:
176 : case op_munion:
177 : case op_project:
178 : case op_groupby: {
179 4647879 : sql_exp *found;
180 4647879 : atom *fval;
181 4647879 : prop *est;
182 4647879 : if ((found = rel_find_exp(rel, e))) {
183 2184562 : if (rel->op != op_table) { /* At the moment don't propagate statistics for table relations */
184 2140242 : if ((fval = find_prop_and_get(found->p, PROP_MAX)))
185 1136142 : set_minmax_property(sql, e, PROP_MAX, fval);
186 2140238 : if ((fval = find_prop_and_get(found->p, PROP_MIN)))
187 1143025 : set_minmax_property(sql, e, PROP_MIN, fval);
188 2140236 : if (!has_nil(found))
189 1381927 : set_has_no_nil(e);
190 2140236 : if (is_unique(found) || (need_distinct(rel) && list_length(rel->exps) == 1) ||
191 1728969 : (is_groupby(rel->op) && (list_empty(rel->r) || (list_length(rel->r) == 1 && exps_find_exp(rel->r, e)))))
192 422149 : set_unique(e);
193 : /* propagate unique estimation for known cases */
194 2140236 : if (is_groupby(rel->op) && list_empty(rel->r) && !find_prop(e->p, PROP_NUNIQUES)) { /* global aggregate case */
195 7543 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
196 7543 : p->value.dval = 1;
197 2132693 : } else if (((is_basetable(rel->op) || is_except(rel->op) || is_inter(rel->op) || is_simple_project(rel->op) ||
198 66651 : (is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
199 2074978 : (est = find_prop(found->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES))) {
200 191758 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
201 191757 : p->value.dval = est->value.dval;
202 : }
203 : }
204 2184556 : return e;
205 : }
206 : return NULL;
207 : }
208 83621 : case op_topn:
209 : case op_sample:
210 83621 : return rel_propagate_column_ref_statistics(sql, rel->l, e);
211 : default:
212 : break;
213 : }
214 : }
215 : return NULL;
216 : }
217 :
218 : static atom *
219 4541994 : atom_from_valptr( allocator *sa, sql_subtype *tpe, ValRecord *v)
220 : {
221 4541994 : atom *a = SA_NEW(sa, atom);
222 :
223 4541900 : assert(!VALisnil(v));
224 4541801 : *a = (atom) {.tpe = *tpe,};
225 4541801 : SA_VALcopy(sa, &a->data, v);
226 4541725 : return a;
227 : }
228 :
229 : void
230 4179165 : sql_column_get_statistics(mvc *sql, sql_column *c, sql_exp *e)
231 : {
232 4179165 : bool nonil = false, unique = false;
233 4179165 : double unique_est = 0.0;
234 4179165 : ValRecord min, max;
235 4179165 : int ok = mvc_col_stats(sql, c, &nonil, &unique, &unique_est, &min, &max);
236 :
237 4179180 : if (has_nil(e) && nonil)
238 2753845 : set_has_no_nil(e);
239 4179180 : if (!is_unique(e) && unique)
240 1106664 : set_unique(e);
241 4179180 : if (unique_est != 0.0) {
242 2934618 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
243 2934625 : p->value.dval = unique_est;
244 : }
245 4179187 : unsigned int digits = 0;
246 4179187 : sql_subtype *et = exp_subtype(e);
247 4179155 : if (et->type->eclass == EC_DEC || et->type->eclass == EC_NUM)
248 2712581 : digits = et->digits;
249 4179155 : if ((ok & 2) == 2) {
250 2268195 : if (!VALisnil(&max)) {
251 2268196 : prop *p = e->p = prop_create(sql->sa, PROP_MAX, e->p);
252 2268155 : p->value.pval = atom_from_valptr(sql->sa, &c->type, &max);
253 2268089 : if (digits) {
254 1687283 : unsigned int nd = atom_digits(p->value.pval);
255 1687376 : if (nd < digits)
256 : digits = nd;
257 1687376 : if (!digits)
258 : digits = 1;
259 : }
260 : }
261 2268187 : VALclear(&max);
262 : }
263 4179119 : if ((ok & 1) == 1) {
264 2274008 : if (!VALisnil(&min)) {
265 2274005 : prop *p = e->p = prop_create(sql->sa, PROP_MIN, e->p);
266 2274000 : p->value.pval = atom_from_valptr(sql->sa, &c->type, &min);
267 2273964 : if (digits) {
268 1694243 : unsigned int nd = atom_digits(p->value.pval);
269 1694240 : if (nd > digits) /* possibly set to low by max value */
270 : digits = nd;
271 : if (!digits)
272 : digits = 1;
273 : }
274 : }
275 2273961 : VALclear(&min);
276 : }
277 4179061 : if (digits)
278 2712490 : et->digits = digits;
279 4179061 : if (et->type->eclass == EC_DEC && et->digits <= et->scale)
280 216 : et->digits = et->scale + 1;
281 4179061 : }
282 :
283 : static void
284 938948 : rel_basetable_column_get_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
285 : {
286 938948 : if (e->p)
287 : return;
288 303109 : sql_column *c = NULL;
289 :
290 303109 : if ((c = rel_base_find_column(rel, e->nid))) {
291 204909 : sql_column_get_statistics(sql, c, e);
292 : }
293 : }
294 :
295 : static bool
296 8872 : rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, sql_exp *e, int i)
297 : {
298 8872 : sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
299 8872 : atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
300 8872 : *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
301 8872 : prop *est;
302 :
303 : /* for the intersection, if both expressions don't overlap, it can be pruned */
304 8872 : if (is_inter(rel->op) && !has_nil(le) && !has_nil(re) &&
305 511 : ((rval_max && lval_min && atom_cmp(rval_max, lval_min) < 0) || (rval_min && lval_max && atom_cmp(rval_min, lval_max) > 0)))
306 26 : return true;
307 :
308 8846 : if (lval_max && rval_max) {
309 2557 : if (is_union(rel->op))
310 3 : set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
311 2554 : else if (is_inter(rel->op))
312 399 : set_minmax_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval_max, rval_max)); /* for intersect the new max will be the min of the two */
313 : else /* except */
314 2155 : set_minmax_property(sql, e, PROP_MAX, lval_max);
315 : }
316 8846 : if (lval_min && rval_min) {
317 2557 : if (is_union(rel->op))
318 3 : set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
319 2554 : else if (is_inter(rel->op))
320 399 : set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval_min, rval_min)); /* for intersect the new min will be the max of the two */
321 : else /* except */
322 2155 : set_minmax_property(sql, e, PROP_MIN, lval_min);
323 : }
324 :
325 8846 : if (is_union(rel->op) || is_munion(rel->op)) {
326 5986 : if (!has_nil(le) && !has_nil(re))
327 879 : set_has_no_nil(e);
328 5986 : if (need_distinct(rel) && list_length(rel->exps) == 1)
329 0 : set_unique(e);
330 2860 : } else if (is_inter(rel->op)) {
331 564 : if (!has_nil(le) || !has_nil(re))
332 509 : set_has_no_nil(e);
333 564 : if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
334 345 : set_unique(e);
335 : } else {
336 2296 : assert(is_except(rel->op));
337 2296 : if (!has_nil(le))
338 2233 : set_has_no_nil(e);
339 2296 : if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
340 2009 : set_unique(e);
341 : }
342 : /* propagate unique estimation for known cases */
343 8846 : if (!is_union(rel->op) && (est = find_prop(le->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES)) {
344 205 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
345 205 : p->value.dval = est->value.dval;
346 : }
347 : return false;
348 : }
349 :
350 :
351 : static void
352 60852 : rel_munion_get_statistics(mvc *sql, sql_rel *rel, list *rels, sql_exp *e, int i)
353 : {
354 60852 : assert(is_munion(rel->op));
355 :
356 60852 : sql_rel *l = rels->h->data;
357 60852 : sql_exp *le = list_fetch(l->exps, i);
358 60852 : atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX);
359 60852 : bool has_nonil = !has_nil(le);
360 :
361 178024 : for(node *n = rels->h->next; n; n = n->next) {
362 117172 : sql_rel *r = n->data;
363 117172 : sql_exp *re = list_fetch(r->exps, i);
364 117172 : atom *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
365 :
366 117172 : if (lval_max && rval_max) {
367 43136 : set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
368 43136 : lval_max = find_prop_and_get(e->p, PROP_MAX);
369 : }
370 117172 : if (lval_min && rval_min) {
371 42591 : set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
372 42591 : lval_min = find_prop_and_get(e->p, PROP_MIN);
373 : }
374 117172 : has_nonil &= !has_nil(re);
375 :
376 : }
377 :
378 60852 : if (has_nonil)
379 41583 : set_has_no_nil(e);
380 :
381 60852 : if (need_distinct(rel) && list_length(rel->exps) == 1)
382 1594 : set_unique(e);
383 60852 : }
384 :
385 :
386 : static sql_exp *
387 3491153 : rel_propagate_statistics(visitor *v, sql_rel *rel, sql_exp *e, int depth)
388 : {
389 3491153 : mvc *sql = v->sql;
390 3491153 : atom *lval;
391 :
392 3491153 : (void) depth;
393 3491153 : switch(e->type) {
394 2190395 : case e_column:
395 2190395 : switch (rel->op) { /* set relations don't call rel_propagate_statistics */
396 279481 : case op_join:
397 : case op_left:
398 : case op_right:
399 : case op_full:
400 : case op_semi:
401 : case op_anti: {
402 279481 : sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e);
403 279481 : if (!found)
404 140185 : (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
405 : break;
406 : }
407 1910914 : case op_select:
408 : case op_project:
409 : case op_groupby: {
410 1910914 : sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e); /* labels may be found on the same projection, ugh */
411 1910909 : if (!found && is_simple_project(rel->op))
412 125491 : (void) rel_propagate_column_ref_statistics(sql, rel, e);
413 : break;
414 : }
415 0 : case op_insert:
416 : case op_update:
417 : case op_delete:
418 0 : (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
419 0 : break;
420 : default:
421 : break;
422 : }
423 : break;
424 99314 : case e_convert: {
425 99314 : sql_subtype *to = exp_totype(e), *from = exp_fromtype(e);
426 99314 : sql_exp *l = e->l;
427 99314 : sql_class fr = from->type->eclass, too = to->type->eclass;
428 99314 : prop *est;
429 :
430 99314 : if (fr == too) {
431 90242 : if ((lval = find_prop_and_get(l->p, PROP_MAX))) {
432 59460 : atom *res = atom_copy(sql->sa, lval);
433 59460 : if ((res = atom_cast(sql->sa, res, to)))
434 59437 : set_minmax_property(sql, e, PROP_MAX, res);
435 : }
436 90242 : if ((lval = find_prop_and_get(l->p, PROP_MIN))) {
437 60077 : atom *res = atom_copy(sql->sa, lval);
438 60077 : if ((res = atom_cast(sql->sa, res, to)))
439 60054 : set_minmax_property(sql, e, PROP_MIN, res);
440 : }
441 : }
442 : /* propagating nuniques through conversions is tricky. I am adding just the general cases */
443 99314 : if ((est = find_prop(l->p, PROP_NUNIQUES)) &&
444 61694 : (((fr == EC_TIME || fr == EC_TIME_TZ || fr == EC_DATE) && (too == EC_TIMESTAMP || too == EC_TIMESTAMP_TZ)) ||
445 61669 : ((fr == EC_NUM || fr == EC_BIT) && too == EC_NUM) || (fr == EC_DEC && too == EC_DEC) || (fr != EC_EXTERNAL && too == EC_STRING))) {
446 58242 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
447 58242 : p->value.dval = est->value.dval;
448 : }
449 99314 : if (!has_nil(l))
450 56039 : set_has_no_nil(e);
451 : break;
452 : }
453 342131 : case e_aggr:
454 : case e_func: {
455 342131 : BUN lv;
456 342131 : sql_subfunc *f = e->f;
457 :
458 342131 : if (!f->func->s) {
459 315658 : int key = hash_key(f->func->base.name); /* Using hash lookup */
460 315658 : sql_hash_e *he = sql_functions_lookup->buckets[key&(sql_functions_lookup->size-1)];
461 315658 : lookup_function look = NULL;
462 :
463 689297 : for (; he && !look; he = he->chain) {
464 373639 : struct function_properties* fp = (struct function_properties*) he->value;
465 :
466 373639 : if (!strcmp(f->func->base.name, fp->name))
467 107852 : look = fp->func;
468 : }
469 315658 : if (look)
470 107852 : look(sql, e);
471 : }
472 : /* for global aggregates with no semantics, if the left relation has values, then the output won't be NULL */
473 342131 : if (!is_semantics(e) && e->l && !have_nil(e->l) &&
474 89264 : (e->type != e_aggr || (is_groupby(rel->op) && list_length(rel->r)) || ((lv = get_rel_count(rel->l)) != BUN_NONE && lv > 0)))
475 88937 : set_has_no_nil(e);
476 : /* set properties for global aggregates */
477 342131 : if (e->type == e_aggr && is_groupby(rel->op) && list_empty(rel->r)) {
478 7875 : if (!find_prop(e->p, PROP_NUNIQUES)) {
479 7875 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
480 7875 : p->value.dval = 1;
481 : }
482 7875 : set_unique(e);
483 : }
484 : break;
485 : }
486 573626 : case e_atom:
487 573626 : if (e->l) {
488 555925 : atom *a = (atom*) e->l;
489 555925 : if (!a->isnull) {
490 492419 : set_minmax_property(sql, e, PROP_MAX, a);
491 492419 : set_minmax_property(sql, e, PROP_MIN, a);
492 : }
493 555925 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
494 555925 : p->value.dval = 1;
495 17701 : } else if (e->f) {
496 4176 : list *vals = (list *) e->f;
497 4176 : sql_exp *first = vals->h ? vals->h->data : NULL;
498 4176 : atom *max = NULL, *min = NULL; /* all child values must have a valid min/max */
499 4176 : int has_nil = 0;
500 :
501 4176 : if (first) {
502 4176 : max = ((lval = find_prop_and_get(first->p, PROP_MAX))) ? lval : NULL;
503 4176 : min = ((lval = find_prop_and_get(first->p, PROP_MIN))) ? lval : NULL;
504 4176 : has_nil |= has_nil(first);
505 : }
506 :
507 17949 : for (node *n = vals->h ? vals->h->next : NULL ; n ; n = n->next) {
508 9597 : sql_exp *ee = n->data;
509 :
510 9597 : if (min && max) {
511 9104 : if ((lval = find_prop_and_get(ee->p, PROP_MAX))) {
512 9058 : max = atom_cmp(lval, max) > 0 ? lval : max;
513 : } else {
514 : max = NULL;
515 : }
516 9104 : if ((lval = find_prop_and_get(ee->p, PROP_MIN))) {
517 9058 : min = atom_cmp(min, lval) > 0 ? lval : min;
518 : } else {
519 : min = NULL;
520 : }
521 : }
522 9597 : has_nil |= has_nil(ee);
523 : }
524 :
525 4176 : if (!has_nil)
526 3806 : set_has_no_nil(e);
527 4176 : if (min && max) {
528 3764 : set_minmax_property(sql, e, PROP_MAX, max);
529 3764 : set_minmax_property(sql, e, PROP_MIN, min);
530 : }
531 4176 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
532 4176 : p->value.dval = (dbl) list_length(vals);
533 : } else {
534 13525 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
535 13525 : p->value.dval = 1;
536 : }
537 : break;
538 285687 : case e_cmp:
539 : /* TODO? propagating min/max/unique of booleans is not very worth it */
540 285687 : if (e->flag == cmp_or || e->flag == cmp_filter) {
541 17436 : if (!have_nil(e->l) && !have_nil(e->r))
542 12966 : set_has_no_nil(e);
543 268251 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
544 21798 : sql_exp *le = e->l;
545 21798 : if (!has_nil(le) && !have_nil(e->r))
546 18734 : set_has_no_nil(e);
547 : } else {
548 246453 : sql_exp *le = e->l, *re = e->r, *fe = e->f;
549 246453 : if (!has_nil(le) && !has_nil(re) && (!e->f || !has_nil(fe)))
550 176318 : set_has_no_nil(e);
551 : }
552 : break;
553 : case e_psm:
554 : break;
555 : }
556 :
557 : #ifndef NDEBUG
558 : {
559 : /* min and max cannot be NULL and min must be <= than max, if it happens the inner relation must be empty! */
560 3491148 : atom *min = find_prop_and_get(e->p, PROP_MIN), *max = find_prop_and_get(e->p, PROP_MAX);
561 :
562 3491148 : (void) min;
563 3491148 : (void) max;
564 3491148 : assert(!min || !min->isnull);
565 3491148 : assert(!max || !max->isnull);
566 3491148 : assert(!min || !max || (min && max)/* atom_cmp(min, max) <= 0*/);
567 : }
568 : #endif
569 3491148 : return e;
570 : }
571 :
572 : static list * /* Remove predicates always false from min/max values */
573 139300 : rel_prune_predicates(visitor *v, sql_rel *rel)
574 : {
575 139300 : if (rel->l) {
576 139300 : sql_rel *l = rel->l;
577 139300 : if (is_single(l))
578 3397 : return rel->exps;
579 : }
580 135903 : if (!list_empty(rel->attr))
581 2963 : return rel->exps;
582 282008 : for (node *n = rel->exps->h ; n ; n = n->next) {
583 149068 : sql_exp *e = n->data;
584 :
585 149068 : if (e->type == e_cmp && is_theta_exp(e->flag)) {
586 122668 : sql_exp *le = e->l, *re = e->r, *fe = e->f;
587 122668 : atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
588 122668 : *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
589 122668 : bool always_false = false, always_true = false;
590 :
591 122668 : if (fe && !is_symmetric(e)) {
592 3058 : atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
593 3058 : comp_type lower = range2lcompare(e->flag), higher = range2rcompare(e->flag);
594 3660 : int not_int1 = rval_min && lval_max && /* the middle and left intervals don't overlap */
595 1663 : (!is_anti(e) && (lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0)),
596 4072 : not_int2 = lval_min && fval_max && /* the middle and right intervals don't overlap */
597 2485 : (!is_anti(e) && (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0)),
598 3058 : not_int3 = rval_min && fval_max && /* the left interval is after the right one */
599 1287 : (!is_anti(e) && (atom_cmp(rval_min, fval_max) > 0));
600 :
601 3058 : always_false |= not_int1 || not_int2 || not_int3;
602 : /* for anti the middle must be before the left or after the right or the right after the left, for the other the middle must be always between the left and right intervals */
603 4113 : always_true |= !has_nil(le) && !has_nil(re) && !has_nil(fe) &&
604 3957 : lval_min && lval_max && rval_min && rval_max && fval_min && fval_max &&
605 575 : (is_anti(e) ? ((lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) || (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
606 488 : ((lower == cmp_gte ? atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0)));
607 : } else if (!fe) {
608 119592 : if (!is_semantics(e)) /* trivial not null cmp null case */
609 227754 : always_false |= !is_anti(e) && ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)));
610 119592 : switch (e->flag) {
611 104211 : case cmp_equal:
612 104211 : if (lval_min && lval_max && rval_min && rval_max && (!is_semantics(e) || !has_nil(le) || !has_nil(re)))
613 133468 : always_false |= (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
614 104211 : if (is_semantics(e)) { /* prune *= NULL cases */
615 5686 : always_false |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
616 11372 : always_true |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
617 : }
618 : break;
619 7250 : case cmp_notequal:
620 7250 : if (lval_min && lval_max && rval_min && rval_max)
621 11354 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
622 7250 : if (is_semantics(e)) {
623 29 : always_true |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
624 58 : always_false |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
625 : }
626 : break;
627 5487 : case cmp_gt:
628 5487 : if (lval_max && rval_min)
629 1833 : always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
630 5487 : if (lval_min && rval_max)
631 3666 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
632 : break;
633 119 : case cmp_gte:
634 119 : if (lval_max && rval_min)
635 49 : always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
636 119 : if (lval_min && rval_max)
637 98 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
638 : break;
639 2441 : case cmp_lt:
640 2441 : if (lval_min && rval_max)
641 1383 : always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
642 2441 : if (lval_max && rval_min)
643 2814 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
644 : break;
645 84 : case cmp_lte:
646 84 : if (lval_min && rval_max)
647 11 : always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
648 84 : if (lval_max && rval_min)
649 22 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
650 : break;
651 : default: /* Maybe later I can do cmp_in and cmp_notin but don't forget to remove is_theta_exp check up there */
652 : break;
653 : }
654 : }
655 122668 : assert(!always_false || !always_true);
656 122668 : if (always_false || always_true) {
657 3656 : sql_exp *ne = exp_atom_bool(v->sql->sa, always_true /* if always_true set then true else false */);
658 3656 : if (exp_name(e))
659 0 : exp_prop_alias(v->sql->sa, ne, e);
660 3656 : n->data = ne;
661 3656 : v->changes++;
662 : }
663 : }
664 : }
665 132940 : return rel->exps;
666 : }
667 :
668 : static sql_rel *
669 14 : set_setop_side(visitor *v, sql_rel *rel, sql_rel *side)
670 : {
671 14 : if (side == rel->r)
672 0 : rel->r = NULL;
673 : else
674 14 : rel->l = NULL;
675 14 : if (!is_simple_project(side->op) || rel_is_ref(side) || !list_empty(side->r)) {
676 0 : side = rel_project(v->sql->sa, side, rel_projections(v->sql, side, NULL, 0, 1));
677 0 : set_count_prop(v->sql->sa, side, get_rel_count(side->l));
678 0 : side->exps = exps_exp_visitor_bottomup(v, side, side->exps, 0, &rel_propagate_statistics, false);
679 : }
680 35 : for (node *n = rel->exps->h, *m = side->exps->h ; n && m ; n = n->next, m = m->next) {
681 21 : sql_exp *e1 = n->data, *e2 = m->data;
682 21 : exp_prop_alias(v->sql->sa, e2, e1);
683 : }
684 14 : list_hash_clear(side->exps);
685 :
686 14 : if (need_distinct(rel))
687 0 : set_distinct(side);
688 14 : side->p = prop_copy(v->sql->sa, rel->p);
689 14 : rel_destroy(rel);
690 14 : return side;
691 : }
692 :
693 : static BUN
694 21885 : trivial_project_exp_card(sql_exp *e)
695 : {
696 22188 : if (e->type == e_convert)
697 303 : return trivial_project_exp_card(e->l);
698 21885 : return e->type == e_atom && e->f ? (BUN) list_length(e->f) : 1;
699 : }
700 :
701 : static BUN
702 24248 : rel_calc_nuniques(mvc *sql, sql_rel *l, list *exps)
703 : {
704 24248 : BUN lv = get_rel_count(l);
705 :
706 24248 : if (lv == 0)
707 : return 0;
708 21539 : if (!list_empty(exps)) {
709 21539 : BUN nuniques = 0;
710 : /* compute the highest number of unique values */
711 51131 : for (node *n = exps->h ; n && nuniques != BUN_NONE ; n = n->next) {
712 29592 : sql_exp *e = n->data;
713 29592 : sql_rel *bt = NULL;
714 29592 : prop *p = NULL;
715 29592 : BUN euniques = BUN_NONE;
716 29592 : atom *min, *max, *sub = NULL;
717 29592 : sql_subtype *tp = exp_subtype(e);
718 29592 : sql_class ec = tp ? tp->type->eclass : EC_STRING; /* if 'e' has no type (eg parameter), use a non-number type to fail condition */
719 :
720 29592 : if ((p = find_prop(e->p, PROP_NUNIQUES))) {
721 20956 : euniques = (BUN) p->value.dval;
722 8636 : } else if (e->type == e_column && e->nid && rel_find_exp_and_corresponding_rel(l, e, false, &bt, NULL) && bt && (p = find_prop(bt->p, PROP_COUNT))) {
723 6593 : euniques = (BUN) p->value.lval;
724 : }
725 : /* use min to max range to compute number of possible values in the domain for number types */
726 29592 : if ((EC_TEMP(ec)||ec==EC_NUM||ec==EC_MONTH||ec==EC_POS) &&
727 22363 : (min = find_prop_and_get(e->p, PROP_MIN)) && (max = find_prop_and_get(e->p, PROP_MAX))) {
728 : /* the range includes min and max, so the atom_inc call is needed */
729 : /* if 'euniques' has number of distinct values, compute min between both */
730 17528 : if ((sub = atom_sub(sql->sa, max, min)) && (sub = atom_inc(sql->sa, sub)) && (sub = atom_cast(sql->sa, sub, sql_bind_localtype("oid"))))
731 0 : euniques = MIN(euniques, (BUN) sub->data.val.oval);
732 : }
733 29592 : if (euniques != BUN_NONE)
734 27549 : nuniques = MAX(nuniques, euniques); /* the highest cardinality sets the estimation */
735 : else
736 : nuniques = BUN_NONE;
737 : }
738 21539 : if (nuniques != BUN_NONE)
739 : return nuniques;
740 : }
741 : return lv;
742 : }
743 :
744 : static sql_rel *
745 2077322 : rel_get_statistics_(visitor *v, sql_rel *rel)
746 : {
747 : /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
748 2077322 : uint8_t has_special_modify = *(uint8_t*) v->data;
749 2077322 : bool can_be_pruned = !has_special_modify && v->storage_based_opt;
750 :
751 : /* Don't look at the same relation twice */
752 2077322 : if (are_statistics_gathered(rel->used))
753 : return rel;
754 1336117 : rel->used |= statistics_gathered;
755 :
756 1336117 : switch (rel->op) {
757 313477 : case op_basetable: {
758 313477 : sql_table *t = (sql_table *) rel->l;
759 313477 : sqlstore *store = v->sql->session->tr->store;
760 :
761 313477 : if (!list_empty(rel->exps)) {
762 1252376 : for (node *n = rel->exps->h ; n ; n = n->next)
763 938948 : rel_basetable_column_get_statistics(v->sql, rel, n->data);
764 : }
765 : /* Set table row count. TODO? look for remote tables. Don't look at storage for declared tables, because it won't be cleaned */
766 313477 : if (isTable(t) && t->s && !isDeclaredTable(t)) /* count active rows only */
767 259849 : set_count_prop(v->sql->sa, rel, (BUN)store->storage_api.count_col(v->sql->session->tr, ol_first_node(t->columns)->data, 10));
768 : break;
769 : }
770 2790 : case op_union:
771 : case op_inter:
772 : case op_except: {
773 2790 : bool empty_cross = false;
774 2790 : int i = 0;
775 2790 : sql_rel *l = rel->l, *pl = rel->l, *r = rel->r, *pr = rel->r;
776 :
777 2790 : while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
778 0 : pl = pl->l;
779 2790 : while (is_sample(pr->op) || is_topn(pr->op))
780 0 : pr = pr->l;
781 : /* if it's not a projection, then project and propagate statistics */
782 2790 : if (!is_project(pl->op) && !is_base(pl->op)) {
783 0 : pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
784 0 : set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
785 0 : pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
786 : }
787 2790 : if (!is_project(pr->op) && !is_base(pr->op)) {
788 0 : pr = rel_project(v->sql->sa, pr, rel_projections(v->sql, pr, NULL, 0, 1));
789 0 : set_count_prop(v->sql->sa, pr, get_rel_count(pr->l));
790 0 : pr->exps = exps_exp_visitor_bottomup(v, pr, pr->exps, 0, &rel_propagate_statistics, false);
791 : }
792 :
793 11662 : for (node *n = rel->exps->h ; n ; n = n->next) {
794 8872 : empty_cross |= rel_setop_get_statistics(v->sql, rel, pl->exps, pr->exps, n->data, i);
795 8872 : i++;
796 : }
797 :
798 : /* propagate row count */
799 2790 : if (is_union(rel->op)) {
800 277 : BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
801 277 : rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
802 :
803 277 : if (lv == 0 && rv == 0) { /* both sides empty */
804 2 : if (can_be_pruned)
805 : empty_cross = true;
806 : else
807 2 : set_count_prop(v->sql->sa, rel, 0);
808 275 : } else if (can_be_pruned && lv == 0 && !rel_is_ref(rel)) { /* left side empty */
809 0 : rel = set_setop_side(v, rel, r);
810 0 : empty_cross = false; /* don't rewrite again */
811 275 : } else if (can_be_pruned && rv == 0 && !rel_is_ref(rel)) { /* right side empty */
812 0 : rel = set_setop_side(v, rel, l);
813 0 : empty_cross = false; /* don't rewrite again */
814 275 : } else if (lv != BUN_NONE && rv != BUN_NONE) {
815 7 : set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
816 : }
817 2513 : } else if (is_inter(rel->op) || is_except(rel->op)) {
818 2513 : BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
819 2513 : rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
820 :
821 2513 : if (lv == 0) { /* left side empty */
822 62 : if (can_be_pruned)
823 : empty_cross = true;
824 : else
825 5 : set_count_prop(v->sql->sa, rel, 0);
826 2451 : } else if (rv == 0) { /* right side empty */
827 17 : if (is_inter(rel->op)) {
828 3 : if (can_be_pruned)
829 : empty_cross = true;
830 : else
831 0 : set_count_prop(v->sql->sa, rel, 0);
832 14 : } else if (can_be_pruned && !rel_is_ref(rel)) {
833 14 : rel = set_setop_side(v, rel, l);
834 14 : empty_cross = false; /* don't rewrite again */
835 : } else {
836 0 : set_count_prop(v->sql->sa, rel, lv);
837 : }
838 : } else {
839 2434 : set_count_prop(v->sql->sa, rel, lv);
840 : }
841 : }
842 2790 : if (can_be_pruned && empty_cross) {
843 80 : rel_destroy(rel->l);
844 80 : rel->l = NULL;
845 80 : rel_destroy(rel->r);
846 80 : rel->r = NULL;
847 244 : for (node *n = rel->exps->h ; n ; n = n->next) {
848 164 : sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
849 164 : exp_prop_alias(v->sql->sa, a, e);
850 164 : n->data = a;
851 : }
852 80 : list_hash_clear(rel->exps);
853 80 : sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
854 80 : set_count_prop(v->sql->sa, l, 1);
855 80 : l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
856 80 : set_count_prop(v->sql->sa, l, 0);
857 80 : rel->op = op_project;
858 80 : rel->l = l;
859 80 : rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
860 80 : set_count_prop(v->sql->sa, rel, 0);
861 80 : set_nodistinct(rel); /* set relations may have distinct flag set */
862 80 : v->changes++;
863 : }
864 : break;
865 : }
866 12888 : case op_munion: {
867 12888 : list *l = rel->l, *nrels = sa_list(v->sql->sa);
868 12888 : BUN cnt = 0;
869 12888 : bool needs_pruning = false;
870 :
871 49497 : for (node *n = l->h; n; n = n->next) {
872 36609 : sql_rel *r = n->data, *pl = r;
873 :
874 36609 : while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
875 0 : pl = pl->l;
876 : /* if it's not a projection, then project and propagate statistics */
877 36609 : if (!is_project(pl->op) && !is_base(pl->op)) {
878 0 : pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
879 0 : set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
880 0 : pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
881 : }
882 36609 : nrels = append(nrels, pl);
883 : /* we need new munion statistics */
884 : /* propagate row count */
885 36609 : BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
886 : /* if PROP_COUNT does not exist we assume at least a row (see get_rel_count def) */
887 36609 : if (rv == BUN_NONE) {
888 1172 : cnt++;
889 1172 : continue;
890 : }
891 35437 : if (!rv && can_be_pruned)
892 6625 : needs_pruning = true;
893 : /* overflow check */
894 35437 : if (rv > (BUN_MAX - cnt))
895 36609 : rv = BUN_MAX;
896 : else
897 35437 : cnt += rv;
898 : }
899 12888 : int i = 0;
900 73740 : for (node *n = rel->exps->h ; n ; n = n->next, i++)
901 60852 : rel_munion_get_statistics(v->sql, rel, nrels, n->data, i);
902 :
903 12888 : if (needs_pruning && !rel_is_ref(rel)) {
904 4464 : v->changes++;
905 4464 : list *nl = sa_list(l->sa);
906 :
907 16465 : for (node *n = nrels->h; n; n = n->next) {
908 12001 : sql_rel *r = n->data;
909 12001 : BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
910 :
911 12001 : if (!rv) { /* keep last for now */
912 6155 : rel_destroy(r);
913 6155 : continue;
914 : }
915 5846 : nl = append(nl, r);
916 : }
917 4464 : rel->l = nl;
918 4464 : if (list_length(nl) == 1) {
919 4138 : sql_rel *l = rel->l = nl->h->data; /* ugh */
920 4138 : rel->r = NULL;
921 4138 : rel->op = op_project;
922 :
923 20331 : for (node *n = rel->exps->h, *m = l->exps->h ; n && m ; n = n->next, m = m->next) {
924 16193 : sql_exp *pe = n->data, *ie = m->data;
925 16193 : sql_exp *ne = exp_ref(v->sql, ie);
926 16193 : exp_prop_alias(v->sql->sa, ne, pe);
927 16193 : n->data = ne;
928 : }
929 4138 : list_hash_clear(rel->exps);
930 326 : } else if (list_empty(nl)) {
931 : /* empty select (project [ nils ] ) */
932 454 : for (node *n = rel->exps->h ; n ; n = n->next) {
933 354 : sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
934 354 : exp_prop_alias(v->sql->sa, a, e);
935 354 : n->data = a;
936 : }
937 100 : list_hash_clear(rel->exps);
938 100 : sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
939 100 : set_count_prop(v->sql->sa, l, 1);
940 100 : l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
941 100 : set_count_prop(v->sql->sa, l, 0);
942 100 : rel->op = op_project;
943 100 : rel->r = NULL;
944 100 : rel->l = l;
945 100 : rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
946 100 : set_count_prop(v->sql->sa, rel, 0);
947 100 : set_nodistinct(rel); /* set relations may have distinct flag set */
948 : }
949 : } else {
950 8424 : set_count_prop(v->sql->sa, rel, cnt);
951 : }
952 : break;
953 : }
954 533609 : case op_join:
955 : case op_left:
956 : case op_right:
957 : case op_full:
958 : case op_semi:
959 : case op_anti:
960 : case op_select:
961 : case op_project:
962 : case op_groupby: {
963 533609 : if (is_groupby(rel->op) && !list_empty(rel->r))
964 15531 : rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
965 533609 : rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, &rel_propagate_statistics, false);
966 533608 : if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r))
967 39284 : rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
968 : /* The following optimizations can only be applied after propagating the statistics to rel->exps */
969 533608 : if (can_be_pruned && (is_join(rel->op) || is_select(rel->op)) && !list_empty(rel->exps)) {
970 139300 : int changes = v->changes;
971 139300 : rel->exps = rel_prune_predicates(v, rel);
972 139300 : if (v->changes > changes)
973 3623 : rel = rewrite_simplify(v, 0, v->value_based_opt, rel);
974 : }
975 :
976 : /* propagate row count */
977 533608 : sql_rel *l = rel->l, *r = rel->r;
978 533608 : switch (rel->op) {
979 134701 : case op_join:
980 : case op_left:
981 : case op_right:
982 : case op_full: {
983 134701 : BUN lv = get_rel_count(l), rv = get_rel_count(r), uniques_estimate = BUN_MAX, join_idx_estimate = BUN_MAX;
984 :
985 134701 : if (!list_empty(rel->exps) && !is_single(rel)) {
986 246427 : for (node *n = rel->exps->h ; n ; n = n->next) {
987 125796 : sql_exp *e = n->data, *el = e->l, *er = e->r;
988 :
989 125796 : if (find_prop(e->p, PROP_JOINIDX)) {
990 667 : join_idx_estimate = lv>rv?lv:rv;
991 125129 : } else if (e->type == e_cmp && e->flag == cmp_equal) {
992 : /* if one of the sides is unique, the cardinality will be that exact number, but look for nulls */
993 121245 : if (!is_semantics(e) || !has_nil(el) || !has_nil(er)) {
994 120849 : BUN lu = 0, ru = 0;
995 120849 : prop *p = NULL;
996 120849 : if ((p = find_prop(el->p, PROP_NUNIQUES)))
997 89685 : lu = (BUN) p->value.dval;
998 120849 : if ((p = find_prop(er->p, PROP_NUNIQUES)))
999 104063 : ru = (BUN) p->value.dval;
1000 120849 : if (is_unique(el) || lu > lv) {
1001 73808 : BUN ncount = (is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv;
1002 73808 : uniques_estimate = MIN(uniques_estimate, ncount);
1003 47041 : } else if (is_unique(er) || ru > rv) {
1004 29402 : BUN ncount = (is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv;
1005 29402 : uniques_estimate = MIN(uniques_estimate, ncount);
1006 : }
1007 : }
1008 : }
1009 : }
1010 : }
1011 134701 : if (is_single(rel)) {
1012 4737 : set_count_prop(v->sql->sa, rel, lv);
1013 129964 : } else if (join_idx_estimate != BUN_MAX) {
1014 665 : set_count_prop(v->sql->sa, rel, join_idx_estimate);
1015 129299 : } else if (uniques_estimate != BUN_MAX) {
1016 96746 : set_count_prop(v->sql->sa, rel, uniques_estimate);
1017 32553 : } else if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
1018 : /* corner cases for outer joins */
1019 126 : if (is_left(rel->op)) {
1020 114 : set_count_prop(v->sql->sa, rel, lv);
1021 12 : } else if (is_right(rel->op)) {
1022 3 : set_count_prop(v->sql->sa, rel, rv);
1023 9 : } else if (is_full(rel->op) && lv != BUN_NONE && rv != BUN_NONE) {
1024 9 : set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
1025 0 : } else if (lv != BUN_NONE && rv != BUN_NONE) {
1026 0 : set_count_prop(v->sql->sa, rel, 0);
1027 : }
1028 32427 : } else if (lv == 0) {
1029 1166 : set_count_prop(v->sql->sa, rel, (is_right(rel->op) || is_full(rel->op)) ? rv : 0);
1030 31830 : } else if (rv == 0) {
1031 1568 : set_count_prop(v->sql->sa, rel, (is_left(rel->op) || is_full(rel->op)) ? lv : 0);
1032 30756 : } else if (lv != BUN_NONE && rv != BUN_NONE) {
1033 18288 : set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX / lv)) ? BUN_MAX : (lv * rv)); /* overflow check */
1034 : }
1035 : break;
1036 : }
1037 2639 : case op_anti:
1038 2639 : set_count_prop(v->sql->sa, rel, get_rel_count(l));
1039 2639 : break;
1040 108641 : case op_semi:
1041 : case op_select:
1042 : /* TODO calculate cardinalities using selectivities */
1043 108641 : if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
1044 2745 : set_count_prop(v->sql->sa, rel, 0);
1045 : } else {
1046 208552 : if (!list_empty(rel->exps) && !is_single(rel)) {
1047 102656 : BUN cnt = get_rel_count(l), u = 1;
1048 171084 : for (node *n = rel->exps->h ; n ; n = n->next) {
1049 112474 : sql_exp *e = n->data, *el = e->l, *er = e->r;
1050 :
1051 : /* simple expressions first */
1052 112474 : if (e->type == e_cmp && e->flag == cmp_equal && exp_is_atom(er)) {
1053 : /* use selectivity */
1054 57247 : prop *p;
1055 57247 : if ((p = find_prop(el->p, PROP_NUNIQUES))) {
1056 44046 : u = (BUN) p->value.dval;
1057 44046 : break;
1058 : }
1059 : }
1060 : }
1061 : /* u is an *estimate*, so don't set count_prop to 0 unless cnt is 0 */
1062 102656 : set_count_prop(v->sql->sa, rel, cnt == 0 ? 0 : u == 0 || u > cnt ? 1 : cnt/u);
1063 : } else {
1064 3240 : set_count_prop(v->sql->sa, rel, get_rel_count(l));
1065 : }
1066 : }
1067 : break;
1068 264257 : case op_project:
1069 264257 : if (l) {
1070 254140 : if (need_distinct(rel)) {
1071 0 : set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->exps));
1072 : } else {
1073 254140 : set_count_prop(v->sql->sa, rel, get_rel_count(l));
1074 : }
1075 : } else {
1076 10117 : BUN card = 1;
1077 :
1078 10117 : if (!list_empty(rel->exps)) {
1079 30673 : for (node *n = rel->exps->h ; n ; n = n->next) {
1080 20556 : sql_exp *e = n->data;
1081 20556 : card = MAX(card, trivial_project_exp_card(e));
1082 : }
1083 : }
1084 10117 : set_count_prop(v->sql->sa, rel, card);
1085 : }
1086 : break;
1087 22956 : case op_groupby:
1088 22956 : if (list_empty(rel->r)) {
1089 7424 : set_count_prop(v->sql->sa, rel, 1);
1090 : } else {
1091 15532 : set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->r));
1092 : }
1093 : break;
1094 : default:
1095 : break;
1096 : }
1097 : break;
1098 : }
1099 16896 : case op_topn: {
1100 16896 : BUN lv = get_rel_count(rel->l);
1101 :
1102 16896 : if (lv != BUN_NONE) {
1103 16879 : sql_exp *le = rel->exps->h->data, *oe = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
1104 96 : if (oe && oe->l && exp_is_not_null(oe)) { /* no parameters */
1105 96 : BUN offset = (BUN) ((atom*)oe->l)->data.val.lval;
1106 96 : lv = offset >= lv ? 0 : lv - offset;
1107 : }
1108 16879 : if (le->l && exp_is_not_null(le)) {
1109 16841 : BUN limit = (BUN) ((atom*)le->l)->data.val.lval;
1110 16841 : lv = MIN(lv, limit);
1111 : }
1112 16879 : set_count_prop(v->sql->sa, rel, lv);
1113 : }
1114 : break;
1115 : }
1116 22 : case op_sample: {
1117 22 : BUN lv = get_rel_count(rel->l);
1118 :
1119 22 : if (lv != BUN_NONE) {
1120 4 : sql_exp *se = rel->exps->h->data;
1121 4 : sql_subtype *tp = exp_subtype(se);
1122 :
1123 4 : if (se->l && tp->type->eclass == EC_NUM) { /* sample is a number of rows */
1124 4 : BUN sample = (BUN) ((atom*)se->l)->data.val.lval;
1125 4 : lv = MIN(lv, sample);
1126 0 : } else if (se->l) { /* sample is a percentage of rows */
1127 0 : dbl percent = ((atom*)se->l)->data.val.dval;
1128 0 : assert(tp->type->eclass == EC_FLT);
1129 0 : lv = (BUN) ceil((dbl)lv * percent);
1130 : }
1131 4 : set_count_prop(v->sql->sa, rel, lv);
1132 : }
1133 : break;
1134 : }
1135 5973 : case op_table: {
1136 5973 : sql_exp *op = rel->r;
1137 5973 : if (rel->flag != TRIGGER_WRAPPER && op) {
1138 5661 : sql_subfunc *f = op->f;
1139 5661 : if (f->func->lang == FUNC_LANG_SQL) {
1140 97 : set_count_prop(v->sql->sa, rel, 1000 /* just some fallback value */);
1141 5564 : } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "storage") == 0) {
1142 829 : set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of catalog */);
1143 4735 : } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "db_users") == 0) {
1144 0 : set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of users */);
1145 4735 : } else if (f->func->lang == FUNC_LANG_MAL && strncmp(f->func->base.name, "querylog", 8) == 0) {
1146 1085 : set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of querylog */);
1147 3650 : } else if (f->func->lang == FUNC_LANG_MAL &&
1148 3540 : (strcmp(f->func->base.name, "queue") == 0 ||
1149 3276 : strcmp(f->func->base.name, "optimizers") == 0 ||
1150 2964 : strcmp(f->func->base.name, "env") == 0 ||
1151 2672 : strcmp(f->func->base.name, "keywords") == 0 ||
1152 2672 : strcmp(f->func->base.name, "statistics") == 0 ||
1153 2005 : strcmp(f->func->base.name, "rejects") == 0 ||
1154 1751 : strcmp(f->func->base.name, "schemastorage") == 0 ||
1155 1751 : strncmp(f->func->base.name, "storage", 7) == 0 ||
1156 1751 : strcmp(f->func->base.name, "sessions") == 0) ) {
1157 2079 : set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of queue */);
1158 : }
1159 : /* else {
1160 : printf("%%func needs stats : %s\n", f->func->base.name);
1161 : } */
1162 : }
1163 : break;
1164 : }
1165 : /*These relations are less important for now
1166 : TODO later we can tune it */
1167 : case op_insert:
1168 : case op_update:
1169 : case op_delete:
1170 : case op_truncate:
1171 : case op_ddl:
1172 : default:
1173 : break;
1174 : }
1175 :
1176 : return rel;
1177 : }
1178 :
1179 : static sql_rel *
1180 542396 : rel_get_statistics(visitor *v, global_props *gp, sql_rel *rel)
1181 : {
1182 : /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
1183 542396 : uint8_t has_special_modify = (uint8_t) gp->has_special_modify;
1184 542396 : v->data = &has_special_modify;
1185 542396 : rel = rel_visitor_bottomup(v, rel, &rel_get_statistics_);
1186 542396 : v->data = gp;
1187 542396 : return rel;
1188 : }
1189 :
1190 : run_optimizer
1191 718925 : bind_get_statistics(visitor *v, global_props *gp)
1192 : {
1193 718925 : (void) v;
1194 718925 : return (gp->opt_level == 1 && !gp->cnt[op_insert]) ? rel_get_statistics : NULL;
1195 : }
1196 :
1197 :
1198 : static bool
1199 94932 : point_select_on_unique_column(sql_rel *rel)
1200 : {
1201 94932 : if (is_select(rel->op) && !list_empty(rel->exps)) {
1202 131381 : for (node *n = rel->exps->h; n ; n = n->next) {
1203 75654 : sql_exp *e = n->data, *el = e->l, *er = e->r, *found = NULL;
1204 :
1205 75654 : if (is_compare(e->type) && e->flag == cmp_equal) {
1206 33625 : if (is_numeric_upcast(el))
1207 0 : el = el->l;
1208 33625 : if (is_numeric_upcast(er))
1209 0 : er = er->l;
1210 33625 : if (is_alias(el->type) && exp_is_atom(er) && (found = rel_find_exp(rel->l, el)) &&
1211 749 : is_unique(found) && (!is_semantics(e) || !has_nil(found) || !has_nil(er)))
1212 : return true;
1213 32876 : if (is_alias(er->type) && exp_is_atom(el) && (found = rel_find_exp(rel->l, er)) &&
1214 0 : is_unique(found) && (!is_semantics(e) || !has_nil(el) || !has_nil(found)))
1215 : return true;
1216 : }
1217 : }
1218 : }
1219 : return false;
1220 : }
1221 :
1222 : /*
1223 : * A point select on an unique column reduces the number of rows to 1. If the same select is under a
1224 : * join, the opposite side's select can be pushed above the join.
1225 : */
1226 : static inline sql_rel *
1227 1246752 : rel_push_select_up(visitor *v, sql_rel *rel)
1228 : {
1229 1246752 : if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) || is_semi(rel->op)) && !is_single(rel)) {
1230 254263 : sql_rel *l = rel->l, *r = rel->r;
1231 254263 : bool can_pushup_left = is_select(l->op) && !rel_is_ref(l) && !is_single(l) && (is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op)),
1232 254263 : can_pushup_right = is_select(r->op) && !rel_is_ref(r) && !is_single(r) && (is_innerjoin(rel->op) || is_right(rel->op));
1233 :
1234 254263 : if (can_pushup_left || can_pushup_right) {
1235 66931 : if (can_pushup_left)
1236 45442 : can_pushup_left = point_select_on_unique_column(r);
1237 66931 : if (can_pushup_right)
1238 49490 : can_pushup_right = point_select_on_unique_column(l);
1239 :
1240 : /* if both selects retrieve one row each, it's not worth it to push both up */
1241 66931 : if (can_pushup_left && !can_pushup_right) {
1242 683 : sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
1243 683 : nrel->l = l->l;
1244 683 : rel = rel_inplace_select(rel, nrel, l->exps);
1245 683 : assert(is_select(rel->op));
1246 683 : v->changes++;
1247 66248 : } else if (!can_pushup_left && can_pushup_right) {
1248 14 : sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
1249 14 : nrel->r = r->l;
1250 14 : rel = rel_inplace_select(rel, nrel, r->exps);
1251 14 : assert(is_select(rel->op));
1252 14 : v->changes++;
1253 : }
1254 : }
1255 : }
1256 1246752 : return rel;
1257 : }
1258 :
1259 : static int
1260 93332 : sql_class_base_score(visitor *v, sql_column *c, sql_subtype *t, bool equality_based)
1261 : {
1262 93332 : int de;
1263 :
1264 93332 : if (!t)
1265 : return 0;
1266 93332 : switch (ATOMstorage(t->type->localtype)) {
1267 : case TYPE_bte:
1268 : return 150 - 8;
1269 1599 : case TYPE_sht:
1270 1599 : return 150 - 16;
1271 37333 : case TYPE_int:
1272 37333 : return 150 - 32;
1273 518 : case TYPE_void:
1274 : case TYPE_lng:
1275 518 : return 150 - 64;
1276 : case TYPE_uuid:
1277 : #ifdef HAVE_HGE
1278 : case TYPE_hge:
1279 : #endif
1280 : return 150 - 128;
1281 2 : case TYPE_flt:
1282 2 : return 75 - 24;
1283 : case TYPE_dbl:
1284 : return 75 - 53;
1285 30582 : default:
1286 30582 : if (equality_based && c && v->storage_based_opt && (de = mvc_is_duplicate_eliminated(v->sql, c)))
1287 1554 : return 150 - de * 8;
1288 : /* strings and blobs not duplicate eliminated don't get any points here */
1289 : return 0;
1290 : }
1291 : }
1292 :
1293 : static int
1294 34433 : score_se_base(visitor *v, sql_rel *rel, sql_exp *e)
1295 : {
1296 34433 : int res = 0;
1297 34433 : sql_subtype *t = exp_subtype(e);
1298 34433 : sql_column *c = NULL;
1299 :
1300 34433 : if (e->type == e_convert) /* keep unsafes at the end (TODO improve) */
1301 : return -1000;
1302 : /* can we find out if the underlying table is sorted */
1303 33832 : if ((c = exp_find_column(rel, e, -2)) && v->storage_based_opt && mvc_is_sorted(v->sql, c))
1304 33832 : res += 600;
1305 :
1306 : /* prefer the shorter var types over the longer ones */
1307 33832 : res += sql_class_base_score(v, c, t, is_equality_or_inequality_exp(e->flag)); /* smaller the type, better */
1308 33832 : return res;
1309 : }
1310 :
1311 : static int
1312 58443 : score_se(visitor *v, sql_rel *rel, sql_exp *e)
1313 : {
1314 58443 : int score = 0;
1315 58443 : if (e->type == e_cmp && !is_complex_exp(e->flag)) {
1316 34433 : sql_exp *l = e->l;
1317 :
1318 34433 : while (l->type == e_cmp) { /* go through nested comparisons */
1319 2 : sql_exp *ll;
1320 :
1321 2 : if (l->flag == cmp_filter || l->flag == cmp_or)
1322 0 : ll = ((list*)l->l)->h->data;
1323 : else
1324 2 : ll = l->l;
1325 2 : if (ll->type != e_cmp)
1326 : break;
1327 : l = ll;
1328 : }
1329 34433 : score += score_se_base(v, rel, l);
1330 : }
1331 58443 : score += exp_keyvalue(e);
1332 58443 : return score;
1333 : }
1334 :
1335 : static inline sql_rel *
1336 1246752 : rel_select_order(visitor *v, sql_rel *rel)
1337 : {
1338 1246752 : int *scores = NULL;
1339 1246752 : sql_exp **exps = NULL;
1340 :
1341 1246752 : if (is_select(rel->op) && list_length(rel->exps) > 1) {
1342 27337 : node *n;
1343 27337 : int i, nexps = list_length(rel->exps);
1344 27337 : scores = SA_NEW_ARRAY(v->sql->ta, int, nexps);
1345 27337 : exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, nexps);
1346 :
1347 85780 : for (i = 0, n = rel->exps->h; n; i++, n = n->next) {
1348 58473 : exps[i] = n->data;
1349 58473 : if (find_prop(exps[i]->p, PROP_HASHCOL))
1350 : return rel;
1351 58443 : scores[i] = score_se(v, rel, n->data);
1352 : }
1353 27307 : GDKqsort(scores, exps, NULL, nexps, sizeof(int), sizeof(void *), TYPE_int, true, true);
1354 :
1355 85750 : for (i = 0, n = rel->exps->h; n; i++, n = n->next)
1356 58443 : n->data = exps[i];
1357 : }
1358 :
1359 : return rel;
1360 : }
1361 :
1362 : /* Compute the efficiency of using this expression early in a group by list */
1363 : static int
1364 59500 : score_gbe(visitor *v, sql_rel *rel, sql_exp *e)
1365 : {
1366 59500 : int res = 0;
1367 59500 : sql_subtype *t = exp_subtype(e);
1368 59500 : sql_column *c = exp_find_column(rel, e, -2);
1369 :
1370 59500 : if (e->card == CARD_ATOM) /* constants are trivial to group */
1371 38 : res += 1000;
1372 : /* can we find out if the underlying table is sorted */
1373 59500 : if (is_unique(e) || find_prop(e->p, PROP_HASHCOL) || (c && v->storage_based_opt && mvc_is_unique(v->sql, c))) /* distinct columns */
1374 3582 : res += 700;
1375 38911 : if (c && v->storage_based_opt && mvc_is_sorted(v->sql, c))
1376 3712 : res += 500;
1377 59500 : if (find_prop(e->p, PROP_HASHIDX)) /* has hash index */
1378 0 : res += 200;
1379 :
1380 : /* prefer the shorter var types over the longer ones */
1381 59500 : res += sql_class_base_score(v, c, t, true); /* smaller the type, better */
1382 59500 : return res;
1383 : }
1384 :
1385 : /* reorder group by expressions */
1386 : static inline sql_rel *
1387 1246751 : rel_groupby_order(visitor *v, sql_rel *rel)
1388 : {
1389 1246751 : int *scores = NULL;
1390 1246751 : sql_exp **exps = NULL;
1391 :
1392 1246751 : if (is_groupby(rel->op) && list_length(rel->r) > 1) {
1393 25587 : node *n;
1394 25587 : list *gbe = rel->r;
1395 25587 : int i, ngbe = list_length(gbe);
1396 25587 : scores = SA_NEW_ARRAY(v->sql->ta, int, ngbe);
1397 25587 : exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, ngbe);
1398 :
1399 : /* first sorting step, give priority for integers and sorted columns */
1400 85087 : for (i = 0, n = gbe->h; n; i++, n = n->next) {
1401 59500 : exps[i] = n->data;
1402 59500 : scores[i] = score_gbe(v, rel, exps[i]);
1403 : }
1404 25587 : GDKqsort(scores, exps, NULL, ngbe, sizeof(int), sizeof(void *), TYPE_int, true, true);
1405 :
1406 : /* second sorting step, give priority to strings with lower number of digits */
1407 50503 : for (i = ngbe - 1; i && !scores[i]; i--); /* find expressions with no score from the first round */
1408 25587 : if (scores[i])
1409 24589 : i++;
1410 25587 : if (ngbe - i > 1) {
1411 8604 : for (int j = i; j < ngbe; j++) {
1412 6597 : sql_subtype *t = exp_subtype(exps[j]);
1413 6597 : scores[j] = t ? t->digits : 0;
1414 : }
1415 : /* the less number of digits the better, order ascending */
1416 2007 : GDKqsort(scores + i, exps + i, NULL, ngbe - i, sizeof(int), sizeof(void *), TYPE_int, false, true);
1417 : }
1418 :
1419 85087 : for (i = 0, n = gbe->h; n; i++, n = n->next)
1420 59500 : n->data = exps[i];
1421 : }
1422 :
1423 1246751 : return rel;
1424 : }
1425 :
1426 : /* This optimization loop contains optimizations that can potentially use statistics */
1427 : static sql_rel *
1428 1246752 : rel_final_optimization_loop_(visitor *v, sql_rel *rel)
1429 : {
1430 : /* Run rel_push_select_up only once at the end to avoid an infinite optimization loop */
1431 1246752 : rel = rel_push_select_up(v, rel);
1432 1246752 : rel = rel_select_order(v, rel);
1433 :
1434 : /* TODO? Maybe later add rel_simplify_count, rel_join2semijoin, rel_simplify_fk_joins,
1435 : rel_distinct_project2groupby, rel_simplify_predicates, rel_simplify_math,
1436 : rel_distinct_aggregate_on_unique_values */
1437 :
1438 1246751 : rel = rel_groupby_order(v, rel);
1439 1246751 : return rel;
1440 : }
1441 :
1442 : static sql_rel *
1443 66181 : rel_final_optimization_loop(visitor *v, global_props *gp, sql_rel *rel)
1444 : {
1445 66181 : (void) gp;
1446 66181 : return rel_visitor_bottomup(v, rel, &rel_final_optimization_loop_);
1447 : }
1448 :
1449 : run_optimizer
1450 718921 : bind_final_optimization_loop(visitor *v, global_props *gp)
1451 : {
1452 718921 : int flag = v->sql->sql_optimizer;
1453 : /* At the moment, this optimizer has dependency on 3 flags */
1454 549989 : return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_select]) &&
1455 785104 : (flag & push_select_up) && (flag & optimize_select_and_joins_topdown) && (flag & optimize_projections) ? rel_final_optimization_loop : NULL;
1456 : }
|