Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer_private.h"
15 : #include "rel_statistics.h"
16 : #include "rel_basetable.h"
17 : #include "rel_rewriter.h"
18 : #include "sql_storage.h"
19 :
20 : static sql_exp *
21 3467188 : comparison_find_column(sql_exp *input, sql_exp *e)
22 : {
23 3467188 : switch (input->type) {
24 99051 : case e_convert: {
25 99051 : list *types = (list *)input->r;
26 99051 : sql_class from = ((sql_subtype*)types->h->data)->type->eclass, to = ((sql_subtype*)types->h->next->data)->type->eclass;
27 99051 : if (from == to)
28 191277 : return comparison_find_column(input->l, e) ? input : NULL;
29 : return NULL;
30 : }
31 3052965 : case e_column:
32 3052965 : return exp_match(e, input) ? input : NULL;
33 : default:
34 : return NULL;
35 : }
36 : }
37 :
38 : /* multi lo <= col <= hi, maybe still be false even if lo or hi are NULL, possibly similar for filter on multiple
39 : * columns */
40 : #define comp_single_column(c) (!c->f)
41 :
42 : static sql_exp *
43 8755006 : rel_propagate_column_ref_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
44 : {
45 8839301 : assert(e->type == e_column);
46 8839301 : if (rel) {
47 8839220 : switch(rel->op) {
48 4093645 : case op_left:
49 : case op_right:
50 : case op_full:
51 : case op_join:
52 : case op_select:
53 : case op_anti:
54 : case op_semi: {
55 4093645 : bool found_without_semantics = false, found_left = false, found_right = false, still_unique = false;
56 :
57 4093645 : if ((is_innerjoin(rel->op) || is_select(rel->op)) && list_length(rel->exps) == 1 && exp_is_false(rel->exps->h->data))
58 : return NULL; /* nothing will pass, skip */
59 :
60 : /* propagate from the bottom first */
61 4078004 : if (rel_propagate_column_ref_statistics(sql, rel->l, e))
62 : found_left = true;
63 2592152 : if (!found_left && is_join(rel->op) && rel_propagate_column_ref_statistics(sql, rel->r, e))
64 4078004 : found_right = true;
65 :
66 4078004 : if (!found_left && !found_right)
67 : return NULL;
68 1781561 : if (!list_empty(rel->exps) && rel->op != op_anti) { /* if there's an or, the MIN and MAX get difficult to propagate */
69 3498812 : for (node *n = rel->exps->h ; n ; n = n->next) {
70 1812836 : sql_exp *comp = n->data, *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL, *fe = comp->f, *fne = NULL;
71 :
72 1812836 : if (comp->type == e_cmp) {
73 1811832 : if (is_theta_exp(comp->flag) && ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) || (fe && (fne = comparison_find_column(fe, e))))) {
74 124335 : atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
75 124334 : *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
76 :
77 : /* not semantics found or if explicitly filtering not null values from the column */
78 124334 : found_without_semantics |= (!is_semantics(comp) && comp_single_column(comp)) || (comp->flag == cmp_equal && lne && is_anti(comp) && exp_is_null(re));
79 124334 : still_unique |= comp->flag == cmp_equal && is_unique(le) && is_unique(re); /* unique if only equi-joins on unique columns are there */
80 124334 : if (is_full(rel->op) || (is_left(rel->op) && found_left) || (is_right(rel->op) && found_right)) /* on outer joins, min and max cannot be propagated on some cases */
81 19707 : continue;
82 : /* if (end2 >= start1 && start2 <= end1) then the 2 intervals are intersected */
83 104627 : if (fe && lval_min && lval_max) { /* range case, the middle expression must intersect the other two */
84 4621 : atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
85 4621 : int int1 = rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0,
86 4621 : int2 = fval_min && fval_max && atom_cmp(fval_max, lval_min) >= 0 && atom_cmp(fval_min, lval_max) <= 0,
87 4621 : symmetric = is_symmetric(comp);
88 :
89 4621 : if (is_anti(comp) || (!symmetric && fval_min && rval_max && atom_cmp(fval_min, rval_max) < 0)) /* for asymmetric case the re range must be after the fe range */
90 1816 : continue;
91 2805 : if (lne && int1 && int2) {
92 104 : if (symmetric) {
93 0 : prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
94 0 : atom *nmin = statistics_atom_min(sql, rval_min, fval_min), *nmax = statistics_atom_max(sql, rval_max, fval_max);
95 : /* max is min from le and (max from re and fe max) */
96 0 : set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, nmax, p2->value.pval) : nmax);
97 : /* min is max from le and (min from re and fe min) */
98 0 : set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, nmin, p1->value.pval) : nmin);
99 : } else {
100 104 : prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
101 : /* max is min from le and fe max */
102 104 : set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, fval_max, p2->value.pval) : fval_max);
103 : /* min is max from le and re min */
104 104 : set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, rval_min, p1->value.pval) : rval_min);
105 : }
106 2701 : } else if (rne) {
107 590 : if (symmetric && int1 && int2) { /* min is max from le and (min from re and fe min) */
108 0 : prop *p = find_prop(e->p, PROP_MIN);
109 0 : atom *nmin = p ? statistics_atom_min(sql, p->value.pval, fval_min) : fval_min;
110 0 : set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, nmin, lval_min));
111 590 : } else if (int1) { /* min is max from le and re min */
112 100 : prop *p = find_prop(e->p, PROP_MIN);
113 100 : set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
114 : }
115 2111 : } else if (fne) {
116 549 : if (symmetric && int1 && int2) { /* max is min from le and (max from re and fe max) */
117 0 : prop *p = find_prop(e->p, PROP_MAX);
118 0 : atom *nmax = p ? statistics_atom_max(sql, p->value.pval, rval_max) : rval_max;
119 0 : set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, nmax, lval_max) : nmax);
120 549 : } else if (int2) { /* max is min from le and fe max */
121 266 : prop *p = find_prop(e->p, PROP_MAX);
122 266 : set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
123 : }
124 : }
125 100006 : } else if (lval_min && lval_max && rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0) {
126 : /* both min and max must be set and the intervals must overlap */
127 42029 : switch (comp->flag) {
128 28005 : case cmp_equal: /* for equality reduce */
129 28005 : set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_max(sql, lval_max, rval_max) : statistics_atom_min(sql, lval_max, rval_max));
130 28005 : set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_min(sql, lval_min, rval_min) : statistics_atom_max(sql, lval_min, rval_min));
131 28005 : break;
132 4707 : case cmp_notequal: /* for inequality expand */
133 4707 : set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_min(sql, lval_max, rval_max) : statistics_atom_max(sql, lval_max, rval_max));
134 4707 : set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_max(sql, lval_min, rval_min) : statistics_atom_min(sql, lval_min, rval_min));
135 4707 : break;
136 5311 : case cmp_gt:
137 : case cmp_gte:
138 9684 : if (!is_anti(comp) && lne) { /* min is max from both min */
139 4373 : prop *p = find_prop(e->p, PROP_MIN);
140 4373 : set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min, p->value.pval) : rval_min);
141 938 : } else if (!is_anti(comp)) { /* max is min from both max */
142 938 : prop *p = find_prop(e->p, PROP_MAX);
143 938 : set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
144 : }
145 : break;
146 4006 : case cmp_lt:
147 : case cmp_lte:
148 7205 : if (!is_anti(comp) && lne) { /* max is min from both max */
149 3199 : prop *p = find_prop(e->p, PROP_MAX);
150 3199 : set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max, p->value.pval) : rval_max);
151 807 : } else if (!is_anti(comp)) { /* min is max from both min */
152 807 : prop *p = find_prop(e->p, PROP_MIN);
153 807 : set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
154 : }
155 : break;
156 : default: /* Maybe later I can do cmp_in and cmp_notin */
157 : break;
158 : }
159 : }
160 : }
161 : }
162 : }
163 : }
164 1781561 : if (is_full(rel->op) || (is_left(rel->op) && found_right) || (is_right(rel->op) && found_left))
165 35468 : set_has_nil(e);
166 1781561 : if (!is_outerjoin(rel->op) && found_without_semantics) /* at an outer join, null values pass */
167 93803 : set_has_no_nil(e);
168 1781561 : if (is_join(rel->op) && is_unique(e) && !still_unique)
169 118741 : set_not_unique(e);
170 1781561 : return e;
171 : }
172 4641919 : case op_table:
173 : case op_basetable:
174 : case op_union:
175 : case op_except:
176 : case op_inter:
177 : case op_munion:
178 : case op_project:
179 : case op_groupby: {
180 4641919 : if (is_recursive(rel))
181 : return NULL;
182 4641724 : sql_exp *found;
183 4641724 : atom *fval;
184 4641724 : prop *est;
185 4641724 : if ((found = rel_find_exp(rel, e))) {
186 2175703 : if (rel->op != op_table) { /* At the moment don't propagate statistics for table relations */
187 2131209 : if ((fval = find_prop_and_get(found->p, PROP_MAX)))
188 1121022 : set_minmax_property(sql, e, PROP_MAX, fval);
189 2131200 : if ((fval = find_prop_and_get(found->p, PROP_MIN)))
190 1128068 : set_minmax_property(sql, e, PROP_MIN, fval);
191 2131194 : if (!has_nil(found))
192 1374110 : set_has_no_nil(e);
193 2131194 : if (is_unique(found) || (need_distinct(rel) && list_length(rel->exps) == 1) ||
194 1717079 : (is_groupby(rel->op) && (list_empty(rel->r) || (list_length(rel->r) == 1 && exps_find_exp(rel->r, e)))))
195 425357 : set_unique(e);
196 : /* propagate unique estimation for known cases */
197 2131195 : if (is_groupby(rel->op) && list_empty(rel->r) && !find_prop(e->p, PROP_NUNIQUES)) { /* global aggregate case */
198 7592 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
199 7592 : p->value.dval = 1;
200 2123603 : } else if (((is_basetable(rel->op) || is_except(rel->op) || is_inter(rel->op) || is_simple_project(rel->op) ||
201 71518 : (is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
202 2064753 : (est = find_prop(found->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES))) {
203 193931 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
204 193932 : p->value.dval = est->value.dval;
205 : }
206 : }
207 2175695 : return e;
208 : }
209 : return NULL;
210 : }
211 84295 : case op_topn:
212 : case op_sample:
213 84295 : return rel_propagate_column_ref_statistics(sql, rel->l, e);
214 : default:
215 : break;
216 : }
217 : }
218 : return NULL;
219 : }
220 :
221 : static atom *
222 4592781 : atom_from_valptr( allocator *sa, sql_subtype *tpe, ValRecord *v)
223 : {
224 4592781 : atom *a = SA_NEW(sa, atom);
225 :
226 4592830 : assert(!VALisnil(v));
227 4592876 : *a = (atom) {.tpe = *tpe,};
228 4592876 : SA_VALcopy(sa, &a->data, v);
229 4593029 : return a;
230 : }
231 :
232 : void
233 4243728 : sql_column_get_statistics(mvc *sql, sql_column *c, sql_exp *e)
234 : {
235 4243728 : bool nonil = false, unique = false;
236 4243728 : double unique_est = 0.0;
237 4243728 : ValRecord min, max;
238 4243728 : int ok = mvc_col_stats(sql, c, &nonil, &unique, &unique_est, &min, &max);
239 :
240 4244913 : if (has_nil(e) && nonil)
241 2797491 : set_has_no_nil(e);
242 4244913 : if (!is_unique(e) && unique)
243 1120154 : set_unique(e);
244 4244913 : if (unique_est != 0.0) {
245 2987804 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
246 2987644 : p->value.dval = unique_est;
247 : }
248 4244753 : unsigned int digits = 0;
249 4244753 : sql_subtype *et = exp_subtype(e);
250 4244896 : if (et->type->eclass == EC_DEC || et->type->eclass == EC_NUM)
251 2758220 : digits = et->digits;
252 4244896 : if ((ok & 2) == 2) {
253 2293718 : if (!VALisnil(&max)) {
254 2293642 : prop *p = e->p = prop_create(sql->sa, PROP_MAX, e->p);
255 2293576 : p->value.pval = atom_from_valptr(sql->sa, &c->type, &max);
256 2293609 : if (digits) {
257 1704694 : unsigned int nd = atom_digits(p->value.pval);
258 1704555 : if (nd < digits)
259 : digits = nd;
260 1704555 : if (!digits)
261 : digits = 1;
262 : }
263 : }
264 2293450 : VALclear(&max);
265 : }
266 4244623 : if ((ok & 1) == 1) {
267 2299615 : if (!VALisnil(&min)) {
268 2299626 : prop *p = e->p = prop_create(sql->sa, PROP_MIN, e->p);
269 2299686 : p->value.pval = atom_from_valptr(sql->sa, &c->type, &min);
270 2299863 : if (digits) {
271 1711919 : unsigned int nd = atom_digits(p->value.pval);
272 1711937 : if (nd > digits) /* possibly set to low by max value */
273 : digits = nd;
274 : if (!digits)
275 : digits = 1;
276 : }
277 : }
278 2299888 : VALclear(&min);
279 : }
280 4244920 : if (digits)
281 2758235 : et->digits = digits;
282 4244920 : if (et->type->eclass == EC_DEC && et->digits <= et->scale)
283 216 : et->digits = et->scale + 1;
284 4244920 : }
285 :
286 : static void
287 947620 : rel_basetable_column_get_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
288 : {
289 947620 : if (e->p)
290 : return;
291 304454 : sql_column *c = NULL;
292 :
293 304454 : if ((c = rel_base_find_column(rel, e->nid))) {
294 206825 : sql_column_get_statistics(sql, c, e);
295 : }
296 : }
297 :
298 : static bool
299 8875 : rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, sql_exp *e, int i)
300 : {
301 8875 : sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
302 8875 : atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
303 8875 : *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
304 8875 : prop *est;
305 :
306 : /* for the intersection, if both expressions don't overlap, it can be pruned */
307 8875 : if (is_inter(rel->op) && !has_nil(le) && !has_nil(re) &&
308 511 : ((rval_max && lval_min && atom_cmp(rval_max, lval_min) < 0) || (rval_min && lval_max && atom_cmp(rval_min, lval_max) > 0)))
309 26 : return true;
310 :
311 8849 : if (lval_max && rval_max) {
312 2557 : if (is_union(rel->op))
313 3 : set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
314 2554 : else if (is_inter(rel->op))
315 399 : set_minmax_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval_max, rval_max)); /* for intersect the new max will be the min of the two */
316 : else /* except */
317 2155 : set_minmax_property(sql, e, PROP_MAX, lval_max);
318 : }
319 8849 : if (lval_min && rval_min) {
320 2557 : if (is_union(rel->op))
321 3 : set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
322 2554 : else if (is_inter(rel->op))
323 399 : set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval_min, rval_min)); /* for intersect the new min will be the max of the two */
324 : else /* except */
325 2155 : set_minmax_property(sql, e, PROP_MIN, lval_min);
326 : }
327 :
328 8849 : if (is_union(rel->op) || is_munion(rel->op)) {
329 5986 : if (!has_nil(le) && !has_nil(re))
330 879 : set_has_no_nil(e);
331 5986 : if (need_distinct(rel) && list_length(rel->exps) == 1)
332 0 : set_unique(e);
333 2863 : } else if (is_inter(rel->op)) {
334 564 : if (!has_nil(le) || !has_nil(re))
335 509 : set_has_no_nil(e);
336 564 : if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
337 345 : set_unique(e);
338 : } else {
339 2299 : assert(is_except(rel->op));
340 2299 : if (!has_nil(le))
341 2236 : set_has_no_nil(e);
342 2299 : if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
343 2012 : set_unique(e);
344 : }
345 : /* propagate unique estimation for known cases */
346 8849 : if (!is_union(rel->op) && (est = find_prop(le->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES)) {
347 207 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
348 207 : p->value.dval = est->value.dval;
349 : }
350 : return false;
351 : }
352 :
353 :
354 : static void
355 62381 : rel_munion_get_statistics(mvc *sql, sql_rel *rel, list *rels, sql_exp *e, int i)
356 : {
357 62381 : if (is_recursive(rel))
358 : return ;
359 62381 : assert(is_munion(rel->op));
360 :
361 62381 : sql_rel *l = rels->h->data;
362 62381 : sql_exp *le = list_fetch(l->exps, i);
363 62381 : atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX);
364 62381 : bool has_nonil = !has_nil(le);
365 :
366 181448 : for(node *n = rels->h->next; n; n = n->next) {
367 119067 : sql_rel *r = n->data;
368 119067 : sql_exp *re = list_fetch(r->exps, i);
369 119067 : atom *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
370 :
371 119067 : if (lval_max && rval_max) {
372 43903 : set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
373 43903 : lval_max = find_prop_and_get(e->p, PROP_MAX);
374 : }
375 119067 : if (lval_min && rval_min) {
376 43354 : set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
377 43354 : lval_min = find_prop_and_get(e->p, PROP_MIN);
378 : }
379 119067 : has_nonil &= !has_nil(re);
380 :
381 : }
382 :
383 62381 : if (has_nonil)
384 42482 : set_has_no_nil(e);
385 :
386 62381 : if (need_distinct(rel) && list_length(rel->exps) == 1)
387 1596 : set_unique(e);
388 : }
389 :
390 :
391 : static sql_exp *
392 3493969 : rel_propagate_statistics(visitor *v, sql_rel *rel, sql_exp *e, int depth)
393 : {
394 3493969 : mvc *sql = v->sql;
395 3493969 : atom *lval;
396 :
397 3493969 : (void) depth;
398 3493969 : switch(e->type) {
399 2181679 : case e_column:
400 2181679 : switch (rel->op) { /* set relations don't call rel_propagate_statistics */
401 282787 : case op_join:
402 : case op_left:
403 : case op_right:
404 : case op_full:
405 : case op_semi:
406 : case op_anti: {
407 282787 : sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e);
408 282787 : if (!found)
409 141835 : (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
410 : break;
411 : }
412 1898892 : case op_select:
413 : case op_project:
414 : case op_groupby: {
415 1898892 : sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e); /* labels may be found on the same projection, ugh */
416 1898882 : if (!found && is_simple_project(rel->op))
417 127706 : (void) rel_propagate_column_ref_statistics(sql, rel, e);
418 : break;
419 : }
420 0 : case op_insert:
421 : case op_update:
422 : case op_delete:
423 0 : (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
424 0 : break;
425 : default:
426 : break;
427 : }
428 : break;
429 101111 : case e_convert: {
430 101111 : sql_subtype *to = exp_totype(e), *from = exp_fromtype(e);
431 101111 : sql_exp *l = e->l;
432 101111 : sql_class fr = from->type->eclass, too = to->type->eclass;
433 101111 : prop *est;
434 :
435 101111 : if (fr == too) {
436 91999 : if ((lval = find_prop_and_get(l->p, PROP_MAX))) {
437 59868 : atom *res = atom_copy(sql->sa, lval);
438 59868 : if ((res = atom_cast(sql->sa, res, to)))
439 59845 : set_minmax_property(sql, e, PROP_MAX, res);
440 : }
441 91998 : if ((lval = find_prop_and_get(l->p, PROP_MIN))) {
442 60505 : atom *res = atom_copy(sql->sa, lval);
443 60504 : if ((res = atom_cast(sql->sa, res, to)))
444 60482 : set_minmax_property(sql, e, PROP_MIN, res);
445 : }
446 : }
447 : /* propagating nuniques through conversions is tricky. I am adding just the general cases */
448 101111 : if ((est = find_prop(l->p, PROP_NUNIQUES)) &&
449 62903 : (((fr == EC_TIME || fr == EC_TIME_TZ || fr == EC_DATE) && (too == EC_TIMESTAMP || too == EC_TIMESTAMP_TZ)) ||
450 62878 : ((fr == EC_NUM || fr == EC_BIT) && too == EC_NUM) || (fr == EC_DEC && too == EC_DEC) || (fr != EC_EXTERNAL && too == EC_STRING))) {
451 59450 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
452 59450 : p->value.dval = est->value.dval;
453 : }
454 101110 : if (!has_nil(l))
455 57508 : set_has_no_nil(e);
456 : break;
457 : }
458 342820 : case e_aggr:
459 : case e_func: {
460 342820 : BUN lv;
461 342820 : sql_subfunc *f = e->f;
462 :
463 342820 : if (!f->func->s) {
464 316037 : int key = hash_key(f->func->base.name); /* Using hash lookup */
465 316037 : sql_hash_e *he = sql_functions_lookup->buckets[key&(sql_functions_lookup->size-1)];
466 316037 : lookup_function look = NULL;
467 :
468 690653 : for (; he && !look; he = he->chain) {
469 374616 : struct function_properties* fp = (struct function_properties*) he->value;
470 :
471 374616 : if (!strcmp(f->func->base.name, fp->name))
472 108083 : look = fp->func;
473 : }
474 316037 : if (look)
475 108083 : look(sql, e);
476 : }
477 : /* for global aggregates with no semantics, if the left relation has values, then the output won't be NULL */
478 342820 : if (!is_semantics(e) && e->l && !have_nil(e->l) &&
479 90809 : (e->type != e_aggr || (is_groupby(rel->op) && list_length(rel->r)) || ((lv = get_rel_count(rel->l)) != BUN_NONE && lv > 0)))
480 90478 : set_has_no_nil(e);
481 : /* set properties for global aggregates */
482 342820 : if (e->type == e_aggr && is_groupby(rel->op) && list_empty(rel->r)) {
483 7920 : if (!find_prop(e->p, PROP_NUNIQUES)) {
484 7920 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
485 7920 : p->value.dval = 1;
486 : }
487 7920 : set_unique(e);
488 : }
489 : break;
490 : }
491 578349 : case e_atom:
492 578349 : if (e->l) {
493 560493 : atom *a = (atom*) e->l;
494 560493 : if (!a->isnull) {
495 496184 : set_minmax_property(sql, e, PROP_MAX, a);
496 496184 : set_minmax_property(sql, e, PROP_MIN, a);
497 : }
498 560493 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
499 560493 : p->value.dval = 1;
500 17856 : } else if (e->f) {
501 4280 : list *vals = (list *) e->f;
502 4280 : sql_exp *first = vals->h ? vals->h->data : NULL;
503 4280 : atom *max = NULL, *min = NULL; /* all child values must have a valid min/max */
504 4280 : int has_nil = 0;
505 :
506 4280 : if (first) {
507 4280 : max = ((lval = find_prop_and_get(first->p, PROP_MAX))) ? lval : NULL;
508 4280 : min = ((lval = find_prop_and_get(first->p, PROP_MIN))) ? lval : NULL;
509 4280 : has_nil |= has_nil(first);
510 : }
511 :
512 18356 : for (node *n = vals->h ? vals->h->next : NULL ; n ; n = n->next) {
513 9796 : sql_exp *ee = n->data;
514 :
515 9796 : if (min && max) {
516 9303 : if ((lval = find_prop_and_get(ee->p, PROP_MAX))) {
517 9257 : max = atom_cmp(lval, max) > 0 ? lval : max;
518 : } else {
519 : max = NULL;
520 : }
521 9303 : if ((lval = find_prop_and_get(ee->p, PROP_MIN))) {
522 9257 : min = atom_cmp(min, lval) > 0 ? lval : min;
523 : } else {
524 : min = NULL;
525 : }
526 : }
527 9796 : has_nil |= has_nil(ee);
528 : }
529 :
530 4280 : if (!has_nil)
531 3909 : set_has_no_nil(e);
532 4280 : if (min && max) {
533 3867 : set_minmax_property(sql, e, PROP_MAX, max);
534 3867 : set_minmax_property(sql, e, PROP_MIN, min);
535 : }
536 4280 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
537 4280 : p->value.dval = (dbl) list_length(vals);
538 : } else {
539 13576 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
540 13576 : p->value.dval = 1;
541 : }
542 : break;
543 290010 : case e_cmp:
544 : /* TODO? propagating min/max/unique of booleans is not very worth it */
545 290010 : if (e->flag == cmp_or || e->flag == cmp_filter) {
546 17845 : if (!have_nil(e->l) && !have_nil(e->r))
547 13202 : set_has_no_nil(e);
548 272165 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
549 21734 : sql_exp *le = e->l;
550 21734 : if (!has_nil(le) && !have_nil(e->r))
551 18611 : set_has_no_nil(e);
552 : } else {
553 250431 : sql_exp *le = e->l, *re = e->r, *fe = e->f;
554 250431 : if (!has_nil(le) && !has_nil(re) && (!e->f || !has_nil(fe)))
555 179860 : set_has_no_nil(e);
556 : }
557 : break;
558 : case e_psm:
559 : break;
560 : }
561 :
562 : #ifndef NDEBUG
563 : {
564 : /* min and max cannot be NULL and min must be <= than max, if it happens the inner relation must be empty! */
565 3493959 : atom *min = find_prop_and_get(e->p, PROP_MIN), *max = find_prop_and_get(e->p, PROP_MAX);
566 :
567 3493970 : (void) min;
568 3493970 : (void) max;
569 3493970 : assert(!min || !min->isnull);
570 3493970 : assert(!max || !max->isnull);
571 3493970 : assert(!min || !max || (min && max)/* atom_cmp(min, max) <= 0*/);
572 : }
573 : #endif
574 3493970 : return e;
575 : }
576 :
577 : static list * /* Remove predicates always false from min/max values */
578 141247 : rel_prune_predicates(visitor *v, sql_rel *rel)
579 : {
580 141247 : if (rel->l) {
581 141247 : sql_rel *l = rel->l;
582 141247 : if (is_single(l))
583 3533 : return rel->exps;
584 : }
585 137714 : if (!list_empty(rel->attr))
586 2989 : return rel->exps;
587 286749 : for (node *n = rel->exps->h ; n ; n = n->next) {
588 152024 : sql_exp *e = n->data;
589 :
590 152024 : if (e->type == e_cmp && is_theta_exp(e->flag)) {
591 125085 : sql_exp *le = e->l, *re = e->r, *fe = e->f;
592 125085 : atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
593 125085 : *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
594 125085 : bool always_false = false, always_true = false;
595 :
596 125085 : if (fe && !is_symmetric(e)) {
597 3059 : atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
598 3059 : comp_type lower = range2lcompare(e->flag), higher = range2rcompare(e->flag);
599 3662 : int not_int1 = rval_min && lval_max && /* the middle and left intervals don't overlap */
600 1664 : (!is_anti(e) && (lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0)),
601 4074 : not_int2 = lval_min && fval_max && /* the middle and right intervals don't overlap */
602 2486 : (!is_anti(e) && (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0)),
603 3059 : not_int3 = rval_min && fval_max && /* the left interval is after the right one */
604 1288 : (!is_anti(e) && (atom_cmp(rval_min, fval_max) > 0));
605 :
606 3059 : always_false |= not_int1 || not_int2 || not_int3;
607 : /* for anti the middle must be before the left or after the right or the right after the left, for the other the middle must be always between the left and right intervals */
608 4116 : always_true |= !has_nil(le) && !has_nil(re) && !has_nil(fe) &&
609 3960 : lval_min && lval_max && rval_min && rval_max && fval_min && fval_max &&
610 575 : (is_anti(e) ? ((lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) || (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
611 489 : ((lower == cmp_gte ? atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0)));
612 : } else if (!fe) {
613 122008 : if (!is_semantics(e)) /* trivial not null cmp null case */
614 232620 : always_false |= !is_anti(e) && ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)));
615 122008 : switch (e->flag) {
616 106460 : case cmp_equal:
617 106460 : if (lval_min && lval_max && rval_min && rval_max && (!is_semantics(e) || !has_nil(le) || !has_nil(re)))
618 135966 : always_false |= (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
619 106460 : if (is_semantics(e)) { /* prune *= NULL cases */
620 5669 : always_false |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
621 11338 : always_true |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
622 : }
623 : break;
624 7352 : case cmp_notequal:
625 7352 : if (lval_min && lval_max && rval_min && rval_max)
626 11434 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
627 7352 : if (is_semantics(e)) {
628 29 : always_true |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
629 58 : always_false |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
630 : }
631 : break;
632 5489 : case cmp_gt:
633 5489 : if (lval_max && rval_min)
634 1835 : always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
635 5489 : if (lval_min && rval_max)
636 3670 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
637 : break;
638 121 : case cmp_gte:
639 121 : if (lval_max && rval_min)
640 51 : always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
641 121 : if (lval_min && rval_max)
642 102 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
643 : break;
644 2501 : case cmp_lt:
645 2501 : if (lval_min && rval_max)
646 1382 : always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
647 2501 : if (lval_max && rval_min)
648 2812 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
649 : break;
650 85 : case cmp_lte:
651 85 : if (lval_min && rval_max)
652 11 : always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
653 85 : if (lval_max && rval_min)
654 22 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
655 : break;
656 : default: /* Maybe later I can do cmp_in and cmp_notin but don't forget to remove is_theta_exp check up there */
657 : break;
658 : }
659 : }
660 125085 : assert(!always_false || !always_true);
661 125085 : if (always_false || always_true) {
662 3688 : sql_exp *ne = exp_atom_bool(v->sql->sa, always_true /* if always_true set then true else false */);
663 3688 : if (exp_name(e))
664 0 : exp_prop_alias(v->sql->sa, ne, e);
665 3688 : n->data = ne;
666 3688 : v->changes++;
667 : }
668 : }
669 : }
670 134725 : return rel->exps;
671 : }
672 :
673 : static sql_rel *
674 14 : set_setop_side(visitor *v, sql_rel *rel, sql_rel *side)
675 : {
676 14 : if (side == rel->r)
677 0 : rel->r = NULL;
678 : else
679 14 : rel->l = NULL;
680 14 : if (!is_simple_project(side->op) || rel_is_ref(side) || !list_empty(side->r)) {
681 0 : side = rel_project(v->sql->sa, side, rel_projections(v->sql, side, NULL, 0, 1));
682 0 : set_count_prop(v->sql->sa, side, get_rel_count(side->l));
683 0 : side->exps = exps_exp_visitor_bottomup(v, side, side->exps, 0, &rel_propagate_statistics, false);
684 : }
685 35 : for (node *n = rel->exps->h, *m = side->exps->h ; n && m ; n = n->next, m = m->next) {
686 21 : sql_exp *e1 = n->data, *e2 = m->data;
687 21 : exp_prop_alias(v->sql->sa, e2, e1);
688 : }
689 14 : list_hash_clear(side->exps);
690 :
691 14 : if (need_distinct(rel))
692 0 : set_distinct(side);
693 14 : side->p = prop_copy(v->sql->sa, rel->p);
694 14 : rel_destroy(rel);
695 14 : return side;
696 : }
697 :
698 : static BUN
699 22176 : trivial_project_exp_card(sql_exp *e)
700 : {
701 22545 : if (e->type == e_convert)
702 369 : return trivial_project_exp_card(e->l);
703 22176 : return e->type == e_atom && e->f ? (BUN) list_length(e->f) : 1;
704 : }
705 :
706 : static BUN
707 26140 : rel_calc_nuniques(mvc *sql, sql_rel *l, list *exps)
708 : {
709 26140 : BUN lv = get_rel_count(l);
710 :
711 26140 : if (lv == 0)
712 : return 0;
713 23425 : if (!list_empty(exps)) {
714 23425 : BUN nuniques = 0;
715 : /* compute the highest number of unique values */
716 58508 : for (node *n = exps->h ; n && nuniques != BUN_NONE ; n = n->next) {
717 35083 : sql_exp *e = n->data;
718 35083 : sql_rel *bt = NULL;
719 35083 : prop *p = NULL;
720 35083 : BUN euniques = BUN_NONE;
721 35083 : atom *min, *max, *sub = NULL;
722 35083 : sql_subtype *tp = exp_subtype(e);
723 35082 : sql_class ec = tp ? tp->type->eclass : EC_STRING; /* if 'e' has no type (eg parameter), use a non-number type to fail condition */
724 :
725 35082 : if ((p = find_prop(e->p, PROP_NUNIQUES))) {
726 25375 : euniques = (BUN) p->value.dval;
727 9707 : } else if (e->type == e_column && e->nid && rel_find_exp_and_corresponding_rel(l, e, false, &bt, NULL) && bt && (p = find_prop(bt->p, PROP_COUNT))) {
728 7633 : euniques = (BUN) p->value.lval;
729 : }
730 : /* use min to max range to compute number of possible values in the domain for number types */
731 35082 : if ((EC_TEMP(ec)||ec==EC_NUM||ec==EC_MONTH||ec==EC_POS) &&
732 24254 : (min = find_prop_and_get(e->p, PROP_MIN)) && (max = find_prop_and_get(e->p, PROP_MAX))) {
733 : /* the range includes min and max, so the atom_inc call is needed */
734 : /* if 'euniques' has number of distinct values, compute min between both */
735 19307 : if ((sub = atom_sub(sql->sa, max, min)) && (sub = atom_inc(sql->sa, sub)) && (sub = atom_cast(sql->sa, sub, sql_bind_localtype("oid"))))
736 0 : euniques = MIN(euniques, (BUN) sub->data.val.oval);
737 : }
738 35083 : if (euniques != BUN_NONE)
739 33009 : nuniques = MAX(nuniques, euniques); /* the highest cardinality sets the estimation */
740 : else
741 : nuniques = BUN_NONE;
742 : }
743 23425 : if (nuniques != BUN_NONE)
744 : return nuniques;
745 : }
746 : return lv;
747 : }
748 :
749 : static sql_rel *
750 2090517 : rel_get_statistics_(visitor *v, sql_rel *rel)
751 : {
752 : /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
753 2090517 : uint8_t has_special_modify = *(uint8_t*) v->data;
754 2090517 : bool can_be_pruned = !has_special_modify && v->storage_based_opt;
755 :
756 : /* Don't look at the same relation twice */
757 2090517 : if (are_statistics_gathered(rel->used))
758 : return rel;
759 1348898 : rel->used |= statistics_gathered;
760 :
761 1348898 : switch (rel->op) {
762 317306 : case op_basetable: {
763 317306 : sql_table *t = (sql_table *) rel->l;
764 317306 : sqlstore *store = v->sql->session->tr->store;
765 :
766 317306 : if (!list_empty(rel->exps)) {
767 1265237 : for (node *n = rel->exps->h ; n ; n = n->next)
768 947705 : rel_basetable_column_get_statistics(v->sql, rel, n->data);
769 : }
770 : /* Set table row count. TODO? look for remote tables. Don't look at storage for declared tables, because it won't be cleaned */
771 317526 : if (isTable(t) && t->s && !isDeclaredTable(t)) /* count active rows only */
772 263545 : set_count_prop(v->sql->sa, rel, (BUN)store->storage_api.count_col(v->sql->session->tr, ol_first_node(t->columns)->data, 10));
773 : break;
774 : }
775 2793 : case op_union:
776 : case op_inter:
777 : case op_except: {
778 2793 : bool empty_cross = false;
779 2793 : int i = 0;
780 2793 : sql_rel *l = rel->l, *pl = rel->l, *r = rel->r, *pr = rel->r;
781 :
782 2793 : while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
783 0 : pl = pl->l;
784 2793 : while (is_sample(pr->op) || is_topn(pr->op))
785 0 : pr = pr->l;
786 : /* if it's not a projection, then project and propagate statistics */
787 2793 : if (!is_project(pl->op) && !is_base(pl->op)) {
788 0 : pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
789 0 : set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
790 0 : pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
791 : }
792 2793 : if (!is_project(pr->op) && !is_base(pr->op)) {
793 0 : pr = rel_project(v->sql->sa, pr, rel_projections(v->sql, pr, NULL, 0, 1));
794 0 : set_count_prop(v->sql->sa, pr, get_rel_count(pr->l));
795 0 : pr->exps = exps_exp_visitor_bottomup(v, pr, pr->exps, 0, &rel_propagate_statistics, false);
796 : }
797 :
798 11668 : for (node *n = rel->exps->h ; n ; n = n->next) {
799 8875 : empty_cross |= rel_setop_get_statistics(v->sql, rel, pl->exps, pr->exps, n->data, i);
800 8875 : i++;
801 : }
802 :
803 : /* propagate row count */
804 2793 : if (is_union(rel->op)) {
805 277 : BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
806 277 : rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
807 :
808 277 : if (lv == 0 && rv == 0) { /* both sides empty */
809 2 : if (can_be_pruned)
810 : empty_cross = true;
811 : else
812 2 : set_count_prop(v->sql->sa, rel, 0);
813 275 : } else if (can_be_pruned && lv == 0 && !rel_is_ref(rel)) { /* left side empty */
814 0 : rel = set_setop_side(v, rel, r);
815 0 : empty_cross = false; /* don't rewrite again */
816 275 : } else if (can_be_pruned && rv == 0 && !rel_is_ref(rel)) { /* right side empty */
817 0 : rel = set_setop_side(v, rel, l);
818 0 : empty_cross = false; /* don't rewrite again */
819 275 : } else if (lv != BUN_NONE && rv != BUN_NONE) {
820 7 : set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
821 : }
822 2516 : } else if (is_inter(rel->op) || is_except(rel->op)) {
823 2516 : BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
824 2516 : rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
825 :
826 2516 : if (lv == 0) { /* left side empty */
827 62 : if (can_be_pruned)
828 : empty_cross = true;
829 : else
830 5 : set_count_prop(v->sql->sa, rel, 0);
831 2454 : } else if (rv == 0) { /* right side empty */
832 17 : if (is_inter(rel->op)) {
833 3 : if (can_be_pruned)
834 : empty_cross = true;
835 : else
836 0 : set_count_prop(v->sql->sa, rel, 0);
837 14 : } else if (can_be_pruned && !rel_is_ref(rel)) {
838 14 : rel = set_setop_side(v, rel, l);
839 14 : empty_cross = false; /* don't rewrite again */
840 : } else {
841 0 : set_count_prop(v->sql->sa, rel, lv);
842 : }
843 : } else {
844 2437 : set_count_prop(v->sql->sa, rel, lv);
845 : }
846 : }
847 2793 : if (can_be_pruned && empty_cross) {
848 80 : rel_destroy(rel->l);
849 80 : rel->l = NULL;
850 80 : rel_destroy(rel->r);
851 80 : rel->r = NULL;
852 244 : for (node *n = rel->exps->h ; n ; n = n->next) {
853 164 : sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
854 164 : exp_prop_alias(v->sql->sa, a, e);
855 164 : n->data = a;
856 : }
857 80 : list_hash_clear(rel->exps);
858 80 : sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
859 80 : set_count_prop(v->sql->sa, l, 1);
860 80 : l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
861 80 : set_count_prop(v->sql->sa, l, 0);
862 80 : rel->op = op_project;
863 80 : rel->l = l;
864 80 : rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
865 80 : set_count_prop(v->sql->sa, rel, 0);
866 80 : set_nodistinct(rel); /* set relations may have distinct flag set */
867 80 : v->changes++;
868 : }
869 : break;
870 : }
871 13395 : case op_munion: {
872 13395 : list *l = rel->l, *nrels = sa_list(v->sql->sa);
873 13395 : BUN cnt = 0;
874 13395 : bool needs_pruning = false;
875 :
876 13395 : if (is_recursive(rel))
877 : break;
878 50928 : for (node *n = l->h; n; n = n->next) {
879 37604 : sql_rel *r = n->data, *pl = r;
880 :
881 37604 : while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
882 0 : pl = pl->l;
883 : /* if it's not a projection, then project and propagate statistics */
884 37604 : if (!is_project(pl->op) && !is_base(pl->op)) {
885 0 : pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
886 0 : set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
887 0 : pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
888 : }
889 37604 : nrels = append(nrels, pl);
890 : /* we need new munion statistics */
891 : /* propagate row count */
892 37604 : BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
893 : /* if PROP_COUNT does not exist we assume at least a row (see get_rel_count def) */
894 37604 : if (rv == BUN_NONE) {
895 1208 : cnt++;
896 1208 : continue;
897 : }
898 36396 : if (!rv && can_be_pruned)
899 6703 : needs_pruning = true;
900 : /* overflow check */
901 36396 : if (rv > (BUN_MAX - cnt))
902 37604 : rv = BUN_MAX;
903 : else
904 36396 : cnt += rv;
905 : }
906 13324 : int i = 0;
907 75705 : for (node *n = rel->exps->h ; n ; n = n->next, i++)
908 62381 : rel_munion_get_statistics(v->sql, rel, nrels, n->data, i);
909 :
910 13324 : if (needs_pruning && !rel_is_ref(rel)) {
911 4542 : v->changes++;
912 4542 : list *nl = sa_list(l->sa);
913 :
914 16769 : for (node *n = nrels->h; n; n = n->next) {
915 12227 : sql_rel *r = n->data;
916 12227 : BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
917 :
918 12227 : if (!rv) { /* keep last for now */
919 6233 : rel_destroy(r);
920 6233 : continue;
921 : }
922 5994 : nl = append(nl, r);
923 : }
924 4542 : rel->l = nl;
925 4542 : if (list_length(nl) == 1) {
926 4206 : sql_rel *l = rel->l = nl->h->data; /* ugh */
927 4206 : rel->r = NULL;
928 4206 : rel->op = op_project;
929 :
930 20683 : for (node *n = rel->exps->h, *m = l->exps->h ; n && m ; n = n->next, m = m->next) {
931 16477 : sql_exp *pe = n->data, *ie = m->data;
932 16477 : sql_exp *ne = exp_ref(v->sql, ie);
933 16477 : exp_prop_alias(v->sql->sa, ne, pe);
934 16477 : n->data = ne;
935 : }
936 4206 : list_hash_clear(rel->exps);
937 336 : } else if (list_empty(nl)) {
938 : /* empty select (project [ nils ] ) */
939 454 : for (node *n = rel->exps->h ; n ; n = n->next) {
940 354 : sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
941 354 : exp_prop_alias(v->sql->sa, a, e);
942 354 : n->data = a;
943 : }
944 100 : list_hash_clear(rel->exps);
945 100 : sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
946 100 : set_count_prop(v->sql->sa, l, 1);
947 100 : l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
948 100 : set_count_prop(v->sql->sa, l, 0);
949 100 : rel->op = op_project;
950 100 : rel->r = NULL;
951 100 : rel->l = l;
952 100 : rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
953 100 : set_count_prop(v->sql->sa, rel, 0);
954 100 : set_nodistinct(rel); /* set relations may have distinct flag set */
955 : }
956 : } else {
957 8782 : set_count_prop(v->sql->sa, rel, cnt);
958 : }
959 : break;
960 : }
961 539349 : case op_join:
962 : case op_left:
963 : case op_right:
964 : case op_full:
965 : case op_semi:
966 : case op_anti:
967 : case op_select:
968 : case op_project:
969 : case op_groupby: {
970 539349 : if (is_groupby(rel->op) && !list_empty(rel->r))
971 16893 : rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
972 539348 : rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, &rel_propagate_statistics, false);
973 539333 : if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r))
974 39361 : rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
975 : /* The following optimizations can only be applied after propagating the statistics to rel->exps */
976 539336 : if (can_be_pruned && (is_join(rel->op) || is_select(rel->op)) && !list_empty(rel->exps)) {
977 141247 : int changes = v->changes;
978 141247 : rel->exps = rel_prune_predicates(v, rel);
979 141247 : if (v->changes > changes)
980 3655 : rel = rewrite_simplify(v, 0, v->value_based_opt, rel);
981 : }
982 :
983 : /* propagate row count */
984 539336 : sql_rel *l = rel->l, *r = rel->r;
985 539336 : switch (rel->op) {
986 136403 : case op_join:
987 : case op_left:
988 : case op_right:
989 : case op_full: {
990 136403 : BUN lv = get_rel_count(l), rv = get_rel_count(r), uniques_estimate = BUN_MAX, join_idx_estimate = BUN_MAX;
991 :
992 136403 : if (!list_empty(rel->exps) && !is_single(rel)) {
993 249036 : for (node *n = rel->exps->h ; n ; n = n->next) {
994 126986 : sql_exp *e = n->data, *el = e->l, *er = e->r;
995 :
996 126986 : if (find_prop(e->p, PROP_JOINIDX)) {
997 667 : join_idx_estimate = lv>rv?lv:rv;
998 126319 : } else if (e->type == e_cmp && e->flag == cmp_equal) {
999 : /* if one of the sides is unique, the cardinality will be that exact number, but look for nulls */
1000 122434 : if (!is_semantics(e) || !has_nil(el) || !has_nil(er)) {
1001 122257 : BUN lu = 0, ru = 0;
1002 122257 : prop *p = NULL;
1003 122257 : if ((p = find_prop(el->p, PROP_NUNIQUES)))
1004 90810 : lu = (BUN) p->value.dval;
1005 122257 : if ((p = find_prop(er->p, PROP_NUNIQUES)))
1006 105330 : ru = (BUN) p->value.dval;
1007 122257 : if (is_unique(el) || lu > lv) {
1008 74604 : BUN ncount = (is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv;
1009 74604 : uniques_estimate = MIN(uniques_estimate, ncount);
1010 47653 : } else if (is_unique(er) || ru > rv) {
1011 29805 : BUN ncount = (is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv;
1012 29805 : uniques_estimate = MIN(uniques_estimate, ncount);
1013 : }
1014 : }
1015 : }
1016 : }
1017 : }
1018 136403 : if (is_single(rel)) {
1019 4947 : set_count_prop(v->sql->sa, rel, lv);
1020 131456 : } else if (join_idx_estimate != BUN_MAX) {
1021 665 : set_count_prop(v->sql->sa, rel, join_idx_estimate);
1022 130791 : } else if (uniques_estimate != BUN_MAX) {
1023 97898 : set_count_prop(v->sql->sa, rel, uniques_estimate);
1024 32893 : } else if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
1025 : /* corner cases for outer joins */
1026 126 : if (is_left(rel->op)) {
1027 114 : set_count_prop(v->sql->sa, rel, lv);
1028 12 : } else if (is_right(rel->op)) {
1029 3 : set_count_prop(v->sql->sa, rel, rv);
1030 9 : } else if (is_full(rel->op) && lv != BUN_NONE && rv != BUN_NONE) {
1031 9 : set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
1032 0 : } else if (lv != BUN_NONE && rv != BUN_NONE) {
1033 0 : set_count_prop(v->sql->sa, rel, 0);
1034 : }
1035 32767 : } else if (lv == 0) {
1036 1180 : set_count_prop(v->sql->sa, rel, (is_right(rel->op) || is_full(rel->op)) ? rv : 0);
1037 32163 : } else if (rv == 0) {
1038 1570 : set_count_prop(v->sql->sa, rel, (is_left(rel->op) || is_full(rel->op)) ? lv : 0);
1039 31087 : } else if (lv != BUN_NONE && rv != BUN_NONE) {
1040 18442 : set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX / lv)) ? BUN_MAX : (lv * rv)); /* overflow check */
1041 : }
1042 : break;
1043 : }
1044 2917 : case op_anti:
1045 2917 : set_count_prop(v->sql->sa, rel, get_rel_count(l));
1046 2917 : break;
1047 110179 : case op_semi:
1048 : case op_select:
1049 : /* TODO calculate cardinalities using selectivities */
1050 110179 : if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
1051 2759 : set_count_prop(v->sql->sa, rel, 0);
1052 : } else {
1053 211578 : if (!list_empty(rel->exps) && !is_single(rel)) {
1054 104156 : BUN cnt = get_rel_count(l), u = 1;
1055 172754 : for (node *n = rel->exps->h ; n ; n = n->next) {
1056 114295 : sql_exp *e = n->data, *el = e->l, *er = e->r;
1057 :
1058 : /* simple expressions first */
1059 114295 : if (e->type == e_cmp && e->flag == cmp_equal && exp_is_atom(er)) {
1060 : /* use selectivity */
1061 59005 : prop *p;
1062 59005 : if ((p = find_prop(el->p, PROP_NUNIQUES))) {
1063 45698 : u = (BUN) p->value.dval;
1064 45698 : break;
1065 : }
1066 : }
1067 : }
1068 : /* u is an *estimate*, so don't set count_prop to 0 unless cnt is 0 */
1069 104157 : set_count_prop(v->sql->sa, rel, cnt == 0 ? 0 : u == 0 || u > cnt ? 1 : cnt/u);
1070 : } else {
1071 3265 : set_count_prop(v->sql->sa, rel, get_rel_count(l));
1072 : }
1073 : }
1074 : break;
1075 265064 : case op_project:
1076 265064 : if (l) {
1077 254799 : if (need_distinct(rel)) {
1078 0 : set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->exps));
1079 : } else {
1080 254799 : set_count_prop(v->sql->sa, rel, get_rel_count(l));
1081 : }
1082 : } else {
1083 10265 : BUN card = 1;
1084 :
1085 10265 : if (!list_empty(rel->exps)) {
1086 31098 : for (node *n = rel->exps->h ; n ; n = n->next) {
1087 20833 : sql_exp *e = n->data;
1088 20833 : card = MAX(card, trivial_project_exp_card(e));
1089 : }
1090 : }
1091 10265 : set_count_prop(v->sql->sa, rel, card);
1092 : }
1093 : break;
1094 24360 : case op_groupby:
1095 24360 : if (list_empty(rel->r)) {
1096 7467 : set_count_prop(v->sql->sa, rel, 1);
1097 : } else {
1098 16893 : set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->r));
1099 : }
1100 : break;
1101 : default:
1102 : break;
1103 : }
1104 : break;
1105 : }
1106 17038 : case op_topn: {
1107 17038 : BUN lv = get_rel_count(rel->l);
1108 :
1109 17038 : if (lv != BUN_NONE) {
1110 17015 : sql_exp *le = rel->exps->h->data, *oe = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
1111 84 : if (oe && oe->l && exp_is_not_null(oe)) { /* no parameters */
1112 84 : BUN offset = (BUN) ((atom*)oe->l)->data.val.lval;
1113 84 : lv = offset >= lv ? 0 : lv - offset;
1114 : }
1115 17015 : if (le->l && exp_is_not_null(le)) {
1116 16980 : BUN limit = (BUN) ((atom*)le->l)->data.val.lval;
1117 16980 : lv = MIN(lv, limit);
1118 : }
1119 17015 : set_count_prop(v->sql->sa, rel, lv);
1120 : }
1121 : break;
1122 : }
1123 22 : case op_sample: {
1124 22 : BUN lv = get_rel_count(rel->l);
1125 :
1126 22 : if (lv != BUN_NONE) {
1127 4 : sql_exp *se = rel->exps->h->data;
1128 4 : sql_subtype *tp = exp_subtype(se);
1129 :
1130 4 : if (se->l && tp->type->eclass == EC_NUM) { /* sample is a number of rows */
1131 4 : BUN sample = (BUN) ((atom*)se->l)->data.val.lval;
1132 4 : lv = MIN(lv, sample);
1133 0 : } else if (se->l) { /* sample is a percentage of rows */
1134 0 : dbl percent = ((atom*)se->l)->data.val.dval;
1135 0 : assert(tp->type->eclass == EC_FLT);
1136 0 : lv = (BUN) ceil((dbl)lv * percent);
1137 : }
1138 4 : set_count_prop(v->sql->sa, rel, lv);
1139 : }
1140 : break;
1141 : }
1142 6015 : case op_table: {
1143 6015 : sql_exp *op = rel->r;
1144 6015 : if (rel->flag != TRIGGER_WRAPPER && op) {
1145 5703 : sql_subfunc *f = op->f;
1146 5703 : if (f->func->lang == FUNC_LANG_SQL) {
1147 97 : set_count_prop(v->sql->sa, rel, 1000 /* just some fallback value */);
1148 5606 : } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "storage") == 0) {
1149 831 : set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of catalog */);
1150 4775 : } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "db_users") == 0) {
1151 0 : set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of users */);
1152 4775 : } else if (f->func->lang == FUNC_LANG_MAL && strncmp(f->func->base.name, "querylog", 8) == 0) {
1153 1089 : set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of querylog */);
1154 3686 : } else if (f->func->lang == FUNC_LANG_MAL &&
1155 3574 : (strcmp(f->func->base.name, "queue") == 0 ||
1156 3309 : strcmp(f->func->base.name, "optimizers") == 0 ||
1157 2996 : strcmp(f->func->base.name, "env") == 0 ||
1158 2703 : strcmp(f->func->base.name, "keywords") == 0 ||
1159 2703 : strcmp(f->func->base.name, "statistics") == 0 ||
1160 2034 : strcmp(f->func->base.name, "rejects") == 0 ||
1161 1779 : strcmp(f->func->base.name, "schemastorage") == 0 ||
1162 1779 : strncmp(f->func->base.name, "storage", 7) == 0 ||
1163 1779 : strcmp(f->func->base.name, "sessions") == 0) ) {
1164 2086 : set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of queue */);
1165 : }
1166 : /* else {
1167 : printf("%%func needs stats : %s\n", f->func->base.name);
1168 : } */
1169 : }
1170 : break;
1171 : }
1172 : /*These relations are less important for now
1173 : TODO later we can tune it */
1174 : case op_insert:
1175 : case op_update:
1176 : case op_delete:
1177 : case op_truncate:
1178 : case op_ddl:
1179 : default:
1180 : break;
1181 : }
1182 :
1183 : return rel;
1184 : }
1185 :
1186 : static sql_rel *
1187 545770 : rel_get_statistics(visitor *v, global_props *gp, sql_rel *rel)
1188 : {
1189 : /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
1190 545770 : uint8_t has_special_modify = (uint8_t) gp->has_special_modify;
1191 545770 : v->data = &has_special_modify;
1192 545770 : rel = rel_visitor_bottomup(v, rel, &rel_get_statistics_);
1193 546319 : v->data = gp;
1194 546319 : return rel;
1195 : }
1196 :
1197 : run_optimizer
1198 721649 : bind_get_statistics(visitor *v, global_props *gp)
1199 : {
1200 721649 : (void) v;
1201 721649 : return (gp->opt_level == 1 && !gp->cnt[op_insert]) ? rel_get_statistics : NULL;
1202 : }
1203 :
1204 :
1205 : static bool
1206 95557 : point_select_on_unique_column(sql_rel *rel)
1207 : {
1208 95557 : if (is_select(rel->op) && !list_empty(rel->exps)) {
1209 131620 : for (node *n = rel->exps->h; n ; n = n->next) {
1210 75836 : sql_exp *e = n->data, *el = e->l, *er = e->r, *found = NULL;
1211 :
1212 75836 : if (is_compare(e->type) && e->flag == cmp_equal) {
1213 33632 : if (is_numeric_upcast(el))
1214 0 : el = el->l;
1215 33632 : if (is_numeric_upcast(er))
1216 0 : er = er->l;
1217 33632 : if (is_alias(el->type) && exp_is_atom(er) && (found = rel_find_exp(rel->l, el)) &&
1218 752 : is_unique(found) && (!is_semantics(e) || !has_nil(found) || !has_nil(er)))
1219 : return true;
1220 32880 : if (is_alias(er->type) && exp_is_atom(el) && (found = rel_find_exp(rel->l, er)) &&
1221 0 : is_unique(found) && (!is_semantics(e) || !has_nil(el) || !has_nil(found)))
1222 : return true;
1223 : }
1224 : }
1225 : }
1226 : return false;
1227 : }
1228 :
1229 : /*
1230 : * A point select on an unique column reduces the number of rows to 1. If the same select is under a
1231 : * join, the opposite side's select can be pushed above the join.
1232 : */
1233 : static inline sql_rel *
1234 1258996 : rel_push_select_up(visitor *v, sql_rel *rel)
1235 : {
1236 1258996 : if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) || is_semi(rel->op)) && !is_single(rel)) {
1237 256110 : sql_rel *l = rel->l, *r = rel->r;
1238 256110 : bool can_pushup_left = is_select(l->op) && !rel_is_ref(l) && !is_single(l) && (is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op)),
1239 256110 : can_pushup_right = is_select(r->op) && !rel_is_ref(r) && !is_single(r) && (is_innerjoin(rel->op) || is_right(rel->op));
1240 :
1241 256110 : if (can_pushup_left || can_pushup_right) {
1242 67413 : if (can_pushup_left)
1243 45775 : can_pushup_left = point_select_on_unique_column(r);
1244 67413 : if (can_pushup_right)
1245 49782 : can_pushup_right = point_select_on_unique_column(l);
1246 :
1247 : /* if both selects retrieve one row each, it's not worth it to push both up */
1248 67413 : if (can_pushup_left && !can_pushup_right) {
1249 686 : sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
1250 686 : nrel->l = l->l;
1251 686 : rel = rel_inplace_select(rel, nrel, l->exps);
1252 686 : assert(is_select(rel->op));
1253 686 : v->changes++;
1254 66727 : } else if (!can_pushup_left && can_pushup_right) {
1255 14 : sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
1256 14 : nrel->r = r->l;
1257 14 : rel = rel_inplace_select(rel, nrel, r->exps);
1258 14 : assert(is_select(rel->op));
1259 14 : v->changes++;
1260 : }
1261 : }
1262 : }
1263 1258996 : return rel;
1264 : }
1265 :
1266 : static int
1267 98711 : sql_class_base_score(visitor *v, sql_column *c, sql_subtype *t, bool equality_based)
1268 : {
1269 98711 : int de;
1270 :
1271 98711 : if (!t)
1272 : return 0;
1273 98711 : switch (ATOMstorage(t->type->localtype)) {
1274 : case TYPE_bte:
1275 : return 150 - 8;
1276 1632 : case TYPE_sht:
1277 1632 : return 150 - 16;
1278 39131 : case TYPE_int:
1279 39131 : return 150 - 32;
1280 519 : case TYPE_void:
1281 : case TYPE_lng:
1282 519 : return 150 - 64;
1283 : case TYPE_uuid:
1284 : #ifdef HAVE_HGE
1285 : case TYPE_hge:
1286 : #endif
1287 : return 150 - 128;
1288 2 : case TYPE_flt:
1289 2 : return 75 - 24;
1290 : case TYPE_dbl:
1291 : return 75 - 53;
1292 33888 : default:
1293 33888 : if (equality_based && c && v->storage_based_opt && (de = mvc_is_duplicate_eliminated(v->sql, c)))
1294 1666 : return 150 - de * 8;
1295 : /* strings and blobs not duplicate eliminated don't get any points here */
1296 : return 0;
1297 : }
1298 : }
1299 :
1300 : static int
1301 35952 : score_se_base(visitor *v, sql_rel *rel, sql_exp *e)
1302 : {
1303 35952 : int res = 0;
1304 35952 : sql_subtype *t = exp_subtype(e);
1305 35952 : sql_column *c = NULL;
1306 :
1307 35952 : if (e->type == e_convert) /* keep unsafes at the end (TODO improve) */
1308 : return -1000;
1309 : /* can we find out if the underlying table is sorted */
1310 35351 : if ((c = exp_find_column(rel, e, -2)) && v->storage_based_opt && mvc_is_sorted(v->sql, c))
1311 35351 : res += 600;
1312 :
1313 : /* prefer the shorter var types over the longer ones */
1314 35351 : res += sql_class_base_score(v, c, t, is_equality_or_inequality_exp(e->flag)); /* smaller the type, better */
1315 35351 : return res;
1316 : }
1317 :
1318 : static int
1319 60453 : score_se(visitor *v, sql_rel *rel, sql_exp *e)
1320 : {
1321 60453 : int score = 0;
1322 60453 : if (e->type == e_cmp && !is_complex_exp(e->flag)) {
1323 35952 : sql_exp *l = e->l;
1324 :
1325 35952 : while (l->type == e_cmp) { /* go through nested comparisons */
1326 2 : sql_exp *ll;
1327 :
1328 2 : if (l->flag == cmp_filter || l->flag == cmp_or)
1329 0 : ll = ((list*)l->l)->h->data;
1330 : else
1331 2 : ll = l->l;
1332 2 : if (ll->type != e_cmp)
1333 : break;
1334 : l = ll;
1335 : }
1336 35952 : score += score_se_base(v, rel, l);
1337 : }
1338 60453 : score += exp_keyvalue(e);
1339 60453 : return score;
1340 : }
1341 :
1342 : static inline sql_rel *
1343 1258996 : rel_select_order(visitor *v, sql_rel *rel)
1344 : {
1345 1258996 : int *scores = NULL;
1346 1258996 : sql_exp **exps = NULL;
1347 :
1348 1258996 : if (is_select(rel->op) && list_length(rel->exps) > 1) {
1349 28117 : node *n;
1350 28117 : int i, nexps = list_length(rel->exps);
1351 28117 : scores = SA_NEW_ARRAY(v->sql->ta, int, nexps);
1352 28117 : exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, nexps);
1353 :
1354 88570 : for (i = 0, n = rel->exps->h; n; i++, n = n->next) {
1355 60483 : exps[i] = n->data;
1356 60483 : if (find_prop(exps[i]->p, PROP_HASHCOL))
1357 : return rel;
1358 60453 : scores[i] = score_se(v, rel, n->data);
1359 : }
1360 28087 : GDKqsort(scores, exps, NULL, nexps, sizeof(int), sizeof(void *), TYPE_int, true, true);
1361 :
1362 88540 : for (i = 0, n = rel->exps->h; n; i++, n = n->next)
1363 60453 : n->data = exps[i];
1364 : }
1365 :
1366 : return rel;
1367 : }
1368 :
1369 : /* Compute the efficiency of using this expression early in a group by list */
1370 : static int
1371 63360 : score_gbe(visitor *v, sql_rel *rel, sql_exp *e)
1372 : {
1373 63360 : int res = 0;
1374 63360 : sql_subtype *t = exp_subtype(e);
1375 63360 : sql_column *c = exp_find_column(rel, e, -2);
1376 :
1377 63360 : if (e->card == CARD_ATOM) /* constants are trivial to group */
1378 38 : res += 1000;
1379 : /* can we find out if the underlying table is sorted */
1380 63360 : if (is_unique(e) || find_prop(e->p, PROP_HASHCOL) || (c && v->storage_based_opt && mvc_is_unique(v->sql, c))) /* distinct columns */
1381 3584 : res += 700;
1382 42134 : if (c && v->storage_based_opt && mvc_is_sorted(v->sql, c))
1383 3692 : res += 500;
1384 63360 : if (find_prop(e->p, PROP_HASHIDX)) /* has hash index */
1385 0 : res += 200;
1386 :
1387 : /* prefer the shorter var types over the longer ones */
1388 63360 : res += sql_class_base_score(v, c, t, true); /* smaller the type, better */
1389 63360 : return res;
1390 : }
1391 :
1392 : /* reorder group by expressions */
1393 : static inline sql_rel *
1394 1258996 : rel_groupby_order(visitor *v, sql_rel *rel)
1395 : {
1396 1258996 : int *scores = NULL;
1397 1258996 : sql_exp **exps = NULL;
1398 :
1399 1258996 : if (is_groupby(rel->op) && list_length(rel->r) > 1) {
1400 26727 : node *n;
1401 26727 : list *gbe = rel->r;
1402 26727 : int i, ngbe = list_length(gbe);
1403 26727 : scores = SA_NEW_ARRAY(v->sql->ta, int, ngbe);
1404 26727 : exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, ngbe);
1405 :
1406 : /* first sorting step, give priority for integers and sorted columns */
1407 90087 : for (i = 0, n = gbe->h; n; i++, n = n->next) {
1408 63360 : exps[i] = n->data;
1409 63360 : scores[i] = score_gbe(v, rel, exps[i]);
1410 : }
1411 26727 : GDKqsort(scores, exps, NULL, ngbe, sizeof(int), sizeof(void *), TYPE_int, true, true);
1412 :
1413 : /* second sorting step, give priority to strings with lower number of digits */
1414 54217 : for (i = ngbe - 1; i && !scores[i]; i--); /* find expressions with no score from the first round */
1415 26727 : if (scores[i])
1416 25725 : i++;
1417 26727 : if (ngbe - i > 1) {
1418 12055 : for (int j = i; j < ngbe; j++) {
1419 9064 : sql_subtype *t = exp_subtype(exps[j]);
1420 9064 : scores[j] = t ? t->digits : 0;
1421 : }
1422 : /* the less number of digits the better, order ascending */
1423 2991 : GDKqsort(scores + i, exps + i, NULL, ngbe - i, sizeof(int), sizeof(void *), TYPE_int, false, true);
1424 : }
1425 :
1426 90087 : for (i = 0, n = gbe->h; n; i++, n = n->next)
1427 63360 : n->data = exps[i];
1428 : }
1429 :
1430 1258996 : return rel;
1431 : }
1432 :
1433 : /* This optimization loop contains optimizations that can potentially use statistics */
1434 : static sql_rel *
1435 1258996 : rel_final_optimization_loop_(visitor *v, sql_rel *rel)
1436 : {
1437 : /* Run rel_push_select_up only once at the end to avoid an infinite optimization loop */
1438 1258996 : rel = rel_push_select_up(v, rel);
1439 1258996 : rel = rel_select_order(v, rel);
1440 :
1441 : /* TODO? Maybe later add rel_simplify_count, rel_join2semijoin, rel_simplify_fk_joins,
1442 : rel_distinct_project2groupby, rel_simplify_predicates, rel_simplify_math,
1443 : rel_distinct_aggregate_on_unique_values */
1444 :
1445 1258996 : rel = rel_groupby_order(v, rel);
1446 1258996 : return rel;
1447 : }
1448 :
1449 : static sql_rel *
1450 67725 : rel_final_optimization_loop(visitor *v, global_props *gp, sql_rel *rel)
1451 : {
1452 67725 : (void) gp;
1453 67725 : return rel_visitor_bottomup(v, rel, &rel_final_optimization_loop_);
1454 : }
1455 :
1456 : run_optimizer
1457 722331 : bind_final_optimization_loop(visitor *v, global_props *gp)
1458 : {
1459 722331 : int flag = v->sql->sql_optimizer;
1460 : /* At the moment, this optimizer has dependency on 3 flags */
1461 554117 : return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_select]) &&
1462 790058 : (flag & push_select_up) && (flag & optimize_select_and_joins_topdown) && (flag & optimize_projections) ? rel_final_optimization_loop : NULL;
1463 : }
|