Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024, 2025 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer_private.h"
15 : #include "rel_statistics.h"
16 : #include "rel_basetable.h"
17 : #include "rel_rewriter.h"
18 : #include "sql_storage.h"
19 :
20 : static sql_exp *
21 3487630 : comparison_find_column(sql_exp *input, sql_exp *e)
22 : {
23 3487630 : switch (input->type) {
24 100105 : case e_convert: {
25 100105 : list *types = (list *)input->r;
26 100105 : sql_class from = ((sql_subtype*)types->h->data)->type->eclass, to = ((sql_subtype*)types->h->next->data)->type->eclass;
27 100105 : if (from == to)
28 193445 : return comparison_find_column(input->l, e) ? input : NULL;
29 : return NULL;
30 : }
31 3067442 : case e_column:
32 3067442 : return exp_match(e, input) ? input : NULL;
33 : default:
34 : return NULL;
35 : }
36 : }
37 :
38 : /* multi lo <= col <= hi, maybe still be false even if lo or hi are NULL, possibly similar for filter on multiple
39 : * columns */
40 : #define comp_single_column(c) (!c->f)
41 :
42 : static sql_exp *
43 8815337 : rel_propagate_column_ref_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
44 : {
45 8898616 : assert(e->type == e_column);
46 8898616 : if (rel) {
47 8898535 : switch(rel->op) {
48 4114898 : case op_left:
49 : case op_right:
50 : case op_full:
51 : case op_join:
52 : case op_select:
53 : case op_anti:
54 : case op_semi: {
55 4114898 : bool found_without_semantics = false, found_left = false, found_right = false, still_unique = false;
56 :
57 4114898 : if ((is_innerjoin(rel->op) || is_select(rel->op)) && list_length(rel->exps) == 1 && exp_is_false(rel->exps->h->data))
58 : return NULL; /* nothing will pass, skip */
59 :
60 : /* propagate from the bottom first */
61 4094446 : if (rel_propagate_column_ref_statistics(sql, rel->l, e))
62 : found_left = true;
63 2601530 : if (!found_left && is_join(rel->op) && rel_propagate_column_ref_statistics(sql, rel->r, e))
64 4094446 : found_right = true;
65 :
66 4094446 : if (!found_left && !found_right)
67 : return NULL;
68 1790823 : if (!list_empty(rel->exps) && rel->op != op_anti) { /* if there's an or, the MIN and MAX get difficult to propagate */
69 3515776 : for (node *n = rel->exps->h ; n ; n = n->next) {
70 1820293 : sql_exp *comp = n->data, *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL, *fe = comp->f, *fne = NULL;
71 :
72 1820293 : if (comp->type == e_cmp) {
73 1819289 : if (is_theta_exp(comp->flag) && ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) || (fe && (fne = comparison_find_column(fe, e))))) {
74 124772 : atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
75 124772 : *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
76 :
77 : /* not semantics found or if explicitly filtering not null values from the column */
78 124772 : found_without_semantics |= (!is_semantics(comp) && comp_single_column(comp)) || (comp->flag == cmp_equal && lne && is_anti(comp) && exp_is_null(re));
79 124772 : still_unique |= comp->flag == cmp_equal && is_unique(le) && is_unique(re); /* unique if only equi-joins on unique columns are there */
80 124772 : if (is_full(rel->op) || (is_left(rel->op) && found_left) || (is_right(rel->op) && found_right)) /* on outer joins, min and max cannot be propagated on some cases */
81 19870 : continue;
82 : /* if (end2 >= start1 && start2 <= end1) then the 2 intervals are intersected */
83 104902 : if (fe && lval_min && lval_max) { /* range case, the middle expression must intersect the other two */
84 4618 : atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
85 4618 : int int1 = rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0,
86 4618 : int2 = fval_min && fval_max && atom_cmp(fval_max, lval_min) >= 0 && atom_cmp(fval_min, lval_max) <= 0,
87 4618 : symmetric = is_symmetric(comp);
88 :
89 4618 : if (is_anti(comp) || (!symmetric && fval_min && rval_max && atom_cmp(fval_min, rval_max) < 0)) /* for asymmetric case the re range must be after the fe range */
90 1815 : continue;
91 2803 : if (lne && int1 && int2) {
92 103 : if (symmetric) {
93 0 : prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
94 0 : atom *nmin = statistics_atom_min(sql, rval_min, fval_min), *nmax = statistics_atom_max(sql, rval_max, fval_max);
95 : /* max is min from le and (max from re and fe max) */
96 0 : set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, nmax, p2->value.pval) : nmax);
97 : /* min is max from le and (min from re and fe min) */
98 0 : set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, nmin, p1->value.pval) : nmin);
99 : } else {
100 103 : prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
101 : /* max is min from le and fe max */
102 103 : set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, fval_max, p2->value.pval) : fval_max);
103 : /* min is max from le and re min */
104 103 : set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, rval_min, p1->value.pval) : rval_min);
105 : }
106 2700 : } else if (rne) {
107 590 : if (symmetric && int1 && int2) { /* min is max from le and (min from re and fe min) */
108 0 : prop *p = find_prop(e->p, PROP_MIN);
109 0 : atom *nmin = p ? statistics_atom_min(sql, p->value.pval, fval_min) : fval_min;
110 0 : set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, nmin, lval_min));
111 590 : } else if (int1) { /* min is max from le and re min */
112 100 : prop *p = find_prop(e->p, PROP_MIN);
113 100 : set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
114 : }
115 2110 : } else if (fne) {
116 549 : if (symmetric && int1 && int2) { /* max is min from le and (max from re and fe max) */
117 0 : prop *p = find_prop(e->p, PROP_MAX);
118 0 : atom *nmax = p ? statistics_atom_max(sql, p->value.pval, rval_max) : rval_max;
119 0 : set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, nmax, lval_max) : nmax);
120 549 : } else if (int2) { /* max is min from le and fe max */
121 266 : prop *p = find_prop(e->p, PROP_MAX);
122 266 : set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
123 : }
124 : }
125 100284 : } else if (lval_min && lval_max && rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0) {
126 : /* both min and max must be set and the intervals must overlap */
127 41888 : switch (comp->flag) {
128 28227 : case cmp_equal: /* for equality reduce */
129 28227 : set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_max(sql, lval_max, rval_max) : statistics_atom_min(sql, lval_max, rval_max));
130 28227 : set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_min(sql, lval_min, rval_min) : statistics_atom_max(sql, lval_min, rval_min));
131 28227 : break;
132 4345 : case cmp_notequal: /* for inequality expand */
133 4345 : set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_min(sql, lval_max, rval_max) : statistics_atom_max(sql, lval_max, rval_max));
134 4345 : set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_max(sql, lval_min, rval_min) : statistics_atom_min(sql, lval_min, rval_min));
135 4345 : break;
136 5310 : case cmp_gt:
137 : case cmp_gte:
138 9682 : if (!is_anti(comp) && lne) { /* min is max from both min */
139 4372 : prop *p = find_prop(e->p, PROP_MIN);
140 4372 : set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min, p->value.pval) : rval_min);
141 938 : } else if (!is_anti(comp)) { /* max is min from both max */
142 938 : prop *p = find_prop(e->p, PROP_MAX);
143 938 : set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
144 : }
145 : break;
146 4006 : case cmp_lt:
147 : case cmp_lte:
148 7205 : if (!is_anti(comp) && lne) { /* max is min from both max */
149 3199 : prop *p = find_prop(e->p, PROP_MAX);
150 3199 : set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max, p->value.pval) : rval_max);
151 807 : } else if (!is_anti(comp)) { /* min is max from both min */
152 807 : prop *p = find_prop(e->p, PROP_MIN);
153 807 : set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
154 : }
155 : break;
156 : default: /* Maybe later I can do cmp_in and cmp_notin */
157 : break;
158 : }
159 : }
160 : }
161 : }
162 : }
163 : }
164 1790823 : if (is_full(rel->op) || (is_left(rel->op) && found_right) || (is_right(rel->op) && found_left))
165 35775 : set_has_nil(e);
166 1790823 : if (!is_outerjoin(rel->op) && found_without_semantics) /* at an outer join, null values pass */
167 94044 : set_has_no_nil(e);
168 1790823 : if (is_join(rel->op) && is_unique(e) && !still_unique)
169 119329 : set_not_unique(e);
170 1790823 : return e;
171 : }
172 4680990 : case op_table:
173 : case op_basetable:
174 : case op_union:
175 : case op_except:
176 : case op_inter:
177 : case op_munion:
178 : case op_project:
179 : case op_groupby: {
180 4680990 : if (is_recursive(rel))
181 : return NULL;
182 4680793 : sql_exp *found;
183 4680793 : atom *fval;
184 4680793 : prop *est;
185 4680793 : if ((found = rel_find_exp(rel, e))) {
186 2203253 : if (rel->op != op_table) { /* At the moment don't propagate statistics for table relations */
187 2157915 : if ((fval = find_prop_and_get(found->p, PROP_MAX)))
188 1140360 : set_minmax_property(sql, e, PROP_MAX, fval);
189 2157906 : if ((fval = find_prop_and_get(found->p, PROP_MIN)))
190 1147428 : set_minmax_property(sql, e, PROP_MIN, fval);
191 2157905 : if (!has_nil(found))
192 1396558 : set_has_no_nil(e);
193 2157905 : if (is_unique(found) || (need_distinct(rel) && list_length(rel->exps) == 1) ||
194 1741368 : (is_groupby(rel->op) && (list_empty(rel->r) || (list_length(rel->r) == 1 && exps_find_exp(rel->r, e)))))
195 427733 : set_unique(e);
196 : /* propagate unique estimation for known cases */
197 2157905 : if (is_groupby(rel->op) && list_empty(rel->r) && !find_prop(e->p, PROP_NUNIQUES)) { /* global aggregate case */
198 7645 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
199 7645 : p->value.dval = 1;
200 2150261 : } else if (((is_basetable(rel->op) || is_except(rel->op) || is_inter(rel->op) || is_simple_project(rel->op) ||
201 71914 : (is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
202 2090977 : (est = find_prop(found->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES))) {
203 195233 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
204 195233 : p->value.dval = est->value.dval;
205 : }
206 : }
207 2203241 : return e;
208 : }
209 : return NULL;
210 : }
211 83279 : case op_topn:
212 : case op_sample:
213 83279 : return rel_propagate_column_ref_statistics(sql, rel->l, e);
214 : default:
215 : break;
216 : }
217 : }
218 : return NULL;
219 : }
220 :
221 : static atom *
222 4657905 : atom_from_valptr( allocator *sa, sql_subtype *tpe, ValRecord *v)
223 : {
224 4657905 : atom *a = SA_NEW(sa, atom);
225 :
226 4657921 : assert(!VALisnil(v));
227 4657940 : *a = (atom) {.tpe = *tpe,};
228 4657940 : SA_VALcopy(sa, &a->data, v);
229 4658105 : return a;
230 : }
231 :
232 : void
233 4290013 : sql_column_get_statistics(mvc *sql, sql_column *c, sql_exp *e)
234 : {
235 4290013 : bool nonil = false, unique = false;
236 4290013 : double unique_est = 0.0;
237 4290013 : ValRecord min, max;
238 4290013 : int ok = mvc_col_stats(sql, c, &nonil, &unique, &unique_est, &min, &max);
239 :
240 4291398 : if (has_nil(e) && nonil)
241 2837097 : set_has_no_nil(e);
242 4291398 : if (!is_unique(e) && unique)
243 1133738 : set_unique(e);
244 4291398 : if (unique_est != 0.0) {
245 3023219 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
246 3023052 : p->value.dval = unique_est;
247 : }
248 4291231 : unsigned int digits = 0;
249 4291231 : sql_subtype *et = exp_subtype(e);
250 4291380 : if (et->type->eclass == EC_DEC || et->type->eclass == EC_NUM)
251 2783440 : digits = et->digits;
252 4291380 : if ((ok & 2) == 2) {
253 2326266 : if (!VALisnil(&max)) {
254 2326188 : prop *p = e->p = prop_create(sql->sa, PROP_MAX, e->p);
255 2326137 : p->value.pval = atom_from_valptr(sql->sa, &c->type, &max);
256 2326118 : if (digits) {
257 1723117 : unsigned int nd = atom_digits(p->value.pval);
258 1723071 : if (nd < digits)
259 : digits = nd;
260 1723071 : if (!digits)
261 : digits = 1;
262 : }
263 : }
264 2326055 : VALclear(&max);
265 : }
266 4291157 : if ((ok & 1) == 1) {
267 2332257 : if (!VALisnil(&min)) {
268 2332270 : prop *p = e->p = prop_create(sql->sa, PROP_MIN, e->p);
269 2332291 : p->value.pval = atom_from_valptr(sql->sa, &c->type, &min);
270 2332397 : if (digits) {
271 1730406 : unsigned int nd = atom_digits(p->value.pval);
272 1730433 : if (nd > digits) /* possibly set to low by max value */
273 : digits = nd;
274 : if (!digits)
275 : digits = 1;
276 : }
277 : }
278 2332422 : VALclear(&min);
279 : }
280 4291339 : if (digits)
281 2783420 : et->digits = digits;
282 4291339 : if (et->type->eclass == EC_DEC && et->digits <= et->scale)
283 216 : et->digits = et->scale + 1;
284 4291339 : }
285 :
286 : static void
287 953148 : rel_basetable_column_get_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
288 : {
289 953148 : if (e->p)
290 : return;
291 306726 : sql_column *c = NULL;
292 :
293 306726 : if ((c = rel_base_find_column(rel, e->nid))) {
294 208600 : sql_column_get_statistics(sql, c, e);
295 : }
296 : }
297 :
298 : static bool
299 8876 : rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, sql_exp *e, int i)
300 : {
301 8876 : sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
302 8876 : atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
303 8876 : *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
304 8876 : prop *est;
305 :
306 : /* for the intersection, if both expressions don't overlap, it can be pruned */
307 8876 : if (is_inter(rel->op) && !has_nil(le) && !has_nil(re) &&
308 511 : ((rval_max && lval_min && atom_cmp(rval_max, lval_min) < 0) || (rval_min && lval_max && atom_cmp(rval_min, lval_max) > 0)))
309 26 : return true;
310 :
311 8850 : if (lval_max && rval_max) {
312 2557 : if (is_union(rel->op))
313 3 : set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
314 2554 : else if (is_inter(rel->op))
315 399 : set_minmax_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval_max, rval_max)); /* for intersect the new max will be the min of the two */
316 : else /* except */
317 2155 : set_minmax_property(sql, e, PROP_MAX, lval_max);
318 : }
319 8850 : if (lval_min && rval_min) {
320 2557 : if (is_union(rel->op))
321 3 : set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
322 2554 : else if (is_inter(rel->op))
323 399 : set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval_min, rval_min)); /* for intersect the new min will be the max of the two */
324 : else /* except */
325 2155 : set_minmax_property(sql, e, PROP_MIN, lval_min);
326 : }
327 :
328 8850 : if (is_union(rel->op) || is_munion(rel->op)) {
329 5986 : if (!has_nil(le) && !has_nil(re))
330 879 : set_has_no_nil(e);
331 5986 : if (need_distinct(rel) && list_length(rel->exps) == 1)
332 0 : set_unique(e);
333 2864 : } else if (is_inter(rel->op)) {
334 564 : if (!has_nil(le) || !has_nil(re))
335 509 : set_has_no_nil(e);
336 564 : if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
337 345 : set_unique(e);
338 : } else {
339 2300 : assert(is_except(rel->op));
340 2300 : if (!has_nil(le))
341 2236 : set_has_no_nil(e);
342 2300 : if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
343 2012 : set_unique(e);
344 : }
345 : /* propagate unique estimation for known cases */
346 8850 : if (!is_union(rel->op) && (est = find_prop(le->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES)) {
347 207 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
348 207 : p->value.dval = est->value.dval;
349 : }
350 : return false;
351 : }
352 :
353 :
354 : static void
355 62837 : rel_munion_get_statistics(mvc *sql, sql_rel *rel, list *rels, sql_exp *e, int i)
356 : {
357 62837 : if (is_recursive(rel))
358 : return ;
359 62837 : assert(is_munion(rel->op));
360 :
361 62837 : sql_rel *l = rels->h->data;
362 62837 : sql_exp *le = list_fetch(l->exps, i);
363 62837 : atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX);
364 62837 : bool has_nonil = !has_nil(le);
365 :
366 182734 : for(node *n = rels->h->next; n; n = n->next) {
367 119897 : sql_rel *r = n->data;
368 119897 : sql_exp *re = list_fetch(r->exps, i);
369 119897 : atom *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
370 :
371 119897 : if (lval_max && rval_max) {
372 44177 : set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
373 44177 : lval_max = find_prop_and_get(e->p, PROP_MAX);
374 : }
375 119897 : if (lval_min && rval_min) {
376 43628 : set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
377 43628 : lval_min = find_prop_and_get(e->p, PROP_MIN);
378 : }
379 119897 : has_nonil &= !has_nil(re);
380 :
381 : }
382 :
383 62837 : if (has_nonil)
384 42790 : set_has_no_nil(e);
385 :
386 62837 : if (need_distinct(rel) && list_length(rel->exps) == 1)
387 1597 : set_unique(e);
388 : }
389 :
390 :
391 : static sql_exp *
392 3566059 : rel_propagate_statistics(visitor *v, sql_rel *rel, sql_exp *e, int depth)
393 : {
394 3566059 : mvc *sql = v->sql;
395 3566059 : atom *lval;
396 :
397 3566059 : (void) depth;
398 3566059 : switch(e->type) {
399 2211641 : case e_column:
400 2211641 : switch (rel->op) { /* set relations don't call rel_propagate_statistics */
401 284686 : case op_join:
402 : case op_left:
403 : case op_right:
404 : case op_full:
405 : case op_semi:
406 : case op_anti: {
407 284686 : sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e);
408 284686 : if (!found)
409 142791 : (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
410 : break;
411 : }
412 1926955 : case op_select:
413 : case op_project:
414 : case op_groupby: {
415 1926955 : sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e); /* labels may be found on the same projection, ugh */
416 1926949 : if (!found && is_simple_project(rel->op))
417 133515 : (void) rel_propagate_column_ref_statistics(sql, rel, e);
418 : break;
419 : }
420 0 : case op_insert:
421 : case op_update:
422 : case op_delete:
423 0 : (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
424 0 : break;
425 : default:
426 : break;
427 : }
428 : break;
429 101863 : case e_convert: {
430 101863 : sql_subtype *to = exp_totype(e), *from = exp_fromtype(e);
431 101863 : sql_exp *l = e->l;
432 101863 : sql_class fr = from->type->eclass, too = to->type->eclass;
433 101863 : prop *est;
434 :
435 101863 : if (fr == too) {
436 92701 : if ((lval = find_prop_and_get(l->p, PROP_MAX))) {
437 60263 : atom *res = atom_copy(sql->sa, lval);
438 60263 : if ((res = atom_cast(sql->sa, res, to)))
439 60240 : set_minmax_property(sql, e, PROP_MAX, res);
440 : }
441 92701 : if ((lval = find_prop_and_get(l->p, PROP_MIN))) {
442 60901 : atom *res = atom_copy(sql->sa, lval);
443 60901 : if ((res = atom_cast(sql->sa, res, to)))
444 60878 : set_minmax_property(sql, e, PROP_MIN, res);
445 : }
446 : }
447 : /* propagating nuniques through conversions is tricky. I am adding just the general cases */
448 101863 : if ((est = find_prop(l->p, PROP_NUNIQUES)) &&
449 63324 : (((fr == EC_TIME || fr == EC_TIME_TZ || fr == EC_DATE) && (too == EC_TIMESTAMP || too == EC_TIMESTAMP_TZ)) ||
450 63299 : ((fr == EC_NUM || fr == EC_BIT) && too == EC_NUM) || (fr == EC_DEC && too == EC_DEC) || (fr != EC_EXTERNAL && too == EC_STRING))) {
451 59865 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
452 59865 : p->value.dval = est->value.dval;
453 : }
454 101863 : if (!has_nil(l))
455 57955 : set_has_no_nil(e);
456 : break;
457 : }
458 379254 : case e_aggr:
459 : case e_func: {
460 379254 : BUN lv;
461 379254 : sql_subfunc *f = e->f;
462 :
463 379254 : if (!f->func->s) {
464 352139 : int key = hash_key(f->func->base.name); /* Using hash lookup */
465 352139 : sql_hash_e *he = sql_functions_lookup->buckets[key&(sql_functions_lookup->size-1)];
466 352139 : lookup_function look = NULL;
467 :
468 753756 : for (; he && !look; he = he->chain) {
469 401617 : struct function_properties* fp = (struct function_properties*) he->value;
470 :
471 401617 : if (!strcmp(f->func->base.name, fp->name))
472 108565 : look = fp->func;
473 : }
474 352139 : if (look)
475 108565 : look(sql, e);
476 : }
477 : /* for global aggregates with no semantics, if the left relation has values, then the output won't be NULL */
478 379254 : if (!is_semantics(e) && e->l && !have_nil(e->l) &&
479 109263 : (e->type != e_aggr || (is_groupby(rel->op) && list_length(rel->r)) || ((lv = get_rel_count(rel->l)) != BUN_NONE && lv > 0)))
480 108927 : set_has_no_nil(e);
481 : /* set properties for global aggregates */
482 379254 : if (e->type == e_aggr && is_groupby(rel->op) && list_empty(rel->r)) {
483 7949 : if (!find_prop(e->p, PROP_NUNIQUES)) {
484 7949 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
485 7949 : p->value.dval = 1;
486 : }
487 7949 : set_unique(e);
488 : }
489 : break;
490 : }
491 591095 : case e_atom:
492 591095 : if (e->l) {
493 573066 : atom *a = (atom*) e->l;
494 573066 : if (!a->isnull) {
495 503993 : set_minmax_property(sql, e, PROP_MAX, a);
496 503993 : set_minmax_property(sql, e, PROP_MIN, a);
497 : }
498 573066 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
499 573066 : p->value.dval = 1;
500 18029 : } else if (e->f) {
501 4302 : list *vals = (list *) e->f;
502 4302 : sql_exp *first = vals->h ? vals->h->data : NULL;
503 4302 : atom *max = NULL, *min = NULL; /* all child values must have a valid min/max */
504 4302 : int has_nil = 0;
505 :
506 4302 : if (first) {
507 4302 : max = ((lval = find_prop_and_get(first->p, PROP_MAX))) ? lval : NULL;
508 4302 : min = ((lval = find_prop_and_get(first->p, PROP_MIN))) ? lval : NULL;
509 4302 : has_nil |= has_nil(first);
510 : }
511 :
512 18436 : for (node *n = vals->h ? vals->h->next : NULL ; n ; n = n->next) {
513 9832 : sql_exp *ee = n->data;
514 :
515 9832 : if (min && max) {
516 9339 : if ((lval = find_prop_and_get(ee->p, PROP_MAX))) {
517 9293 : max = atom_cmp(lval, max) > 0 ? lval : max;
518 : } else {
519 : max = NULL;
520 : }
521 9339 : if ((lval = find_prop_and_get(ee->p, PROP_MIN))) {
522 9293 : min = atom_cmp(min, lval) > 0 ? lval : min;
523 : } else {
524 : min = NULL;
525 : }
526 : }
527 9832 : has_nil |= has_nil(ee);
528 : }
529 :
530 4302 : if (!has_nil)
531 3928 : set_has_no_nil(e);
532 4302 : if (min && max) {
533 3886 : set_minmax_property(sql, e, PROP_MAX, max);
534 3886 : set_minmax_property(sql, e, PROP_MIN, min);
535 : }
536 4302 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
537 4302 : p->value.dval = (dbl) list_length(vals);
538 : } else {
539 13727 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
540 13727 : p->value.dval = 1;
541 : }
542 : break;
543 282206 : case e_cmp:
544 : /* TODO? propagating min/max/unique of booleans is not very worth it */
545 282206 : if (e->flag == cmp_or || e->flag == cmp_filter) {
546 14765 : if (!have_nil(e->l) && !have_nil(e->r))
547 10114 : set_has_no_nil(e);
548 267441 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
549 20162 : sql_exp *le = e->l;
550 20162 : if (!has_nil(le) && !have_nil(e->r))
551 16927 : set_has_no_nil(e);
552 : } else {
553 247279 : sql_exp *le = e->l, *re = e->r, *fe = e->f;
554 247279 : if (!has_nil(le) && !has_nil(re) && (!e->f || !has_nil(fe)))
555 174380 : set_has_no_nil(e);
556 : }
557 : break;
558 : case e_psm:
559 : break;
560 : }
561 :
562 : #ifndef NDEBUG
563 : {
564 : /* min and max cannot be NULL and min must be <= than max, if it happens the inner relation must be empty! */
565 3566053 : atom *min = find_prop_and_get(e->p, PROP_MIN), *max = find_prop_and_get(e->p, PROP_MAX);
566 :
567 3566055 : (void) min;
568 3566055 : (void) max;
569 3566055 : assert(!min || !min->isnull);
570 3566055 : assert(!max || !max->isnull);
571 3566055 : assert(!min || !max || (min && max)/* atom_cmp(min, max) <= 0*/);
572 : }
573 : #endif
574 3566055 : return e;
575 : }
576 :
577 : static list * /* Remove predicates always false from min/max values */
578 143712 : rel_prune_predicates(visitor *v, sql_rel *rel)
579 : {
580 143712 : if (rel->l) {
581 143712 : sql_rel *l = rel->l;
582 143712 : if (is_single(l))
583 3372 : return rel->exps;
584 : }
585 140340 : if (!list_empty(rel->attr))
586 3003 : return rel->exps;
587 289085 : for (node *n = rel->exps->h ; n ; n = n->next) {
588 151748 : sql_exp *e = n->data;
589 :
590 151748 : if (e->type == e_cmp && is_theta_exp(e->flag)) {
591 125224 : sql_exp *le = e->l, *re = e->r, *fe = e->f;
592 125224 : atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
593 125224 : *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
594 125224 : bool always_false = false, always_true = false;
595 :
596 125224 : if (fe && !is_symmetric(e)) {
597 3045 : atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
598 3045 : comp_type lower = range2lcompare(e->flag), higher = range2rcompare(e->flag);
599 3644 : int not_int1 = rval_min && lval_max && /* the middle and left intervals don't overlap */
600 1654 : (!is_anti(e) && (lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0)),
601 4059 : not_int2 = lval_min && fval_max && /* the middle and right intervals don't overlap */
602 2482 : (!is_anti(e) && (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0)),
603 3045 : not_int3 = rval_min && fval_max && /* the left interval is after the right one */
604 1285 : (!is_anti(e) && (atom_cmp(rval_min, fval_max) > 0));
605 :
606 3045 : always_false |= not_int1 || not_int2 || not_int3;
607 : /* for anti the middle must be before the left or after the right or the right after the left, for the other the middle must be always between the left and right intervals */
608 4089 : always_true |= !has_nil(le) && !has_nil(re) && !has_nil(fe) &&
609 3941 : lval_min && lval_max && rval_min && rval_max && fval_min && fval_max &&
610 573 : (is_anti(e) ? ((lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) || (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
611 488 : ((lower == cmp_gte ? atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0)));
612 : } else if (!fe) {
613 122163 : if (!is_semantics(e)) /* trivial not null cmp null case */
614 232882 : always_false |= !is_anti(e) && ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)));
615 122163 : switch (e->flag) {
616 108637 : case cmp_equal:
617 108637 : if (lval_min && lval_max && rval_min && rval_max && (!is_semantics(e) || !has_nil(le) || !has_nil(re)))
618 135956 : always_false |= (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
619 108637 : if (is_semantics(e)) { /* prune *= NULL cases */
620 5685 : always_false |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
621 11370 : always_true |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
622 : }
623 : break;
624 5334 : case cmp_notequal:
625 5334 : if (lval_min && lval_max && rval_min && rval_max)
626 7364 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
627 5334 : if (is_semantics(e)) {
628 37 : always_true |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
629 74 : always_false |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
630 : }
631 : break;
632 5487 : case cmp_gt:
633 5487 : if (lval_max && rval_min)
634 1834 : always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
635 5487 : if (lval_min && rval_max)
636 3668 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
637 : break;
638 121 : case cmp_gte:
639 121 : if (lval_max && rval_min)
640 51 : always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
641 121 : if (lval_min && rval_max)
642 102 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
643 : break;
644 2502 : case cmp_lt:
645 2502 : if (lval_min && rval_max)
646 1382 : always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
647 2502 : if (lval_max && rval_min)
648 2812 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
649 : break;
650 82 : case cmp_lte:
651 82 : if (lval_min && rval_max)
652 11 : always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
653 82 : if (lval_max && rval_min)
654 22 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
655 : break;
656 : default: /* Maybe later I can do cmp_in and cmp_notin but don't forget to remove is_theta_exp check up there */
657 : break;
658 : }
659 : }
660 125224 : assert(!always_false || !always_true);
661 125224 : if (always_false || always_true) {
662 3698 : sql_exp *ne = exp_atom_bool(v->sql->sa, always_true /* if always_true set then true else false */);
663 3698 : if (exp_name(e))
664 0 : exp_prop_alias(v->sql->sa, ne, e);
665 3698 : n->data = ne;
666 3698 : v->changes++;
667 : }
668 : }
669 : }
670 137337 : return rel->exps;
671 : }
672 :
673 : static sql_rel *
674 14 : set_setop_side(visitor *v, sql_rel *rel, sql_rel *side)
675 : {
676 14 : if (side == rel->r)
677 0 : rel->r = NULL;
678 : else
679 14 : rel->l = NULL;
680 14 : if (!is_simple_project(side->op) || rel_is_ref(side) || !list_empty(side->r)) {
681 0 : side = rel_project(v->sql->sa, side, rel_projections(v->sql, side, NULL, 0, 1));
682 0 : set_count_prop(v->sql->sa, side, get_rel_count(side->l));
683 0 : side->exps = exps_exp_visitor_bottomup(v, side, side->exps, 0, &rel_propagate_statistics, false);
684 : }
685 35 : for (node *n = rel->exps->h, *m = side->exps->h ; n && m ; n = n->next, m = m->next) {
686 21 : sql_exp *e1 = n->data, *e2 = m->data;
687 21 : exp_prop_alias(v->sql->sa, e2, e1);
688 : }
689 14 : list_hash_clear(side->exps);
690 :
691 14 : if (need_distinct(rel))
692 0 : set_distinct(side);
693 14 : side->p = prop_copy(v->sql->sa, rel->p);
694 14 : rel_destroy(rel);
695 14 : return side;
696 : }
697 :
698 : static BUN
699 26424 : trivial_project_exp_card(sql_exp *e)
700 : {
701 26799 : if (e->type == e_convert)
702 375 : return trivial_project_exp_card(e->l);
703 26424 : return e->type == e_atom && e->f ? (BUN) list_length(e->f) : 1;
704 : }
705 :
706 : static BUN
707 26198 : rel_calc_nuniques(mvc *sql, sql_rel *l, list *exps)
708 : {
709 26198 : BUN lv = get_rel_count(l);
710 :
711 26199 : if (lv == 0)
712 : return 0;
713 23461 : if (!list_empty(exps)) {
714 23461 : BUN nuniques = 0;
715 : /* compute the highest number of unique values */
716 58643 : for (node *n = exps->h ; n && nuniques != BUN_NONE ; n = n->next) {
717 35181 : sql_exp *e = n->data;
718 35181 : sql_rel *bt = NULL;
719 35181 : prop *p = NULL;
720 35181 : BUN euniques = BUN_NONE;
721 35181 : atom *min, *max, *sub = NULL;
722 35181 : sql_subtype *tp = exp_subtype(e);
723 35181 : sql_class ec = tp ? tp->type->eclass : EC_STRING; /* if 'e' has no type (eg parameter), use a non-number type to fail condition */
724 :
725 35181 : if ((p = find_prop(e->p, PROP_NUNIQUES))) {
726 25416 : euniques = (BUN) p->value.dval;
727 9765 : } else if (e->type == e_column && e->nid && rel_find_exp_and_corresponding_rel(l, e, false, &bt, NULL) && bt && (p = find_prop(bt->p, PROP_COUNT))) {
728 7665 : euniques = (BUN) p->value.lval;
729 : }
730 : /* use min to max range to compute number of possible values in the domain for number types */
731 35181 : if ((EC_TEMP(ec)||ec==EC_NUM||ec==EC_MONTH||ec==EC_POS) &&
732 24291 : (min = find_prop_and_get(e->p, PROP_MIN)) && (max = find_prop_and_get(e->p, PROP_MAX))) {
733 : /* the range includes min and max, so the atom_inc call is needed */
734 : /* if 'euniques' has number of distinct values, compute min between both */
735 19346 : if ((sub = atom_sub(sql->sa, max, min)) && (sub = atom_inc(sql->sa, sub)) && (sub = atom_cast(sql->sa, sub, sql_bind_localtype("oid"))))
736 0 : euniques = MIN(euniques, (BUN) sub->data.val.oval);
737 : }
738 35182 : if (euniques != BUN_NONE)
739 33082 : nuniques = MAX(nuniques, euniques); /* the highest cardinality sets the estimation */
740 : else
741 : nuniques = BUN_NONE;
742 : }
743 23462 : if (nuniques != BUN_NONE)
744 : return nuniques;
745 : }
746 : return lv;
747 : }
748 :
749 : static sql_rel *
750 1366516 : rel_get_statistics_(visitor *v, sql_rel *rel)
751 : {
752 : /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
753 1366516 : uint8_t has_special_modify = *(uint8_t*) v->data;
754 1366516 : bool can_be_pruned = !has_special_modify && v->storage_based_opt;
755 :
756 : /* Don't look at the same relation twice */
757 1366516 : if (are_statistics_gathered(rel->used))
758 : return rel;
759 1366417 : rel->used |= statistics_gathered;
760 :
761 1366417 : switch (rel->op) {
762 318940 : case op_basetable: {
763 318940 : sql_table *t = (sql_table *) rel->l;
764 318940 : sqlstore *store = v->sql->session->tr->store;
765 :
766 318940 : if (!list_empty(rel->exps)) {
767 1272418 : for (node *n = rel->exps->h ; n ; n = n->next)
768 953274 : rel_basetable_column_get_statistics(v->sql, rel, n->data);
769 : }
770 : /* Set table row count. TODO? look for remote tables. Don't look at storage for declared tables, because it won't be cleaned */
771 319147 : if (isTable(t) && t->s && !isDeclaredTable(t)) /* count active rows only */
772 264463 : set_count_prop(v->sql->sa, rel, (BUN)store->storage_api.count_col(v->sql->session->tr, ol_first_node(t->columns)->data, 10));
773 : break;
774 : }
775 2794 : case op_union:
776 : case op_inter:
777 : case op_except: {
778 2794 : bool empty_cross = false;
779 2794 : int i = 0;
780 2794 : sql_rel *l = rel->l, *pl = rel->l, *r = rel->r, *pr = rel->r;
781 :
782 2794 : while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
783 0 : pl = pl->l;
784 2794 : while (is_sample(pr->op) || is_topn(pr->op))
785 0 : pr = pr->l;
786 : /* if it's not a projection, then project and propagate statistics */
787 2794 : if (!is_project(pl->op) && !is_base(pl->op)) {
788 0 : pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
789 0 : set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
790 0 : pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
791 : }
792 2794 : if (!is_project(pr->op) && !is_base(pr->op)) {
793 0 : pr = rel_project(v->sql->sa, pr, rel_projections(v->sql, pr, NULL, 0, 1));
794 0 : set_count_prop(v->sql->sa, pr, get_rel_count(pr->l));
795 0 : pr->exps = exps_exp_visitor_bottomup(v, pr, pr->exps, 0, &rel_propagate_statistics, false);
796 : }
797 :
798 11670 : for (node *n = rel->exps->h ; n ; n = n->next) {
799 8876 : empty_cross |= rel_setop_get_statistics(v->sql, rel, pl->exps, pr->exps, n->data, i);
800 8876 : i++;
801 : }
802 :
803 : /* propagate row count */
804 2794 : if (is_union(rel->op)) {
805 277 : BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
806 277 : rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
807 :
808 277 : if (lv == 0 && rv == 0) { /* both sides empty */
809 2 : if (can_be_pruned)
810 : empty_cross = true;
811 : else
812 2 : set_count_prop(v->sql->sa, rel, 0);
813 275 : } else if (can_be_pruned && lv == 0 && !rel_is_ref(rel)) { /* left side empty */
814 0 : rel = set_setop_side(v, rel, r);
815 0 : empty_cross = false; /* don't rewrite again */
816 275 : } else if (can_be_pruned && rv == 0 && !rel_is_ref(rel)) { /* right side empty */
817 0 : rel = set_setop_side(v, rel, l);
818 0 : empty_cross = false; /* don't rewrite again */
819 275 : } else if (lv != BUN_NONE && rv != BUN_NONE) {
820 7 : set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
821 : }
822 2517 : } else if (is_inter(rel->op) || is_except(rel->op)) {
823 2517 : BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
824 2517 : rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
825 :
826 2517 : if (lv == 0) { /* left side empty */
827 63 : if (can_be_pruned)
828 : empty_cross = true;
829 : else
830 5 : set_count_prop(v->sql->sa, rel, 0);
831 2454 : } else if (rv == 0) { /* right side empty */
832 17 : if (is_inter(rel->op)) {
833 3 : if (can_be_pruned)
834 : empty_cross = true;
835 : else
836 0 : set_count_prop(v->sql->sa, rel, 0);
837 14 : } else if (can_be_pruned && !rel_is_ref(rel)) {
838 14 : rel = set_setop_side(v, rel, l);
839 14 : empty_cross = false; /* don't rewrite again */
840 : } else {
841 0 : set_count_prop(v->sql->sa, rel, lv);
842 : }
843 : } else {
844 2437 : set_count_prop(v->sql->sa, rel, lv);
845 : }
846 : }
847 2794 : if (can_be_pruned && empty_cross) {
848 81 : rel_destroy(rel->l);
849 81 : rel->l = NULL;
850 81 : rel_destroy(rel->r);
851 81 : rel->r = NULL;
852 246 : for (node *n = rel->exps->h ; n ; n = n->next) {
853 165 : sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
854 165 : exp_prop_alias(v->sql->sa, a, e);
855 165 : n->data = a;
856 : }
857 81 : list_hash_clear(rel->exps);
858 81 : sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
859 81 : set_count_prop(v->sql->sa, l, 1);
860 81 : l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
861 81 : set_count_prop(v->sql->sa, l, 0);
862 81 : rel->op = op_project;
863 81 : rel->l = l;
864 81 : rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
865 81 : set_count_prop(v->sql->sa, rel, 0);
866 81 : set_nodistinct(rel); /* set relations may have distinct flag set */
867 81 : v->changes++;
868 : }
869 : break;
870 : }
871 13473 : case op_munion: {
872 13473 : list *l = rel->l, *nrels = sa_list(v->sql->sa);
873 13473 : BUN cnt = 0;
874 13473 : bool needs_pruning = false;
875 :
876 13473 : if (is_recursive(rel))
877 : break;
878 51231 : for (node *n = l->h; n; n = n->next) {
879 37829 : sql_rel *r = n->data, *pl = r;
880 :
881 37829 : while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
882 0 : pl = pl->l;
883 : /* if it's not a projection, then project and propagate statistics */
884 37829 : if (!is_project(pl->op) && !is_base(pl->op)) {
885 0 : pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
886 0 : set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
887 0 : pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
888 : }
889 37829 : nrels = append(nrels, pl);
890 : /* we need new munion statistics */
891 : /* propagate row count */
892 37829 : BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
893 : /* if PROP_COUNT does not exist we assume at least a row (see get_rel_count def) */
894 37829 : if (rv == BUN_NONE) {
895 1220 : cnt++;
896 1220 : continue;
897 : }
898 36609 : if (!rv && can_be_pruned)
899 6724 : needs_pruning = true;
900 : /* overflow check */
901 36609 : if (rv > (BUN_MAX - cnt))
902 37829 : rv = BUN_MAX;
903 : else
904 36609 : cnt += rv;
905 : }
906 13402 : int i = 0;
907 76239 : for (node *n = rel->exps->h ; n ; n = n->next, i++)
908 62837 : rel_munion_get_statistics(v->sql, rel, nrels, n->data, i);
909 :
910 13402 : if (needs_pruning && !rel_is_ref(rel)) {
911 4563 : v->changes++;
912 4563 : list *nl = sa_list(l->sa);
913 :
914 16832 : for (node *n = nrels->h; n; n = n->next) {
915 12269 : sql_rel *r = n->data;
916 12269 : BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
917 :
918 12269 : if (!rv) { /* keep last for now */
919 6254 : rel_destroy(r);
920 6254 : continue;
921 : }
922 6015 : nl = append(nl, r);
923 : }
924 4563 : rel->l = nl;
925 4563 : if (list_length(nl) == 1) {
926 4227 : sql_rel *l = rel->l = nl->h->data; /* ugh */
927 4227 : rel->r = NULL;
928 4227 : rel->op = op_project;
929 :
930 20767 : for (node *n = rel->exps->h, *m = l->exps->h ; n && m ; n = n->next, m = m->next) {
931 16540 : sql_exp *pe = n->data, *ie = m->data;
932 16540 : sql_exp *ne = exp_ref(v->sql, ie);
933 16540 : exp_prop_alias(v->sql->sa, ne, pe);
934 16540 : n->data = ne;
935 : }
936 4227 : list_hash_clear(rel->exps);
937 336 : } else if (list_empty(nl)) {
938 : /* empty select (project [ nils ] ) */
939 441 : for (node *n = rel->exps->h ; n ; n = n->next) {
940 341 : sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
941 341 : exp_prop_alias(v->sql->sa, a, e);
942 341 : n->data = a;
943 : }
944 100 : list_hash_clear(rel->exps);
945 100 : sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
946 100 : set_count_prop(v->sql->sa, l, 1);
947 100 : l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
948 100 : set_count_prop(v->sql->sa, l, 0);
949 100 : rel->op = op_project;
950 100 : rel->r = NULL;
951 100 : rel->l = l;
952 100 : rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
953 100 : set_count_prop(v->sql->sa, rel, 0);
954 100 : set_nodistinct(rel); /* set relations may have distinct flag set */
955 : }
956 : } else {
957 8839 : set_count_prop(v->sql->sa, rel, cnt);
958 : }
959 : break;
960 : }
961 549135 : case op_join:
962 : case op_left:
963 : case op_right:
964 : case op_full:
965 : case op_semi:
966 : case op_anti:
967 : case op_select:
968 : case op_project:
969 : case op_groupby: {
970 549135 : if (is_groupby(rel->op) && !list_empty(rel->r))
971 16821 : rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
972 549134 : rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, &rel_propagate_statistics, false);
973 549120 : if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r))
974 41871 : rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
975 : /* The following optimizations can only be applied after propagating the statistics to rel->exps */
976 549124 : if (can_be_pruned && (is_join(rel->op) || is_select(rel->op)) && !list_empty(rel->exps)) {
977 143712 : int changes = v->changes;
978 143712 : rel->exps = rel_prune_predicates(v, rel);
979 143712 : if (v->changes > changes)
980 3665 : rel = rewrite_simplify(v, 0, v->value_based_opt, rel);
981 : }
982 :
983 : /* propagate row count */
984 549124 : sql_rel *l = rel->l, *r = rel->r;
985 549124 : switch (rel->op) {
986 137081 : case op_join:
987 : case op_left:
988 : case op_right:
989 : case op_full: {
990 137081 : BUN lv = get_rel_count(l), rv = get_rel_count(r), uniques_estimate = BUN_MAX, join_idx_estimate = BUN_MAX;
991 :
992 137081 : if (!list_empty(rel->exps) && !is_single(rel)) {
993 250665 : for (node *n = rel->exps->h ; n ; n = n->next) {
994 127811 : sql_exp *e = n->data, *el = e->l, *er = e->r;
995 :
996 127811 : if (find_prop(e->p, PROP_JOINIDX)) {
997 670 : join_idx_estimate = lv>rv?lv:rv;
998 127141 : } else if (e->type == e_cmp && e->flag == cmp_equal) {
999 : /* if one of the sides is unique, the cardinality will be that exact number, but look for nulls */
1000 123256 : if (!is_semantics(e) || !has_nil(el) || !has_nil(er)) {
1001 123079 : BUN lu = 0, ru = 0;
1002 123079 : prop *p = NULL;
1003 123079 : if ((p = find_prop(el->p, PROP_NUNIQUES)))
1004 91307 : lu = (BUN) p->value.dval;
1005 123079 : if ((p = find_prop(er->p, PROP_NUNIQUES)))
1006 105977 : ru = (BUN) p->value.dval;
1007 123079 : if (is_unique(el) || lu > lv) {
1008 74988 : BUN ncount = (is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv;
1009 74988 : uniques_estimate = MIN(uniques_estimate, ncount);
1010 48091 : } else if (is_unique(er) || ru > rv) {
1011 30062 : BUN ncount = (is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv;
1012 30062 : uniques_estimate = MIN(uniques_estimate, ncount);
1013 : }
1014 : }
1015 : }
1016 : }
1017 : }
1018 137081 : if (is_single(rel)) {
1019 4801 : set_count_prop(v->sql->sa, rel, lv);
1020 132280 : } else if (join_idx_estimate != BUN_MAX) {
1021 668 : set_count_prop(v->sql->sa, rel, join_idx_estimate);
1022 131612 : } else if (uniques_estimate != BUN_MAX) {
1023 98496 : set_count_prop(v->sql->sa, rel, uniques_estimate);
1024 33116 : } else if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
1025 : /* corner cases for outer joins */
1026 126 : if (is_left(rel->op)) {
1027 114 : set_count_prop(v->sql->sa, rel, lv);
1028 12 : } else if (is_right(rel->op)) {
1029 3 : set_count_prop(v->sql->sa, rel, rv);
1030 9 : } else if (is_full(rel->op) && lv != BUN_NONE && rv != BUN_NONE) {
1031 9 : set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
1032 0 : } else if (lv != BUN_NONE && rv != BUN_NONE) {
1033 0 : set_count_prop(v->sql->sa, rel, 0);
1034 : }
1035 32990 : } else if (lv == 0) {
1036 1204 : set_count_prop(v->sql->sa, rel, (is_right(rel->op) || is_full(rel->op)) ? rv : 0);
1037 32374 : } else if (rv == 0) {
1038 1574 : set_count_prop(v->sql->sa, rel, (is_left(rel->op) || is_full(rel->op)) ? lv : 0);
1039 31294 : } else if (lv != BUN_NONE && rv != BUN_NONE) {
1040 18511 : set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX / lv)) ? BUN_MAX : (lv * rv)); /* overflow check */
1041 : }
1042 : break;
1043 : }
1044 2935 : case op_anti:
1045 2935 : set_count_prop(v->sql->sa, rel, get_rel_count(l));
1046 2935 : break;
1047 112955 : case op_semi:
1048 : case op_select:
1049 : /* TODO calculate cardinalities using selectivities */
1050 112955 : if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
1051 5158 : set_count_prop(v->sql->sa, rel, 0);
1052 : } else {
1053 212309 : if (!list_empty(rel->exps) && !is_single(rel)) {
1054 104512 : BUN cnt = get_rel_count(l), u = 1;
1055 170388 : for (node *n = rel->exps->h ; n ; n = n->next) {
1056 111676 : sql_exp *e = n->data, *el = e->l, *er = e->r;
1057 :
1058 : /* simple expressions first */
1059 111676 : if (e->type == e_cmp && e->flag == cmp_equal && exp_is_atom(er)) {
1060 : /* use selectivity */
1061 61262 : prop *p;
1062 61262 : if ((p = find_prop(el->p, PROP_NUNIQUES))) {
1063 45800 : u = (BUN) p->value.dval;
1064 45800 : break;
1065 : }
1066 : }
1067 : }
1068 : /* u is an *estimate*, so don't set count_prop to 0 unless cnt is 0 */
1069 104512 : set_count_prop(v->sql->sa, rel, cnt == 0 ? 0 : u == 0 || u > cnt ? 1 : cnt/u);
1070 : } else {
1071 3285 : set_count_prop(v->sql->sa, rel, get_rel_count(l));
1072 : }
1073 : }
1074 : break;
1075 271425 : case op_project:
1076 271425 : if (l) {
1077 258732 : if (need_distinct(rel)) {
1078 114 : set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->exps));
1079 : } else {
1080 258618 : set_count_prop(v->sql->sa, rel, get_rel_count(l));
1081 : }
1082 : } else {
1083 12693 : BUN card = 1;
1084 :
1085 12693 : if (!list_empty(rel->exps)) {
1086 37768 : for (node *n = rel->exps->h ; n ; n = n->next) {
1087 25075 : sql_exp *e = n->data;
1088 25075 : card = MAX(card, trivial_project_exp_card(e));
1089 : }
1090 : }
1091 12693 : set_count_prop(v->sql->sa, rel, card);
1092 : }
1093 : break;
1094 24317 : case op_groupby:
1095 24317 : if (list_empty(rel->r)) {
1096 7496 : set_count_prop(v->sql->sa, rel, 1);
1097 : } else {
1098 16821 : set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->r));
1099 : }
1100 : break;
1101 : default:
1102 : break;
1103 : }
1104 : break;
1105 : }
1106 16933 : case op_topn: {
1107 16933 : BUN lv = get_rel_count(rel->l);
1108 :
1109 16933 : if (lv != BUN_NONE) {
1110 16910 : sql_exp *le = rel->exps->h->data, *oe = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
1111 84 : if (oe && oe->l && exp_is_not_null(oe)) { /* no parameters */
1112 84 : BUN offset = (BUN) ((atom*)oe->l)->data.val.lval;
1113 84 : lv = offset >= lv ? 0 : lv - offset;
1114 : }
1115 16910 : if (le->l && exp_is_not_null(le)) {
1116 16875 : BUN limit = (BUN) ((atom*)le->l)->data.val.lval;
1117 16875 : lv = MIN(lv, limit);
1118 : }
1119 16910 : set_count_prop(v->sql->sa, rel, lv);
1120 : }
1121 : break;
1122 : }
1123 22 : case op_sample: {
1124 22 : BUN lv = get_rel_count(rel->l);
1125 :
1126 22 : if (lv != BUN_NONE) {
1127 4 : sql_exp *se = rel->exps->h->data;
1128 4 : sql_subtype *tp = exp_subtype(se);
1129 :
1130 4 : if (se->l && tp->type->eclass == EC_NUM) { /* sample is a number of rows */
1131 4 : BUN sample = (BUN) ((atom*)se->l)->data.val.lval;
1132 4 : lv = MIN(lv, sample);
1133 0 : } else if (se->l) { /* sample is a percentage of rows */
1134 0 : dbl percent = ((atom*)se->l)->data.val.dval;
1135 0 : assert(tp->type->eclass == EC_FLT);
1136 0 : lv = (BUN) ceil((dbl)lv * percent);
1137 : }
1138 4 : set_count_prop(v->sql->sa, rel, lv);
1139 : }
1140 : break;
1141 : }
1142 6196 : case op_table: {
1143 6196 : sql_exp *op = rel->r;
1144 6196 : if (rel->flag != TRIGGER_WRAPPER && op) {
1145 5884 : sql_subfunc *f = op->f;
1146 5884 : if (f->func->lang == FUNC_LANG_SQL) {
1147 97 : set_count_prop(v->sql->sa, rel, 1000 /* just some fallback value */);
1148 5787 : } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "storage") == 0) {
1149 837 : set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of catalog */);
1150 4950 : } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "db_users") == 0) {
1151 0 : set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of users */);
1152 4950 : } else if (f->func->lang == FUNC_LANG_MAL && strncmp(f->func->base.name, "querylog", 8) == 0) {
1153 1101 : set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of querylog */);
1154 3849 : } else if (f->func->lang == FUNC_LANG_MAL &&
1155 3684 : (strcmp(f->func->base.name, "queue") == 0 ||
1156 3416 : strcmp(f->func->base.name, "optimizers") == 0 ||
1157 3100 : strcmp(f->func->base.name, "env") == 0 ||
1158 2734 : strcmp(f->func->base.name, "keywords") == 0 ||
1159 2734 : strcmp(f->func->base.name, "statistics") == 0 ||
1160 2057 : strcmp(f->func->base.name, "rejects") == 0 ||
1161 1799 : strcmp(f->func->base.name, "schemastorage") == 0 ||
1162 1799 : strncmp(f->func->base.name, "storage", 7) == 0 ||
1163 1799 : strcmp(f->func->base.name, "sessions") == 0) ) {
1164 2179 : set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of queue */);
1165 : }
1166 : /* else {
1167 : printf("%%func needs stats : %s\n", f->func->base.name);
1168 : } */
1169 : }
1170 : break;
1171 : }
1172 : /*These relations are less important for now
1173 : TODO later we can tune it */
1174 : case op_insert:
1175 : case op_update:
1176 : case op_delete:
1177 : case op_truncate:
1178 : case op_ddl:
1179 : default:
1180 : break;
1181 : }
1182 :
1183 : return rel;
1184 : }
1185 :
1186 : static sql_rel *
1187 554812 : rel_get_statistics(visitor *v, global_props *gp, sql_rel *rel)
1188 : {
1189 : /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
1190 554812 : uint8_t has_special_modify = (uint8_t) gp->has_special_modify;
1191 554812 : v->data = &has_special_modify;
1192 554812 : rel = rel_visitor_bottomup(v, rel, &rel_get_statistics_);
1193 555189 : v->data = gp;
1194 555189 : return rel;
1195 : }
1196 :
1197 : run_optimizer
1198 731194 : bind_get_statistics(visitor *v, global_props *gp)
1199 : {
1200 731194 : (void) v;
1201 731194 : return (gp->opt_level == 1 && !gp->cnt[op_insert]) ? rel_get_statistics : NULL;
1202 : }
1203 :
1204 :
1205 : static bool
1206 44894 : point_select_on_unique_column(sql_rel *rel)
1207 : {
1208 44894 : if (is_select(rel->op) && !list_empty(rel->exps)) {
1209 38356 : for (node *n = rel->exps->h; n ; n = n->next) {
1210 19958 : sql_exp *e = n->data, *el = e->l, *er = e->r, *found = NULL;
1211 :
1212 19958 : if (is_compare(e->type) && e->flag == cmp_equal) {
1213 15205 : if (is_numeric_upcast(el))
1214 0 : el = el->l;
1215 15205 : if (is_numeric_upcast(er))
1216 0 : er = er->l;
1217 15205 : if (is_alias(el->type) && exp_is_atom(er) && (found = rel_find_exp(rel->l, el)) &&
1218 761 : is_unique(found) && (!is_semantics(e) || !has_nil(found) || !has_nil(er)))
1219 : return true;
1220 14444 : if (is_alias(er->type) && exp_is_atom(el) && (found = rel_find_exp(rel->l, er)) &&
1221 0 : is_unique(found) && (!is_semantics(e) || !has_nil(el) || !has_nil(found)))
1222 : return true;
1223 : }
1224 : }
1225 : }
1226 : return false;
1227 : }
1228 :
1229 : /*
1230 : * A point select on an unique column reduces the number of rows to 1. If the same select is under a
1231 : * join, the opposite side's select can be pushed above the join.
1232 : */
1233 : static inline sql_rel *
1234 697659 : rel_push_select_up(visitor *v, sql_rel *rel)
1235 : {
1236 697659 : if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) || is_semi(rel->op)) && !is_single(rel)) {
1237 122618 : sql_rel *l = rel->l, *r = rel->r;
1238 122618 : bool can_pushup_left = is_select(l->op) && !rel_is_ref(l) && !is_single(l) && (is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op)),
1239 122618 : can_pushup_right = is_select(r->op) && !rel_is_ref(r) && !is_single(r) && (is_innerjoin(rel->op) || is_right(rel->op));
1240 :
1241 122618 : if (can_pushup_left || can_pushup_right) {
1242 35440 : if (can_pushup_left)
1243 20626 : can_pushup_left = point_select_on_unique_column(r);
1244 35440 : if (can_pushup_right)
1245 24268 : can_pushup_right = point_select_on_unique_column(l);
1246 :
1247 : /* if both selects retrieve one row each, it's not worth it to push both up */
1248 35440 : if (can_pushup_left && !can_pushup_right) {
1249 695 : sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
1250 695 : nrel->l = l->l;
1251 695 : rel = rel_inplace_select(rel, nrel, l->exps);
1252 695 : assert(is_select(rel->op));
1253 695 : v->changes++;
1254 34745 : } else if (!can_pushup_left && can_pushup_right) {
1255 14 : sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
1256 14 : nrel->r = r->l;
1257 14 : rel = rel_inplace_select(rel, nrel, r->exps);
1258 14 : assert(is_select(rel->op));
1259 14 : v->changes++;
1260 : }
1261 : }
1262 : }
1263 697659 : return rel;
1264 : }
1265 :
1266 : static int
1267 38565 : sql_class_base_score(visitor *v, sql_column *c, sql_subtype *t, bool equality_based)
1268 : {
1269 38565 : int de;
1270 :
1271 38565 : if (!t)
1272 : return 0;
1273 38565 : switch (ATOMstorage(t->type->localtype)) {
1274 : case TYPE_bte:
1275 : return 150 - 8;
1276 1606 : case TYPE_sht:
1277 1606 : return 150 - 16;
1278 16442 : case TYPE_int:
1279 16442 : return 150 - 32;
1280 469 : case TYPE_void:
1281 : case TYPE_lng:
1282 469 : return 150 - 64;
1283 : case TYPE_uuid:
1284 : #ifdef HAVE_HGE
1285 : case TYPE_hge:
1286 : #endif
1287 : return 150 - 128;
1288 2 : case TYPE_flt:
1289 2 : return 75 - 24;
1290 : case TYPE_dbl:
1291 : return 75 - 53;
1292 15726 : default:
1293 15726 : if (equality_based && c && v->storage_based_opt && (de = mvc_is_duplicate_eliminated(v->sql, c)))
1294 1491 : return 150 - de * 8;
1295 : /* strings and blobs not duplicate eliminated don't get any points here */
1296 : return 0;
1297 : }
1298 : }
1299 :
1300 : static int
1301 14651 : score_se_base(visitor *v, sql_rel *rel, sql_exp *e)
1302 : {
1303 14651 : int res = 0;
1304 14651 : sql_subtype *t = exp_subtype(e);
1305 14651 : sql_column *c = NULL;
1306 :
1307 14651 : if (e->type == e_convert) /* keep unsafes at the end (TODO improve) */
1308 : return -1000;
1309 : /* can we find out if the underlying table is sorted */
1310 14243 : if ((c = exp_find_column(rel, e, -2)) && v->storage_based_opt && mvc_is_sorted(v->sql, c))
1311 14243 : res += 600;
1312 :
1313 : /* prefer the shorter var types over the longer ones */
1314 14243 : res += sql_class_base_score(v, c, t, is_equality_or_inequality_exp(e->flag)); /* smaller the type, better */
1315 14243 : return res;
1316 : }
1317 :
1318 : static int
1319 17635 : score_se(visitor *v, sql_rel *rel, sql_exp *e)
1320 : {
1321 17635 : int score = 0;
1322 17635 : if (e->type == e_cmp && !is_complex_exp(e->flag)) {
1323 14651 : sql_exp *l = e->l;
1324 :
1325 14651 : while (l->type == e_cmp) { /* go through nested comparisons */
1326 2 : sql_exp *ll;
1327 :
1328 2 : if (l->flag == cmp_filter || l->flag == cmp_or)
1329 0 : ll = ((list*)l->l)->h->data;
1330 : else
1331 2 : ll = l->l;
1332 2 : if (ll->type != e_cmp)
1333 : break;
1334 : l = ll;
1335 : }
1336 14651 : score += score_se_base(v, rel, l);
1337 : }
1338 17635 : score += exp_keyvalue(e);
1339 17635 : return score;
1340 : }
1341 :
1342 : static inline sql_rel *
1343 697659 : rel_select_order(visitor *v, sql_rel *rel)
1344 : {
1345 697659 : int *scores = NULL;
1346 697659 : sql_exp **exps = NULL;
1347 :
1348 697659 : if (is_select(rel->op) && list_length(rel->exps) > 1) {
1349 7476 : node *n;
1350 7476 : int i, nexps = list_length(rel->exps);
1351 7476 : scores = SA_NEW_ARRAY(v->sql->ta, int, nexps);
1352 7476 : exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, nexps);
1353 :
1354 25111 : for (i = 0, n = rel->exps->h; n; i++, n = n->next) {
1355 17664 : exps[i] = n->data;
1356 17664 : if (find_prop(exps[i]->p, PROP_HASHCOL))
1357 : return rel;
1358 17635 : scores[i] = score_se(v, rel, n->data);
1359 : }
1360 7447 : GDKqsort(scores, exps, NULL, nexps, sizeof(int), sizeof(void *), TYPE_int, true, true);
1361 :
1362 25082 : for (i = 0, n = rel->exps->h; n; i++, n = n->next)
1363 17635 : n->data = exps[i];
1364 : }
1365 :
1366 : return rel;
1367 : }
1368 :
1369 : /* Compute the efficiency of using this expression early in a group by list */
1370 : static int
1371 24322 : score_gbe(visitor *v, sql_rel *rel, sql_exp *e)
1372 : {
1373 24322 : int res = 0;
1374 24322 : sql_subtype *t = exp_subtype(e);
1375 24322 : sql_column *c = exp_find_column(rel, e, -2);
1376 :
1377 24322 : if (e->card == CARD_ATOM) /* constants are trivial to group */
1378 38 : res += 1000;
1379 : /* can we find out if the underlying table is sorted */
1380 24322 : if (is_unique(e) || find_prop(e->p, PROP_HASHCOL) || (c && v->storage_based_opt && mvc_is_unique(v->sql, c))) /* distinct columns */
1381 2476 : res += 700;
1382 21084 : if (c && v->storage_based_opt && mvc_is_sorted(v->sql, c))
1383 2491 : res += 500;
1384 24322 : if (find_prop(e->p, PROP_HASHIDX)) /* has hash index */
1385 0 : res += 200;
1386 :
1387 : /* prefer the shorter var types over the longer ones */
1388 24322 : res += sql_class_base_score(v, c, t, true); /* smaller the type, better */
1389 24322 : return res;
1390 : }
1391 :
1392 : /* reorder group by expressions */
1393 : static inline sql_rel *
1394 697659 : rel_groupby_order(visitor *v, sql_rel *rel)
1395 : {
1396 697659 : int *scores = NULL;
1397 697659 : sql_exp **exps = NULL;
1398 :
1399 697659 : if (is_groupby(rel->op) && list_length(rel->r) > 1) {
1400 7983 : node *n;
1401 7983 : list *gbe = rel->r;
1402 7983 : int i, ngbe = list_length(gbe);
1403 7983 : scores = SA_NEW_ARRAY(v->sql->ta, int, ngbe);
1404 7983 : exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, ngbe);
1405 :
1406 : /* first sorting step, give priority for integers and sorted columns */
1407 32305 : for (i = 0, n = gbe->h; n; i++, n = n->next) {
1408 24322 : exps[i] = n->data;
1409 24322 : scores[i] = score_gbe(v, rel, exps[i]);
1410 : }
1411 7983 : GDKqsort(scores, exps, NULL, ngbe, sizeof(int), sizeof(void *), TYPE_int, true, true);
1412 :
1413 : /* second sorting step, give priority to strings with lower number of digits */
1414 17439 : for (i = ngbe - 1; i && !scores[i]; i--); /* find expressions with no score from the first round */
1415 7983 : if (scores[i])
1416 6991 : i++;
1417 7983 : if (ngbe - i > 1) {
1418 12002 : for (int j = i; j < ngbe; j++) {
1419 9025 : sql_subtype *t = exp_subtype(exps[j]);
1420 9025 : scores[j] = t ? t->digits : 0;
1421 : }
1422 : /* the less number of digits the better, order ascending */
1423 2977 : GDKqsort(scores + i, exps + i, NULL, ngbe - i, sizeof(int), sizeof(void *), TYPE_int, false, true);
1424 : }
1425 :
1426 32305 : for (i = 0, n = gbe->h; n; i++, n = n->next)
1427 24322 : n->data = exps[i];
1428 : }
1429 :
1430 697659 : return rel;
1431 : }
1432 :
1433 : /* This optimization loop contains optimizations that can potentially use statistics */
1434 : static sql_rel *
1435 697659 : rel_final_optimization_loop_(visitor *v, sql_rel *rel)
1436 : {
1437 : /* Run rel_push_select_up only once at the end to avoid an infinite optimization loop */
1438 697659 : rel = rel_push_select_up(v, rel);
1439 697659 : rel = rel_select_order(v, rel);
1440 :
1441 : /* TODO? Maybe later add rel_simplify_count, rel_join2semijoin, rel_simplify_fk_joins,
1442 : rel_distinct_project2groupby, rel_simplify_predicates, rel_simplify_math,
1443 : rel_distinct_aggregate_on_unique_values */
1444 :
1445 697659 : rel = rel_groupby_order(v, rel);
1446 697659 : return rel;
1447 : }
1448 :
1449 : static sql_rel *
1450 70587 : rel_final_optimization_loop(visitor *v, global_props *gp, sql_rel *rel)
1451 : {
1452 70587 : (void) gp;
1453 70587 : return rel_visitor_bottomup(v, rel, &rel_final_optimization_loop_);
1454 : }
1455 :
1456 : run_optimizer
1457 731637 : bind_final_optimization_loop(visitor *v, global_props *gp)
1458 : {
1459 731637 : int flag = v->sql->sql_optimizer;
1460 : /* At the moment, this optimizer has dependency on 3 flags */
1461 563110 : return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_select]) &&
1462 802225 : (flag & push_select_up) && (flag & optimize_select_and_joins_topdown) && (flag & optimize_projections) ? rel_final_optimization_loop : NULL;
1463 : }
|