Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024, 2025 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer_private.h"
15 : #include "rel_statistics.h"
16 : #include "rel_basetable.h"
17 : #include "rel_rewriter.h"
18 : #include "sql_storage.h"
19 :
20 : static sql_exp *
21 3533012 : comparison_find_column(sql_exp *input, sql_exp *e)
22 : {
23 3533012 : switch (input->type) {
24 100435 : case e_convert: {
25 100435 : list *types = (list *)input->r;
26 100435 : sql_class from = ((sql_subtype*)types->h->data)->type->eclass, to = ((sql_subtype*)types->h->next->data)->type->eclass;
27 100435 : if (from == to)
28 194115 : return comparison_find_column(input->l, e) ? input : NULL;
29 : return NULL;
30 : }
31 3107670 : case e_column:
32 3107670 : return exp_match(e, input) ? input : NULL;
33 : default:
34 : return NULL;
35 : }
36 : }
37 :
38 : /* multi lo <= col <= hi, maybe still be false even if lo or hi are NULL, possibly similar for filter on multiple
39 : * columns */
40 : #define comp_single_column(c) (!c->f)
41 :
42 : static sql_exp *
43 8824892 : rel_propagate_column_ref_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
44 : {
45 8908668 : assert(e->type == e_column);
46 8908668 : if (rel) {
47 8908587 : switch(rel->op) {
48 4129330 : case op_left:
49 : case op_right:
50 : case op_full:
51 : case op_join:
52 : case op_select:
53 : case op_anti:
54 : case op_semi: {
55 4129330 : bool found_without_semantics = false, found_left = false, found_right = false, still_unique = false;
56 :
57 4129330 : if ((is_innerjoin(rel->op) || is_select(rel->op)) && list_length(rel->exps) == 1 && exp_is_false(rel->exps->h->data))
58 : return NULL; /* nothing will pass, skip */
59 :
60 : /* propagate from the bottom first */
61 4108695 : if (rel_propagate_column_ref_statistics(sql, rel->l, e))
62 : found_left = true;
63 2596480 : if (!found_left && is_join(rel->op) && rel_propagate_column_ref_statistics(sql, rel->r, e))
64 4108695 : found_right = true;
65 :
66 4108695 : if (!found_left && !found_right)
67 : return NULL;
68 1809610 : if (!list_empty(rel->exps) && rel->op != op_anti) { /* if there's an or, the MIN and MAX get difficult to propagate */
69 3559647 : for (node *n = rel->exps->h ; n ; n = n->next) {
70 1846387 : sql_exp *comp = n->data, *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL, *fe = comp->f, *fne = NULL;
71 :
72 1846387 : if (comp->type == e_cmp) {
73 1845519 : if (is_theta_exp(comp->flag) && ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) || (fe && (fne = comparison_find_column(fe, e))))) {
74 127457 : atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
75 127457 : *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
76 :
77 : /* not semantics found or if explicitly filtering not null values from the column */
78 127457 : found_without_semantics |= (!is_semantics(comp) && comp_single_column(comp)) || (comp->flag == cmp_equal && lne && is_anti(comp) && exp_is_null(re));
79 127457 : still_unique |= comp->flag == cmp_equal && is_unique(le) && is_unique(re); /* unique if only equi-joins on unique columns are there */
80 127457 : if (is_full(rel->op) || (is_left(rel->op) && found_left) || (is_right(rel->op) && found_right)) /* on outer joins, min and max cannot be propagated on some cases */
81 22655 : continue;
82 : /* if (end2 >= start1 && start2 <= end1) then the 2 intervals are intersected */
83 104802 : if (fe && lval_min && lval_max) { /* range case, the middle expression must intersect the other two */
84 4618 : atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
85 4618 : int int1 = rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0,
86 4618 : int2 = fval_min && fval_max && atom_cmp(fval_max, lval_min) >= 0 && atom_cmp(fval_min, lval_max) <= 0,
87 4618 : symmetric = is_symmetric(comp);
88 :
89 4618 : if (is_anti(comp) || (!symmetric && fval_min && rval_max && atom_cmp(fval_min, rval_max) < 0)) /* for asymmetric case the re range must be after the fe range */
90 1815 : continue;
91 2803 : if (lne && int1 && int2) {
92 103 : if (symmetric) {
93 0 : prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
94 0 : atom *nmin = statistics_atom_min(sql, rval_min, fval_min), *nmax = statistics_atom_max(sql, rval_max, fval_max);
95 : /* max is min from le and (max from re and fe max) */
96 0 : set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, nmax, p2->value.pval) : nmax);
97 : /* min is max from le and (min from re and fe min) */
98 0 : set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, nmin, p1->value.pval) : nmin);
99 : } else {
100 103 : prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
101 : /* max is min from le and fe max */
102 103 : set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, fval_max, p2->value.pval) : fval_max);
103 : /* min is max from le and re min */
104 103 : set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, rval_min, p1->value.pval) : rval_min);
105 : }
106 2700 : } else if (rne) {
107 590 : if (symmetric && int1 && int2) { /* min is max from le and (min from re and fe min) */
108 0 : prop *p = find_prop(e->p, PROP_MIN);
109 0 : atom *nmin = p ? statistics_atom_min(sql, p->value.pval, fval_min) : fval_min;
110 0 : set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, nmin, lval_min));
111 590 : } else if (int1) { /* min is max from le and re min */
112 100 : prop *p = find_prop(e->p, PROP_MIN);
113 100 : set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
114 : }
115 2110 : } else if (fne) {
116 549 : if (symmetric && int1 && int2) { /* max is min from le and (max from re and fe max) */
117 0 : prop *p = find_prop(e->p, PROP_MAX);
118 0 : atom *nmax = p ? statistics_atom_max(sql, p->value.pval, rval_max) : rval_max;
119 0 : set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, nmax, lval_max) : nmax);
120 549 : } else if (int2) { /* max is min from le and fe max */
121 266 : prop *p = find_prop(e->p, PROP_MAX);
122 266 : set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
123 : }
124 : }
125 100184 : } else if (lval_min && lval_max && rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0) {
126 : /* both min and max must be set and the intervals must overlap */
127 41896 : switch (comp->flag) {
128 28235 : case cmp_equal: /* for equality reduce */
129 28235 : set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_max(sql, lval_max, rval_max) : statistics_atom_min(sql, lval_max, rval_max));
130 28235 : set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_min(sql, lval_min, rval_min) : statistics_atom_max(sql, lval_min, rval_min));
131 28235 : break;
132 4345 : case cmp_notequal: /* for inequality expand */
133 4345 : set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_min(sql, lval_max, rval_max) : statistics_atom_max(sql, lval_max, rval_max));
134 4345 : set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_max(sql, lval_min, rval_min) : statistics_atom_min(sql, lval_min, rval_min));
135 4345 : break;
136 5310 : case cmp_gt:
137 : case cmp_gte:
138 9682 : if (!is_anti(comp) && lne) { /* min is max from both min */
139 4372 : prop *p = find_prop(e->p, PROP_MIN);
140 4372 : set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min, p->value.pval) : rval_min);
141 938 : } else if (!is_anti(comp)) { /* max is min from both max */
142 938 : prop *p = find_prop(e->p, PROP_MAX);
143 938 : set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
144 : }
145 : break;
146 4006 : case cmp_lt:
147 : case cmp_lte:
148 7205 : if (!is_anti(comp) && lne) { /* max is min from both max */
149 3199 : prop *p = find_prop(e->p, PROP_MAX);
150 3199 : set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max, p->value.pval) : rval_max);
151 807 : } else if (!is_anti(comp)) { /* min is max from both min */
152 807 : prop *p = find_prop(e->p, PROP_MIN);
153 807 : set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
154 : }
155 : break;
156 : default: /* Maybe later I can do cmp_in and cmp_notin */
157 : break;
158 : }
159 : }
160 : }
161 : }
162 : }
163 : }
164 1809610 : if (is_full(rel->op) || (is_left(rel->op) && found_right) || (is_right(rel->op) && found_left))
165 36924 : set_has_nil(e);
166 1809610 : if (!is_outerjoin(rel->op) && found_without_semantics) /* at an outer join, null values pass */
167 93944 : set_has_no_nil(e);
168 1809610 : if (is_join(rel->op) && is_unique(e) && !still_unique)
169 119744 : set_not_unique(e);
170 1809610 : return e;
171 : }
172 4676113 : case op_table:
173 : case op_basetable:
174 : case op_union:
175 : case op_except:
176 : case op_inter:
177 : case op_munion:
178 : case op_project:
179 : case op_groupby: {
180 4676113 : if (is_recursive(rel))
181 : return NULL;
182 4675916 : sql_exp *found;
183 4675916 : atom *fval;
184 4675916 : prop *est;
185 4675916 : if ((found = rel_find_exp(rel, e))) {
186 2198515 : if (rel->op != op_table) { /* At the moment don't propagate statistics for table relations */
187 2153357 : if ((fval = find_prop_and_get(found->p, PROP_MAX)))
188 1137417 : set_minmax_property(sql, e, PROP_MAX, fval);
189 2153349 : if ((fval = find_prop_and_get(found->p, PROP_MIN)))
190 1144459 : set_minmax_property(sql, e, PROP_MIN, fval);
191 2153361 : if (!has_nil(found))
192 1397146 : set_has_no_nil(e);
193 2153361 : if (is_unique(found) || (need_distinct(rel) && list_length(rel->exps) == 1) ||
194 1735810 : (is_groupby(rel->op) && (list_empty(rel->r) || (list_length(rel->r) == 1 && exps_find_exp(rel->r, e)))))
195 428473 : set_unique(e);
196 : /* propagate unique estimation for known cases */
197 2153359 : if (is_groupby(rel->op) && list_empty(rel->r) && !find_prop(e->p, PROP_NUNIQUES)) { /* global aggregate case */
198 7647 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
199 7647 : p->value.dval = 1;
200 2145712 : } else if (((is_basetable(rel->op) || is_except(rel->op) || is_inter(rel->op) || is_simple_project(rel->op) ||
201 71851 : (is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
202 2085194 : (est = find_prop(found->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES))) {
203 194851 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
204 194851 : p->value.dval = est->value.dval;
205 : }
206 : }
207 2198518 : return e;
208 : }
209 : return NULL;
210 : }
211 83776 : case op_topn:
212 : case op_sample:
213 83776 : return rel_propagate_column_ref_statistics(sql, rel->l, e);
214 : default:
215 : break;
216 : }
217 : }
218 : return NULL;
219 : }
220 :
221 : static atom *
222 4716978 : atom_from_valptr( allocator *sa, sql_subtype *tpe, ValRecord *v)
223 : {
224 4716978 : atom *a = SA_NEW(sa, atom);
225 :
226 4717023 : assert(!VALisnil(v));
227 4716969 : *a = (atom) {.tpe = *tpe,};
228 4716969 : SA_VALcopy(sa, &a->data, v);
229 4717155 : return a;
230 : }
231 :
232 : void
233 4375773 : sql_column_get_statistics(mvc *sql, sql_column *c, sql_exp *e)
234 : {
235 4375773 : bool nonil = false, unique = false;
236 4375773 : double unique_est = 0.0;
237 4375773 : ValRecord min, max;
238 4375773 : int ok = mvc_col_stats(sql, c, &nonil, &unique, &unique_est, &min, &max);
239 :
240 4376531 : if (has_nil(e) && nonil)
241 2917459 : set_has_no_nil(e);
242 4376531 : if (!is_unique(e) && unique)
243 1153143 : set_unique(e);
244 4376531 : if (unique_est != 0.0) {
245 3088478 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
246 3088303 : p->value.dval = unique_est;
247 : }
248 4376356 : unsigned int digits = 0;
249 4376356 : sql_subtype *et = exp_subtype(e);
250 4376599 : if (et->type->eclass == EC_DEC || et->type->eclass == EC_NUM)
251 2866396 : digits = et->digits;
252 4376599 : if ((ok & 2) == 2) {
253 2355801 : if (!VALisnil(&max)) {
254 2355746 : prop *p = e->p = prop_create(sql->sa, PROP_MAX, e->p);
255 2355695 : p->value.pval = atom_from_valptr(sql->sa, &c->type, &max);
256 2355673 : if (digits) {
257 1751668 : unsigned int nd = atom_digits(p->value.pval);
258 1751640 : if (nd < digits)
259 : digits = nd;
260 1751640 : if (!digits)
261 : digits = 1;
262 : }
263 : }
264 2355641 : VALclear(&max);
265 : }
266 4376429 : if ((ok & 1) == 1) {
267 2361772 : if (!VALisnil(&min)) {
268 2361785 : prop *p = e->p = prop_create(sql->sa, PROP_MIN, e->p);
269 2361819 : p->value.pval = atom_from_valptr(sql->sa, &c->type, &min);
270 2361887 : if (digits) {
271 1758892 : unsigned int nd = atom_digits(p->value.pval);
272 1758915 : if (nd > digits) /* possibly set to low by max value */
273 : digits = nd;
274 : if (!digits)
275 : digits = 1;
276 : }
277 : }
278 2361914 : VALclear(&min);
279 : }
280 4376582 : if (digits)
281 2866370 : et->digits = digits;
282 4376582 : if (et->type->eclass == EC_DEC && et->digits <= et->scale)
283 216 : et->digits = et->scale + 1;
284 4376582 : }
285 :
286 : static void
287 958375 : rel_basetable_column_get_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
288 : {
289 958375 : if (e->p)
290 : return;
291 307956 : sql_column *c = NULL;
292 :
293 307956 : if ((c = rel_base_find_column(rel, e->nid))) {
294 209788 : sql_column_get_statistics(sql, c, e);
295 : }
296 : }
297 :
298 : static bool
299 8876 : rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, sql_exp *e, int i)
300 : {
301 8876 : sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
302 8876 : atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
303 8876 : *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
304 8876 : prop *est;
305 :
306 : /* for the intersection, if both expressions don't overlap, it can be pruned */
307 8876 : if (is_inter(rel->op) && !has_nil(le) && !has_nil(re) &&
308 511 : ((rval_max && lval_min && atom_cmp(rval_max, lval_min) < 0) || (rval_min && lval_max && atom_cmp(rval_min, lval_max) > 0)))
309 26 : return true;
310 :
311 8850 : if (lval_max && rval_max) {
312 2557 : if (is_union(rel->op))
313 3 : set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
314 2554 : else if (is_inter(rel->op))
315 399 : set_minmax_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval_max, rval_max)); /* for intersect the new max will be the min of the two */
316 : else /* except */
317 2155 : set_minmax_property(sql, e, PROP_MAX, lval_max);
318 : }
319 8850 : if (lval_min && rval_min) {
320 2557 : if (is_union(rel->op))
321 3 : set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
322 2554 : else if (is_inter(rel->op))
323 399 : set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval_min, rval_min)); /* for intersect the new min will be the max of the two */
324 : else /* except */
325 2155 : set_minmax_property(sql, e, PROP_MIN, lval_min);
326 : }
327 :
328 8850 : if (is_union(rel->op) || is_munion(rel->op)) {
329 5986 : if (!has_nil(le) && !has_nil(re))
330 879 : set_has_no_nil(e);
331 5986 : if (need_distinct(rel) && list_length(rel->exps) == 1)
332 0 : set_unique(e);
333 2864 : } else if (is_inter(rel->op)) {
334 564 : if (!has_nil(le) || !has_nil(re))
335 509 : set_has_no_nil(e);
336 564 : if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
337 345 : set_unique(e);
338 : } else {
339 2300 : assert(is_except(rel->op));
340 2300 : if (!has_nil(le))
341 2236 : set_has_no_nil(e);
342 2300 : if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
343 2012 : set_unique(e);
344 : }
345 : /* propagate unique estimation for known cases */
346 8850 : if (!is_union(rel->op) && (est = find_prop(le->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES)) {
347 207 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
348 207 : p->value.dval = est->value.dval;
349 : }
350 : return false;
351 : }
352 :
353 :
354 : static void
355 63909 : rel_munion_get_statistics(mvc *sql, sql_rel *rel, list *rels, sql_exp *e, int i)
356 : {
357 63909 : if (is_recursive(rel))
358 : return ;
359 63909 : assert(is_munion(rel->op));
360 :
361 63909 : sql_rel *l = rels->h->data;
362 63909 : sql_exp *le = list_fetch(l->exps, i);
363 63909 : atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX);
364 63909 : bool has_nonil = !has_nil(le);
365 :
366 184878 : for(node *n = rels->h->next; n; n = n->next) {
367 120969 : sql_rel *r = n->data;
368 120969 : sql_exp *re = list_fetch(r->exps, i);
369 120969 : atom *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
370 :
371 120969 : if (lval_max && rval_max) {
372 44365 : set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
373 44365 : lval_max = find_prop_and_get(e->p, PROP_MAX);
374 : }
375 120969 : if (lval_min && rval_min) {
376 43816 : set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
377 43816 : lval_min = find_prop_and_get(e->p, PROP_MIN);
378 : }
379 120969 : has_nonil &= !has_nil(re);
380 :
381 : }
382 :
383 63909 : if (has_nonil)
384 43847 : set_has_no_nil(e);
385 :
386 63909 : if (need_distinct(rel) && list_length(rel->exps) == 1)
387 1597 : set_unique(e);
388 : }
389 :
390 :
391 : static sql_exp *
392 3565573 : rel_propagate_statistics(visitor *v, sql_rel *rel, sql_exp *e, int depth)
393 : {
394 3565573 : mvc *sql = v->sql;
395 3565573 : atom *lval;
396 :
397 3565573 : (void) depth;
398 3565573 : switch(e->type) {
399 2206924 : case e_column:
400 2206924 : switch (rel->op) { /* set relations don't call rel_propagate_statistics */
401 284908 : case op_join:
402 : case op_left:
403 : case op_right:
404 : case op_full:
405 : case op_semi:
406 : case op_anti: {
407 284908 : sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e);
408 284908 : if (!found)
409 142902 : (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
410 : break;
411 : }
412 1922016 : case op_select:
413 : case op_project:
414 : case op_groupby: {
415 1922016 : sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e); /* labels may be found on the same projection, ugh */
416 1922010 : if (!found && is_simple_project(rel->op))
417 133713 : (void) rel_propagate_column_ref_statistics(sql, rel, e);
418 : break;
419 : }
420 0 : case op_insert:
421 : case op_update:
422 : case op_delete:
423 0 : (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
424 0 : break;
425 : default:
426 : break;
427 : }
428 : break;
429 102365 : case e_convert: {
430 102365 : sql_subtype *to = exp_totype(e), *from = exp_fromtype(e);
431 102365 : sql_exp *l = e->l;
432 102365 : sql_class fr = from->type->eclass, too = to->type->eclass;
433 102365 : prop *est;
434 :
435 102365 : if (fr == too) {
436 93198 : if ((lval = find_prop_and_get(l->p, PROP_MAX))) {
437 60303 : atom *res = atom_copy(sql->sa, lval);
438 60303 : if ((res = atom_cast(sql->sa, res, to)))
439 60280 : set_minmax_property(sql, e, PROP_MAX, res);
440 : }
441 93198 : if ((lval = find_prop_and_get(l->p, PROP_MIN))) {
442 60941 : atom *res = atom_copy(sql->sa, lval);
443 60941 : if ((res = atom_cast(sql->sa, res, to)))
444 60918 : set_minmax_property(sql, e, PROP_MIN, res);
445 : }
446 : }
447 : /* propagating nuniques through conversions is tricky. I am adding just the general cases */
448 102365 : if ((est = find_prop(l->p, PROP_NUNIQUES)) &&
449 63788 : (((fr == EC_TIME || fr == EC_TIME_TZ || fr == EC_DATE) && (too == EC_TIMESTAMP || too == EC_TIMESTAMP_TZ)) ||
450 63763 : ((fr == EC_NUM || fr == EC_BIT) && too == EC_NUM) || (fr == EC_DEC && too == EC_DEC) || (fr != EC_EXTERNAL && too == EC_STRING))) {
451 60329 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
452 60329 : p->value.dval = est->value.dval;
453 : }
454 102365 : if (!has_nil(l))
455 58044 : set_has_no_nil(e);
456 : break;
457 : }
458 379391 : case e_aggr:
459 : case e_func: {
460 379391 : BUN lv;
461 379391 : sql_subfunc *f = e->f;
462 :
463 379391 : if (!f->func->s) {
464 352272 : int key = hash_key(f->func->base.name); /* Using hash lookup */
465 352272 : sql_hash_e *he = sql_functions_lookup->buckets[key&(sql_functions_lookup->size-1)];
466 352272 : lookup_function look = NULL;
467 :
468 754052 : for (; he && !look; he = he->chain) {
469 401780 : struct function_properties* fp = (struct function_properties*) he->value;
470 :
471 401780 : if (!strcmp(f->func->base.name, fp->name))
472 108607 : look = fp->func;
473 : }
474 352272 : if (look)
475 108607 : look(sql, e);
476 : }
477 : /* for global aggregates with no semantics, if the left relation has values, then the output won't be NULL */
478 379391 : if (!is_semantics(e) && e->l && !have_nil(e->l) &&
479 109333 : (e->type != e_aggr || (is_groupby(rel->op) && list_length(rel->r)) || ((lv = get_rel_count(rel->l)) != BUN_NONE && lv > 0)))
480 108997 : set_has_no_nil(e);
481 : /* set properties for global aggregates */
482 379391 : if (e->type == e_aggr && is_groupby(rel->op) && list_empty(rel->r)) {
483 7950 : if (!find_prop(e->p, PROP_NUNIQUES)) {
484 7950 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
485 7950 : p->value.dval = 1;
486 : }
487 7950 : set_unique(e);
488 : }
489 : break;
490 : }
491 593499 : case e_atom:
492 593499 : if (e->l) {
493 575461 : atom *a = (atom*) e->l;
494 575461 : if (!a->isnull) {
495 506221 : set_minmax_property(sql, e, PROP_MAX, a);
496 506221 : set_minmax_property(sql, e, PROP_MIN, a);
497 : }
498 575461 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
499 575461 : p->value.dval = 1;
500 18038 : } else if (e->f) {
501 4311 : list *vals = (list *) e->f;
502 4311 : sql_exp *first = vals->h ? vals->h->data : NULL;
503 4311 : atom *max = NULL, *min = NULL; /* all child values must have a valid min/max */
504 4311 : int has_nil = 0;
505 :
506 4311 : if (first) {
507 4311 : max = ((lval = find_prop_and_get(first->p, PROP_MAX))) ? lval : NULL;
508 4311 : min = ((lval = find_prop_and_get(first->p, PROP_MIN))) ? lval : NULL;
509 4311 : has_nil |= has_nil(first);
510 : }
511 :
512 18478 : for (node *n = vals->h ? vals->h->next : NULL ; n ; n = n->next) {
513 9856 : sql_exp *ee = n->data;
514 :
515 9856 : if (min && max) {
516 9359 : if ((lval = find_prop_and_get(ee->p, PROP_MAX))) {
517 9313 : max = atom_cmp(lval, max) > 0 ? lval : max;
518 : } else {
519 : max = NULL;
520 : }
521 9359 : if ((lval = find_prop_and_get(ee->p, PROP_MIN))) {
522 9313 : min = atom_cmp(min, lval) > 0 ? lval : min;
523 : } else {
524 : min = NULL;
525 : }
526 : }
527 9856 : has_nil |= has_nil(ee);
528 : }
529 :
530 4311 : if (!has_nil)
531 3937 : set_has_no_nil(e);
532 4311 : if (min && max) {
533 3893 : set_minmax_property(sql, e, PROP_MAX, max);
534 3893 : set_minmax_property(sql, e, PROP_MIN, min);
535 : }
536 4311 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
537 4311 : p->value.dval = (dbl) list_length(vals);
538 : } else {
539 13727 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
540 13727 : p->value.dval = 1;
541 : }
542 : break;
543 283394 : case e_cmp:
544 : /* TODO? propagating min/max/unique of booleans is not very worth it */
545 283394 : if (e->flag == cmp_or || e->flag == cmp_filter) {
546 15247 : if (!have_nil(e->l) && !have_nil(e->r))
547 10186 : set_has_no_nil(e);
548 268147 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
549 20181 : sql_exp *le = e->l;
550 20181 : if (!has_nil(le) && !have_nil(e->r))
551 16946 : set_has_no_nil(e);
552 : } else {
553 247966 : sql_exp *le = e->l, *re = e->r, *fe = e->f;
554 247966 : if (!has_nil(le) && !has_nil(re) && (!e->f || !has_nil(fe)))
555 175013 : set_has_no_nil(e);
556 : }
557 : break;
558 : case e_psm:
559 : break;
560 : }
561 :
562 : #ifndef NDEBUG
563 : {
564 : /* min and max cannot be NULL and min must be <= than max, if it happens the inner relation must be empty! */
565 3565567 : atom *min = find_prop_and_get(e->p, PROP_MIN), *max = find_prop_and_get(e->p, PROP_MAX);
566 :
567 3565575 : (void) min;
568 3565575 : (void) max;
569 3565575 : assert(!min || !min->isnull);
570 3565575 : assert(!max || !max->isnull);
571 3565575 : assert(!min || !max || (min && max)/* atom_cmp(min, max) <= 0*/);
572 : }
573 : #endif
574 3565575 : return e;
575 : }
576 :
577 : static list * /* Remove predicates always false from min/max values */
578 144417 : rel_prune_predicates(visitor *v, sql_rel *rel)
579 : {
580 144417 : if (rel->l) {
581 144417 : sql_rel *l = rel->l;
582 144417 : if (is_single(l))
583 3374 : return rel->exps;
584 : }
585 141043 : if (!list_empty(rel->attr))
586 3011 : return rel->exps;
587 290950 : for (node *n = rel->exps->h ; n ; n = n->next) {
588 152918 : sql_exp *e = n->data;
589 :
590 152918 : if (e->type == e_cmp && is_theta_exp(e->flag)) {
591 125895 : sql_exp *le = e->l, *re = e->r, *fe = e->f;
592 125895 : atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
593 125895 : *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
594 125895 : bool always_false = false, always_true = false;
595 :
596 125895 : if (fe && !is_symmetric(e)) {
597 3045 : atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
598 3045 : comp_type lower = range2lcompare(e->flag), higher = range2rcompare(e->flag);
599 3644 : int not_int1 = rval_min && lval_max && /* the middle and left intervals don't overlap */
600 1654 : (!is_anti(e) && (lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0)),
601 4059 : not_int2 = lval_min && fval_max && /* the middle and right intervals don't overlap */
602 2482 : (!is_anti(e) && (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0)),
603 3045 : not_int3 = rval_min && fval_max && /* the left interval is after the right one */
604 1285 : (!is_anti(e) && (atom_cmp(rval_min, fval_max) > 0));
605 :
606 3045 : always_false |= not_int1 || not_int2 || not_int3;
607 : /* for anti the middle must be before the left or after the right or the right after the left, for the other the middle must be always between the left and right intervals */
608 4089 : always_true |= !has_nil(le) && !has_nil(re) && !has_nil(fe) &&
609 3941 : lval_min && lval_max && rval_min && rval_max && fval_min && fval_max &&
610 573 : (is_anti(e) ? ((lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) || (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
611 488 : ((lower == cmp_gte ? atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0)));
612 : } else if (!fe) {
613 122834 : if (!is_semantics(e)) /* trivial not null cmp null case */
614 234208 : always_false |= !is_anti(e) && ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)));
615 122834 : switch (e->flag) {
616 109297 : case cmp_equal:
617 109297 : if (lval_min && lval_max && rval_min && rval_max && (!is_semantics(e) || !has_nil(le) || !has_nil(re)))
618 136844 : always_false |= (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
619 109297 : if (is_semantics(e)) { /* prune *= NULL cases */
620 5693 : always_false |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
621 11386 : always_true |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
622 : }
623 : break;
624 5345 : case cmp_notequal:
625 5345 : if (lval_min && lval_max && rval_min && rval_max)
626 7386 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
627 5345 : if (is_semantics(e)) {
628 37 : always_true |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
629 74 : always_false |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
630 : }
631 : break;
632 5487 : case cmp_gt:
633 5487 : if (lval_max && rval_min)
634 1834 : always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
635 5487 : if (lval_min && rval_max)
636 3668 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
637 : break;
638 121 : case cmp_gte:
639 121 : if (lval_max && rval_min)
640 51 : always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
641 121 : if (lval_min && rval_max)
642 102 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
643 : break;
644 2502 : case cmp_lt:
645 2502 : if (lval_min && rval_max)
646 1382 : always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
647 2502 : if (lval_max && rval_min)
648 2812 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
649 : break;
650 82 : case cmp_lte:
651 82 : if (lval_min && rval_max)
652 11 : always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
653 82 : if (lval_max && rval_min)
654 22 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
655 : break;
656 : default: /* Maybe later I can do cmp_in and cmp_notin but don't forget to remove is_theta_exp check up there */
657 : break;
658 : }
659 : }
660 125895 : assert(!always_false || !always_true);
661 125895 : if (always_false || always_true) {
662 3706 : sql_exp *ne = exp_atom_bool(v->sql->sa, always_true /* if always_true set then true else false */);
663 3706 : if (exp_name(e))
664 0 : exp_prop_alias(v->sql->sa, ne, e);
665 3706 : n->data = ne;
666 3706 : v->changes++;
667 : }
668 : }
669 : }
670 138032 : return rel->exps;
671 : }
672 :
673 : static sql_rel *
674 14 : set_setop_side(visitor *v, sql_rel *rel, sql_rel *side)
675 : {
676 14 : if (side == rel->r)
677 0 : rel->r = NULL;
678 : else
679 14 : rel->l = NULL;
680 14 : if (!is_simple_project(side->op) || rel_is_ref(side) || !list_empty(side->r)) {
681 0 : side = rel_project(v->sql->sa, side, rel_projections(v->sql, side, NULL, 0, 1));
682 0 : set_count_prop(v->sql->sa, side, get_rel_count(side->l));
683 0 : side->exps = exps_exp_visitor_bottomup(v, side, side->exps, 0, &rel_propagate_statistics, false);
684 : }
685 35 : for (node *n = rel->exps->h, *m = side->exps->h ; n && m ; n = n->next, m = m->next) {
686 21 : sql_exp *e1 = n->data, *e2 = m->data;
687 21 : exp_prop_alias(v->sql->sa, e2, e1);
688 : }
689 14 : list_hash_clear(side->exps);
690 :
691 14 : if (need_distinct(rel))
692 0 : set_distinct(side);
693 14 : side->p = prop_copy(v->sql->sa, rel->p);
694 14 : rel_destroy(rel);
695 14 : return side;
696 : }
697 :
698 : static BUN
699 26588 : trivial_project_exp_card(sql_exp *e)
700 : {
701 26960 : if (e->type == e_convert)
702 372 : return trivial_project_exp_card(e->l);
703 26588 : return e->type == e_atom && e->f ? (BUN) list_length(e->f) : 1;
704 : }
705 :
706 : static BUN
707 26189 : rel_calc_nuniques(mvc *sql, sql_rel *l, list *exps)
708 : {
709 26189 : BUN lv = get_rel_count(l);
710 :
711 26188 : if (lv == 0)
712 : return 0;
713 23452 : if (!list_empty(exps)) {
714 23452 : BUN nuniques = 0;
715 : /* compute the highest number of unique values */
716 58620 : for (node *n = exps->h ; n && nuniques != BUN_NONE ; n = n->next) {
717 35167 : sql_exp *e = n->data;
718 35167 : sql_rel *bt = NULL;
719 35167 : prop *p = NULL;
720 35167 : BUN euniques = BUN_NONE;
721 35167 : atom *min, *max, *sub = NULL;
722 35167 : sql_subtype *tp = exp_subtype(e);
723 35167 : sql_class ec = tp ? tp->type->eclass : EC_STRING; /* if 'e' has no type (eg parameter), use a non-number type to fail condition */
724 :
725 35167 : if ((p = find_prop(e->p, PROP_NUNIQUES))) {
726 25403 : euniques = (BUN) p->value.dval;
727 9765 : } else if (e->type == e_column && e->nid && rel_find_exp_and_corresponding_rel(l, e, false, &bt, NULL) && bt && (p = find_prop(bt->p, PROP_COUNT))) {
728 7665 : euniques = (BUN) p->value.lval;
729 : }
730 : /* use min to max range to compute number of possible values in the domain for number types */
731 35168 : if ((EC_TEMP(ec)||ec==EC_NUM||ec==EC_MONTH||ec==EC_POS) &&
732 24272 : (min = find_prop_and_get(e->p, PROP_MIN)) && (max = find_prop_and_get(e->p, PROP_MAX))) {
733 : /* the range includes min and max, so the atom_inc call is needed */
734 : /* if 'euniques' has number of distinct values, compute min between both */
735 19326 : if ((sub = atom_sub(sql->sa, max, min)) && (sub = atom_inc(sql->sa, sub)) && (sub = atom_cast(sql->sa, sub, sql_bind_localtype("oid"))))
736 0 : euniques = MIN(euniques, (BUN) sub->data.val.oval);
737 : }
738 35168 : if (euniques != BUN_NONE)
739 33068 : nuniques = MAX(nuniques, euniques); /* the highest cardinality sets the estimation */
740 : else
741 : nuniques = BUN_NONE;
742 : }
743 23453 : if (nuniques != BUN_NONE)
744 : return nuniques;
745 : }
746 : return lv;
747 : }
748 :
749 : static sql_rel *
750 1366784 : rel_get_statistics_(visitor *v, sql_rel *rel)
751 : {
752 : /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
753 1366784 : uint8_t has_special_modify = *(uint8_t*) v->data;
754 1366784 : bool can_be_pruned = !has_special_modify && v->storage_based_opt;
755 :
756 : /* Don't look at the same relation twice */
757 1366784 : if (are_statistics_gathered(rel->used))
758 : return rel;
759 1366685 : rel->used |= statistics_gathered;
760 :
761 1366685 : switch (rel->op) {
762 319281 : case op_basetable: {
763 319281 : sql_table *t = (sql_table *) rel->l;
764 319281 : sqlstore *store = v->sql->session->tr->store;
765 :
766 319281 : if (!list_empty(rel->exps)) {
767 1277975 : for (node *n = rel->exps->h ; n ; n = n->next)
768 958514 : rel_basetable_column_get_statistics(v->sql, rel, n->data);
769 : }
770 : /* Set table row count. TODO? look for remote tables. Don't look at storage for declared tables, because it won't be cleaned */
771 319468 : if (isTable(t) && t->s && !isDeclaredTable(t)) /* count active rows only */
772 264817 : set_count_prop(v->sql->sa, rel, (BUN)store->storage_api.count_del(v->sql->session->tr, t, CNT_ACTIVE));
773 : break;
774 : }
775 2794 : case op_union:
776 : case op_inter:
777 : case op_except: {
778 2794 : bool empty_cross = false;
779 2794 : int i = 0;
780 2794 : sql_rel *l = rel->l, *pl = rel->l, *r = rel->r, *pr = rel->r;
781 :
782 2794 : while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
783 0 : pl = pl->l;
784 2794 : while (is_sample(pr->op) || is_topn(pr->op))
785 0 : pr = pr->l;
786 : /* if it's not a projection, then project and propagate statistics */
787 2794 : if (!is_project(pl->op) && !is_base(pl->op)) {
788 0 : pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
789 0 : set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
790 0 : pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
791 : }
792 2794 : if (!is_project(pr->op) && !is_base(pr->op)) {
793 0 : pr = rel_project(v->sql->sa, pr, rel_projections(v->sql, pr, NULL, 0, 1));
794 0 : set_count_prop(v->sql->sa, pr, get_rel_count(pr->l));
795 0 : pr->exps = exps_exp_visitor_bottomup(v, pr, pr->exps, 0, &rel_propagate_statistics, false);
796 : }
797 :
798 11670 : for (node *n = rel->exps->h ; n ; n = n->next) {
799 8876 : empty_cross |= rel_setop_get_statistics(v->sql, rel, pl->exps, pr->exps, n->data, i);
800 8876 : i++;
801 : }
802 :
803 : /* propagate row count */
804 2794 : if (is_union(rel->op)) {
805 277 : BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
806 277 : rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
807 :
808 277 : if (lv == 0 && rv == 0) { /* both sides empty */
809 2 : if (can_be_pruned)
810 : empty_cross = true;
811 : else
812 2 : set_count_prop(v->sql->sa, rel, 0);
813 275 : } else if (can_be_pruned && lv == 0 && !rel_is_ref(rel)) { /* left side empty */
814 0 : rel = set_setop_side(v, rel, r);
815 0 : empty_cross = false; /* don't rewrite again */
816 275 : } else if (can_be_pruned && rv == 0 && !rel_is_ref(rel)) { /* right side empty */
817 0 : rel = set_setop_side(v, rel, l);
818 0 : empty_cross = false; /* don't rewrite again */
819 275 : } else if (lv != BUN_NONE && rv != BUN_NONE) {
820 7 : set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
821 : }
822 2517 : } else if (is_inter(rel->op) || is_except(rel->op)) {
823 2517 : BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
824 2517 : rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
825 :
826 2517 : if (lv == 0) { /* left side empty */
827 63 : if (can_be_pruned)
828 : empty_cross = true;
829 : else
830 5 : set_count_prop(v->sql->sa, rel, 0);
831 2454 : } else if (rv == 0) { /* right side empty */
832 17 : if (is_inter(rel->op)) {
833 3 : if (can_be_pruned)
834 : empty_cross = true;
835 : else
836 0 : set_count_prop(v->sql->sa, rel, 0);
837 14 : } else if (can_be_pruned && !rel_is_ref(rel)) {
838 14 : rel = set_setop_side(v, rel, l);
839 14 : empty_cross = false; /* don't rewrite again */
840 : } else {
841 0 : set_count_prop(v->sql->sa, rel, lv);
842 : }
843 : } else {
844 2437 : set_count_prop(v->sql->sa, rel, lv);
845 : }
846 : }
847 2794 : if (can_be_pruned && empty_cross) {
848 81 : rel_destroy(rel->l);
849 81 : rel->l = NULL;
850 81 : rel_destroy(rel->r);
851 81 : rel->r = NULL;
852 246 : for (node *n = rel->exps->h ; n ; n = n->next) {
853 165 : sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
854 165 : exp_prop_alias(v->sql->sa, a, e);
855 165 : n->data = a;
856 : }
857 81 : list_hash_clear(rel->exps);
858 81 : sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
859 81 : set_count_prop(v->sql->sa, l, 1);
860 81 : l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
861 81 : set_count_prop(v->sql->sa, l, 0);
862 81 : rel->op = op_project;
863 81 : rel->l = l;
864 81 : rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
865 81 : set_count_prop(v->sql->sa, rel, 0);
866 81 : set_nodistinct(rel); /* set relations may have distinct flag set */
867 81 : v->changes++;
868 : }
869 : break;
870 : }
871 13485 : case op_munion: {
872 13485 : list *l = rel->l, *nrels = sa_list(v->sql->sa);
873 13485 : BUN cnt = 0;
874 13485 : bool needs_pruning = false;
875 :
876 13485 : if (is_recursive(rel))
877 : break;
878 51267 : for (node *n = l->h; n; n = n->next) {
879 37853 : sql_rel *r = n->data, *pl = r;
880 :
881 37853 : while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
882 0 : pl = pl->l;
883 : /* if it's not a projection, then project and propagate statistics */
884 37853 : if (!is_project(pl->op) && !is_base(pl->op)) {
885 0 : pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
886 0 : set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
887 0 : pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
888 : }
889 37853 : nrels = append(nrels, pl);
890 : /* we need new munion statistics */
891 : /* propagate row count */
892 37853 : BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
893 : /* if PROP_COUNT does not exist we assume at least a row (see get_rel_count def) */
894 37853 : if (rv == BUN_NONE) {
895 1220 : cnt++;
896 1220 : continue;
897 : }
898 36633 : if (!rv && can_be_pruned)
899 6736 : needs_pruning = true;
900 : /* overflow check */
901 36633 : if (rv > (BUN_MAX - cnt))
902 37853 : rv = BUN_MAX;
903 : else
904 36633 : cnt += rv;
905 : }
906 13414 : int i = 0;
907 77323 : for (node *n = rel->exps->h ; n ; n = n->next, i++)
908 63909 : rel_munion_get_statistics(v->sql, rel, nrels, n->data, i);
909 :
910 13414 : if (needs_pruning && !rel_is_ref(rel)) {
911 4574 : v->changes++;
912 4574 : list *nl = sa_list(l->sa);
913 :
914 16865 : for (node *n = nrels->h; n; n = n->next) {
915 12291 : sql_rel *r = n->data;
916 12291 : BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
917 :
918 12291 : if (!rv) { /* keep last for now */
919 6265 : rel_destroy(r);
920 6265 : continue;
921 : }
922 6026 : nl = append(nl, r);
923 : }
924 4574 : rel->l = nl;
925 4574 : if (list_length(nl) == 1) {
926 4238 : sql_rel *l = rel->l = nl->h->data; /* ugh */
927 4238 : rel->r = NULL;
928 4238 : rel->op = op_project;
929 :
930 20975 : for (node *n = rel->exps->h, *m = l->exps->h ; n && m ; n = n->next, m = m->next) {
931 16737 : sql_exp *pe = n->data, *ie = m->data;
932 16737 : sql_exp *ne = exp_ref(v->sql, ie);
933 16737 : exp_prop_alias(v->sql->sa, ne, pe);
934 16737 : n->data = ne;
935 : }
936 4238 : list_hash_clear(rel->exps);
937 336 : } else if (list_empty(nl)) {
938 : /* empty select (project [ nils ] ) */
939 445 : for (node *n = rel->exps->h ; n ; n = n->next) {
940 345 : sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
941 345 : exp_prop_alias(v->sql->sa, a, e);
942 345 : n->data = a;
943 : }
944 100 : list_hash_clear(rel->exps);
945 100 : sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
946 100 : set_count_prop(v->sql->sa, l, 1);
947 100 : l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
948 100 : set_count_prop(v->sql->sa, l, 0);
949 100 : rel->op = op_project;
950 100 : rel->r = NULL;
951 100 : rel->l = l;
952 100 : rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
953 100 : set_count_prop(v->sql->sa, rel, 0);
954 100 : set_nodistinct(rel); /* set relations may have distinct flag set */
955 : }
956 : } else {
957 8840 : set_count_prop(v->sql->sa, rel, cnt);
958 : }
959 : break;
960 : }
961 548391 : case op_join:
962 : case op_left:
963 : case op_right:
964 : case op_full:
965 : case op_semi:
966 : case op_anti:
967 : case op_select:
968 : case op_project:
969 : case op_groupby: {
970 548391 : if (is_groupby(rel->op) && !list_empty(rel->r))
971 16811 : rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
972 548391 : rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, &rel_propagate_statistics, false);
973 548378 : if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r))
974 41876 : rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
975 : /* The following optimizations can only be applied after propagating the statistics to rel->exps */
976 548381 : if (can_be_pruned && (is_join(rel->op) || is_select(rel->op)) && !list_empty(rel->exps)) {
977 144417 : int changes = v->changes;
978 144417 : rel->exps = rel_prune_predicates(v, rel);
979 144417 : if (v->changes > changes)
980 3673 : rel = rewrite_simplify(v, 0, v->value_based_opt, rel);
981 : }
982 :
983 : /* propagate row count */
984 548381 : sql_rel *l = rel->l, *r = rel->r;
985 548381 : switch (rel->op) {
986 137189 : case op_join:
987 : case op_left:
988 : case op_right:
989 : case op_full: {
990 137189 : BUN lv = get_rel_count(l), rv = get_rel_count(r), uniques_estimate = BUN_MAX, join_idx_estimate = BUN_MAX;
991 :
992 137189 : if (!list_empty(rel->exps) && !is_single(rel)) {
993 250875 : for (node *n = rel->exps->h ; n ; n = n->next) {
994 127916 : sql_exp *e = n->data, *el = e->l, *er = e->r;
995 :
996 127916 : if (find_prop(e->p, PROP_JOINIDX)) {
997 670 : join_idx_estimate = lv>rv?lv:rv;
998 127246 : } else if (e->type == e_cmp && e->flag == cmp_equal) {
999 : /* if one of the sides is unique, the cardinality will be that exact number, but look for nulls */
1000 123364 : if (!is_semantics(e) || !has_nil(el) || !has_nil(er)) {
1001 123188 : BUN lu = 0, ru = 0;
1002 123188 : prop *p = NULL;
1003 123188 : if ((p = find_prop(el->p, PROP_NUNIQUES)))
1004 91390 : lu = (BUN) p->value.dval;
1005 123188 : if ((p = find_prop(er->p, PROP_NUNIQUES)))
1006 106083 : ru = (BUN) p->value.dval;
1007 123188 : if (is_unique(el) || lu > lv) {
1008 74674 : BUN ncount = (is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv;
1009 74674 : uniques_estimate = MIN(uniques_estimate, ncount);
1010 48514 : } else if (is_unique(er) || ru > rv) {
1011 30533 : BUN ncount = (is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv;
1012 30533 : uniques_estimate = MIN(uniques_estimate, ncount);
1013 : }
1014 : }
1015 : }
1016 : }
1017 : }
1018 137189 : if (is_single(rel)) {
1019 4803 : set_count_prop(v->sql->sa, rel, lv);
1020 132386 : } else if (join_idx_estimate != BUN_MAX) {
1021 668 : set_count_prop(v->sql->sa, rel, join_idx_estimate);
1022 131718 : } else if (uniques_estimate != BUN_MAX) {
1023 98602 : set_count_prop(v->sql->sa, rel, uniques_estimate);
1024 33116 : } else if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
1025 : /* corner cases for outer joins */
1026 123 : if (is_left(rel->op)) {
1027 111 : set_count_prop(v->sql->sa, rel, lv);
1028 12 : } else if (is_right(rel->op)) {
1029 3 : set_count_prop(v->sql->sa, rel, rv);
1030 9 : } else if (is_full(rel->op) && lv != BUN_NONE && rv != BUN_NONE) {
1031 9 : set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
1032 0 : } else if (lv != BUN_NONE && rv != BUN_NONE) {
1033 0 : set_count_prop(v->sql->sa, rel, 0);
1034 : }
1035 32993 : } else if (lv == 0) {
1036 1204 : set_count_prop(v->sql->sa, rel, (is_right(rel->op) || is_full(rel->op)) ? rv : 0);
1037 32377 : } else if (rv == 0) {
1038 1574 : set_count_prop(v->sql->sa, rel, (is_left(rel->op) || is_full(rel->op)) ? lv : 0);
1039 31297 : } else if (lv != BUN_NONE && rv != BUN_NONE) {
1040 18506 : set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX / lv)) ? BUN_MAX : (lv * rv)); /* overflow check */
1041 : }
1042 : break;
1043 : }
1044 2935 : case op_anti:
1045 2935 : set_count_prop(v->sql->sa, rel, get_rel_count(l));
1046 2935 : break;
1047 113557 : case op_semi:
1048 : case op_select:
1049 : /* TODO calculate cardinalities using selectivities */
1050 113557 : if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
1051 5158 : set_count_prop(v->sql->sa, rel, 0);
1052 : } else {
1053 213513 : if (!list_empty(rel->exps) && !is_single(rel)) {
1054 105114 : BUN cnt = get_rel_count(l), u = 1;
1055 171099 : for (node *n = rel->exps->h ; n ; n = n->next) {
1056 112284 : sql_exp *e = n->data, *el = e->l, *er = e->r;
1057 :
1058 : /* simple expressions first */
1059 112284 : if (e->type == e_cmp && e->flag == cmp_equal && exp_is_atom(er)) {
1060 : /* use selectivity */
1061 61772 : prop *p;
1062 61772 : if ((p = find_prop(el->p, PROP_NUNIQUES))) {
1063 46299 : u = (BUN) p->value.dval;
1064 46299 : break;
1065 : }
1066 : }
1067 : }
1068 : /* u is an *estimate*, so don't set count_prop to 0 unless cnt is 0 */
1069 105114 : set_count_prop(v->sql->sa, rel, cnt == 0 ? 0 : u == 0 || u > cnt ? 1 : cnt/u);
1070 : } else {
1071 3285 : set_count_prop(v->sql->sa, rel, get_rel_count(l));
1072 : }
1073 : }
1074 : break;
1075 269979 : case op_project:
1076 269979 : if (l) {
1077 257283 : if (need_distinct(rel)) {
1078 114 : set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->exps));
1079 : } else {
1080 257169 : set_count_prop(v->sql->sa, rel, get_rel_count(l));
1081 : }
1082 : } else {
1083 12696 : BUN card = 1;
1084 :
1085 12696 : if (!list_empty(rel->exps)) {
1086 37935 : for (node *n = rel->exps->h ; n ; n = n->next) {
1087 25239 : sql_exp *e = n->data;
1088 25239 : card = MAX(card, trivial_project_exp_card(e));
1089 : }
1090 : }
1091 12696 : set_count_prop(v->sql->sa, rel, card);
1092 : }
1093 : break;
1094 24309 : case op_groupby:
1095 24309 : if (list_empty(rel->r)) {
1096 7497 : set_count_prop(v->sql->sa, rel, 1);
1097 : } else {
1098 16811 : set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->r));
1099 : }
1100 : break;
1101 : default:
1102 : break;
1103 : }
1104 : break;
1105 : }
1106 17035 : case op_topn: {
1107 17035 : BUN lv = get_rel_count(rel->l);
1108 :
1109 17035 : if (lv != BUN_NONE) {
1110 17012 : sql_exp *le = rel->exps->h->data, *oe = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
1111 84 : if (oe && oe->l && exp_is_not_null(oe)) { /* no parameters */
1112 84 : BUN offset = (BUN) ((atom*)oe->l)->data.val.lval;
1113 84 : lv = offset >= lv ? 0 : lv - offset;
1114 : }
1115 17012 : if (le->l && exp_is_not_null(le)) {
1116 16977 : BUN limit = (BUN) ((atom*)le->l)->data.val.lval;
1117 16977 : lv = MIN(lv, limit);
1118 : }
1119 17012 : set_count_prop(v->sql->sa, rel, lv);
1120 : }
1121 : break;
1122 : }
1123 22 : case op_sample: {
1124 22 : BUN lv = get_rel_count(rel->l);
1125 :
1126 22 : if (lv != BUN_NONE) {
1127 4 : sql_exp *se = rel->exps->h->data;
1128 4 : sql_subtype *tp = exp_subtype(se);
1129 :
1130 4 : if (se->l && tp->type->eclass == EC_NUM) { /* sample is a number of rows */
1131 4 : BUN sample = (BUN) ((atom*)se->l)->data.val.lval;
1132 4 : lv = MIN(lv, sample);
1133 0 : } else if (se->l) { /* sample is a percentage of rows */
1134 0 : dbl percent = ((atom*)se->l)->data.val.dval;
1135 0 : assert(tp->type->eclass == EC_FLT);
1136 0 : lv = (BUN) ceil((dbl)lv * percent);
1137 : }
1138 4 : set_count_prop(v->sql->sa, rel, lv);
1139 : }
1140 : break;
1141 : }
1142 6201 : case op_table: {
1143 6201 : sql_exp *op = rel->r;
1144 6201 : if (rel->flag != TRIGGER_WRAPPER && op) {
1145 5889 : sql_subfunc *f = op->f;
1146 5889 : if (f->func->lang == FUNC_LANG_SQL) {
1147 97 : set_count_prop(v->sql->sa, rel, 1000 /* just some fallback value */);
1148 5792 : } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "storage") == 0) {
1149 837 : set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of catalog */);
1150 4955 : } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "db_users") == 0) {
1151 0 : set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of users */);
1152 4955 : } else if (f->func->lang == FUNC_LANG_MAL && strncmp(f->func->base.name, "querylog", 8) == 0) {
1153 1101 : set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of querylog */);
1154 3854 : } else if (f->func->lang == FUNC_LANG_MAL &&
1155 3685 : (strcmp(f->func->base.name, "queue") == 0 ||
1156 3417 : strcmp(f->func->base.name, "optimizers") == 0 ||
1157 3101 : strcmp(f->func->base.name, "env") == 0 ||
1158 2735 : strcmp(f->func->base.name, "keywords") == 0 ||
1159 2735 : strcmp(f->func->base.name, "statistics") == 0 ||
1160 2058 : strcmp(f->func->base.name, "rejects") == 0 ||
1161 1800 : strcmp(f->func->base.name, "schemastorage") == 0 ||
1162 1800 : strncmp(f->func->base.name, "storage", 7) == 0 ||
1163 1800 : strcmp(f->func->base.name, "sessions") == 0) ) {
1164 2179 : set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of queue */);
1165 : }
1166 : /* else {
1167 : printf("%%func needs stats : %s\n", f->func->base.name);
1168 : } */
1169 : }
1170 : break;
1171 : }
1172 : /*These relations are less important for now
1173 : TODO later we can tune it */
1174 : case op_insert:
1175 : case op_update:
1176 : case op_delete:
1177 : case op_truncate:
1178 : case op_ddl:
1179 : default:
1180 : break;
1181 : }
1182 :
1183 : return rel;
1184 : }
1185 :
1186 : static sql_rel *
1187 555534 : rel_get_statistics(visitor *v, global_props *gp, sql_rel *rel)
1188 : {
1189 : /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
1190 555534 : uint8_t has_special_modify = (uint8_t) gp->has_special_modify;
1191 555534 : v->data = &has_special_modify;
1192 555534 : rel = rel_visitor_bottomup(v, rel, &rel_get_statistics_);
1193 555965 : v->data = gp;
1194 555965 : return rel;
1195 : }
1196 :
1197 : run_optimizer
1198 732253 : bind_get_statistics(visitor *v, global_props *gp)
1199 : {
1200 732253 : (void) v;
1201 732253 : return (gp->opt_level == 1 && !gp->cnt[op_insert]) ? rel_get_statistics : NULL;
1202 : }
1203 :
1204 :
1205 : static bool
1206 45373 : point_select_on_unique_column(sql_rel *rel)
1207 : {
1208 45373 : if (is_select(rel->op) && !list_empty(rel->exps)) {
1209 38490 : for (node *n = rel->exps->h; n ; n = n->next) {
1210 20026 : sql_exp *e = n->data, *el = e->l, *er = e->r, *found = NULL;
1211 :
1212 20026 : if (is_compare(e->type) && e->flag == cmp_equal) {
1213 15211 : if (is_numeric_upcast(el))
1214 0 : el = el->l;
1215 15211 : if (is_numeric_upcast(er))
1216 0 : er = er->l;
1217 15211 : if (is_alias(el->type) && exp_is_atom(er) && (found = rel_find_exp(rel->l, el)) &&
1218 761 : is_unique(found) && (!is_semantics(e) || !has_nil(found) || !has_nil(er)))
1219 : return true;
1220 14450 : if (is_alias(er->type) && exp_is_atom(el) && (found = rel_find_exp(rel->l, er)) &&
1221 0 : is_unique(found) && (!is_semantics(e) || !has_nil(el) || !has_nil(found)))
1222 : return true;
1223 : }
1224 : }
1225 : }
1226 : return false;
1227 : }
1228 :
1229 : /*
1230 : * A point select on an unique column reduces the number of rows to 1. If the same select is under a
1231 : * join, the opposite side's select can be pushed above the join.
1232 : */
1233 : static inline sql_rel *
1234 697336 : rel_push_select_up(visitor *v, sql_rel *rel)
1235 : {
1236 697336 : if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) || is_semi(rel->op)) && !is_single(rel)) {
1237 122761 : sql_rel *l = rel->l, *r = rel->r;
1238 122761 : bool can_pushup_left = is_select(l->op) && !rel_is_ref(l) && !is_single(l) && (is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op)),
1239 122761 : can_pushup_right = is_select(r->op) && !rel_is_ref(r) && !is_single(r) && (is_innerjoin(rel->op) || is_right(rel->op));
1240 :
1241 122761 : if (can_pushup_left || can_pushup_right) {
1242 35886 : if (can_pushup_left)
1243 20669 : can_pushup_left = point_select_on_unique_column(r);
1244 35886 : if (can_pushup_right)
1245 24704 : can_pushup_right = point_select_on_unique_column(l);
1246 :
1247 : /* if both selects retrieve one row each, it's not worth it to push both up */
1248 35886 : if (can_pushup_left && !can_pushup_right) {
1249 695 : sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
1250 695 : nrel->l = l->l;
1251 695 : rel = rel_inplace_select(rel, nrel, l->exps);
1252 695 : assert(is_select(rel->op));
1253 695 : v->changes++;
1254 35191 : } else if (!can_pushup_left && can_pushup_right) {
1255 14 : sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
1256 14 : nrel->r = r->l;
1257 14 : rel = rel_inplace_select(rel, nrel, r->exps);
1258 14 : assert(is_select(rel->op));
1259 14 : v->changes++;
1260 : }
1261 : }
1262 : }
1263 697336 : return rel;
1264 : }
1265 :
1266 : static int
1267 39058 : sql_class_base_score(visitor *v, sql_column *c, sql_subtype *t, bool equality_based)
1268 : {
1269 39058 : int de;
1270 :
1271 39058 : if (!t)
1272 : return 0;
1273 39058 : switch (ATOMstorage(t->type->localtype)) {
1274 : case TYPE_bte:
1275 : return 150 - 8;
1276 1606 : case TYPE_sht:
1277 1606 : return 150 - 16;
1278 16472 : case TYPE_int:
1279 16472 : return 150 - 32;
1280 469 : case TYPE_void:
1281 : case TYPE_lng:
1282 469 : return 150 - 64;
1283 : case TYPE_uuid:
1284 : #ifdef HAVE_HGE
1285 : case TYPE_hge:
1286 : #endif
1287 : return 150 - 128;
1288 2 : case TYPE_flt:
1289 2 : return 75 - 24;
1290 : case TYPE_dbl:
1291 : return 75 - 53;
1292 16187 : default:
1293 16187 : if (equality_based && c && v->storage_based_opt && (de = mvc_is_duplicate_eliminated(v->sql, c)))
1294 1491 : return 150 - de * 8;
1295 : /* strings and blobs not duplicate eliminated don't get any points here */
1296 : return 0;
1297 : }
1298 : }
1299 :
1300 : static int
1301 15154 : score_se_base(visitor *v, sql_rel *rel, sql_exp *e)
1302 : {
1303 15154 : int res = 0;
1304 15154 : sql_subtype *t = exp_subtype(e);
1305 15154 : sql_column *c = NULL;
1306 :
1307 15154 : if (e->type == e_convert) /* keep unsafes at the end (TODO improve) */
1308 : return -1000;
1309 : /* can we find out if the underlying table is sorted */
1310 14746 : if ((c = exp_find_column(rel, e, -2)) && v->storage_based_opt && mvc_is_sorted(v->sql, c))
1311 14746 : res += 600;
1312 :
1313 : /* prefer the shorter var types over the longer ones */
1314 14746 : res += sql_class_base_score(v, c, t, is_equality_or_inequality_exp(e->flag)); /* smaller the type, better */
1315 14746 : return res;
1316 : }
1317 :
1318 : static int
1319 18547 : score_se(visitor *v, sql_rel *rel, sql_exp *e)
1320 : {
1321 18547 : int score = 0;
1322 18547 : if (e->type == e_cmp && !is_complex_exp(e->flag)) {
1323 15154 : sql_exp *l = e->l;
1324 :
1325 15154 : while (l->type == e_cmp) { /* go through nested comparisons */
1326 2 : sql_exp *ll;
1327 :
1328 2 : if (l->flag == cmp_filter || l->flag == cmp_or)
1329 0 : ll = ((list*)l->l)->h->data;
1330 : else
1331 2 : ll = l->l;
1332 2 : if (ll->type != e_cmp)
1333 : break;
1334 : l = ll;
1335 : }
1336 15154 : score += score_se_base(v, rel, l);
1337 : }
1338 18547 : score += exp_keyvalue(e);
1339 18547 : return score;
1340 : }
1341 :
1342 : static inline sql_rel *
1343 697336 : rel_select_order(visitor *v, sql_rel *rel)
1344 : {
1345 697336 : int *scores = NULL;
1346 697336 : sql_exp **exps = NULL;
1347 :
1348 697336 : if (is_select(rel->op) && list_length(rel->exps) > 1) {
1349 7926 : node *n;
1350 7926 : int i, nexps = list_length(rel->exps);
1351 7926 : scores = SA_NEW_ARRAY(v->sql->ta, int, nexps);
1352 7926 : exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, nexps);
1353 :
1354 26473 : for (i = 0, n = rel->exps->h; n; i++, n = n->next) {
1355 18576 : exps[i] = n->data;
1356 18576 : if (find_prop(exps[i]->p, PROP_HASHCOL))
1357 : return rel;
1358 18547 : scores[i] = score_se(v, rel, n->data);
1359 : }
1360 7897 : GDKqsort(scores, exps, NULL, nexps, sizeof(int), sizeof(void *), TYPE_int, true, true);
1361 :
1362 26444 : for (i = 0, n = rel->exps->h; n; i++, n = n->next)
1363 18547 : n->data = exps[i];
1364 : }
1365 :
1366 : return rel;
1367 : }
1368 :
1369 : /* Compute the efficiency of using this expression early in a group by list */
1370 : static int
1371 24312 : score_gbe(visitor *v, sql_rel *rel, sql_exp *e)
1372 : {
1373 24312 : int res = 0;
1374 24312 : sql_subtype *t = exp_subtype(e);
1375 24312 : sql_column *c = exp_find_column(rel, e, -2);
1376 :
1377 24312 : if (e->card == CARD_ATOM) /* constants are trivial to group */
1378 38 : res += 1000;
1379 : /* can we find out if the underlying table is sorted */
1380 24312 : if (is_unique(e) || find_prop(e->p, PROP_HASHCOL) || (c && v->storage_based_opt && mvc_is_unique(v->sql, c))) /* distinct columns */
1381 2464 : res += 700;
1382 21072 : if (c && v->storage_based_opt && mvc_is_sorted(v->sql, c))
1383 2479 : res += 500;
1384 24312 : if (find_prop(e->p, PROP_HASHIDX)) /* has hash index */
1385 0 : res += 200;
1386 :
1387 : /* prefer the shorter var types over the longer ones */
1388 24312 : res += sql_class_base_score(v, c, t, true); /* smaller the type, better */
1389 24312 : return res;
1390 : }
1391 :
1392 : /* reorder group by expressions */
1393 : static inline sql_rel *
1394 697336 : rel_groupby_order(visitor *v, sql_rel *rel)
1395 : {
1396 697336 : int *scores = NULL;
1397 697336 : sql_exp **exps = NULL;
1398 :
1399 697336 : if (is_groupby(rel->op) && list_length(rel->r) > 1) {
1400 7978 : node *n;
1401 7978 : list *gbe = rel->r;
1402 7978 : int i, ngbe = list_length(gbe);
1403 7978 : scores = SA_NEW_ARRAY(v->sql->ta, int, ngbe);
1404 7978 : exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, ngbe);
1405 :
1406 : /* first sorting step, give priority for integers and sorted columns */
1407 32290 : for (i = 0, n = gbe->h; n; i++, n = n->next) {
1408 24312 : exps[i] = n->data;
1409 24312 : scores[i] = score_gbe(v, rel, exps[i]);
1410 : }
1411 7978 : GDKqsort(scores, exps, NULL, ngbe, sizeof(int), sizeof(void *), TYPE_int, true, true);
1412 :
1413 : /* second sorting step, give priority to strings with lower number of digits */
1414 17435 : for (i = ngbe - 1; i && !scores[i]; i--); /* find expressions with no score from the first round */
1415 7978 : if (scores[i])
1416 6986 : i++;
1417 7978 : if (ngbe - i > 1) {
1418 12002 : for (int j = i; j < ngbe; j++) {
1419 9025 : sql_subtype *t = exp_subtype(exps[j]);
1420 9025 : scores[j] = t ? t->digits : 0;
1421 : }
1422 : /* the less number of digits the better, order ascending */
1423 2977 : GDKqsort(scores + i, exps + i, NULL, ngbe - i, sizeof(int), sizeof(void *), TYPE_int, false, true);
1424 : }
1425 :
1426 32290 : for (i = 0, n = gbe->h; n; i++, n = n->next)
1427 24312 : n->data = exps[i];
1428 : }
1429 :
1430 697335 : return rel;
1431 : }
1432 :
1433 : /* This optimization loop contains optimizations that can potentially use statistics */
1434 : static sql_rel *
1435 697336 : rel_final_optimization_loop_(visitor *v, sql_rel *rel)
1436 : {
1437 : /* Run rel_push_select_up only once at the end to avoid an infinite optimization loop */
1438 697336 : rel = rel_push_select_up(v, rel);
1439 697336 : rel = rel_select_order(v, rel);
1440 :
1441 : /* TODO? Maybe later add rel_simplify_count, rel_join2semijoin, rel_simplify_fk_joins,
1442 : rel_distinct_project2groupby, rel_simplify_predicates, rel_simplify_math,
1443 : rel_distinct_aggregate_on_unique_values */
1444 :
1445 697336 : rel = rel_groupby_order(v, rel);
1446 697335 : return rel;
1447 : }
1448 :
1449 : static sql_rel *
1450 70729 : rel_final_optimization_loop(visitor *v, global_props *gp, sql_rel *rel)
1451 : {
1452 70729 : (void) gp;
1453 70729 : return rel_visitor_bottomup(v, rel, &rel_final_optimization_loop_);
1454 : }
1455 :
1456 : run_optimizer
1457 732793 : bind_final_optimization_loop(visitor *v, global_props *gp)
1458 : {
1459 732793 : int flag = v->sql->sql_optimizer;
1460 : /* At the moment, this optimizer has dependency on 3 flags */
1461 563880 : return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_select]) &&
1462 803524 : (flag & push_select_up) && (flag & optimize_select_and_joins_topdown) && (flag & optimize_projections) ? rel_final_optimization_loop : NULL;
1463 : }
|