Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024, 2025 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer_private.h"
15 : #include "rel_statistics.h"
16 : #include "rel_basetable.h"
17 : #include "rel_rewriter.h"
18 : #include "sql_storage.h"
19 :
20 : static sql_exp *
21 3538870 : comparison_find_column(sql_exp *input, sql_exp *e)
22 : {
23 3538870 : switch (input->type) {
24 100727 : case e_convert: {
25 100727 : list *types = (list *)input->r;
26 100727 : sql_class from = ((sql_subtype*)types->h->data)->type->eclass, to = ((sql_subtype*)types->h->next->data)->type->eclass;
27 100727 : if (from == to)
28 194673 : return comparison_find_column(input->l, e) ? input : NULL;
29 : return NULL;
30 : }
31 3112697 : case e_column:
32 3112697 : return exp_match(e, input) ? input : NULL;
33 : default:
34 : return NULL;
35 : }
36 : }
37 :
38 : /* multi lo <= col <= hi, maybe still be false even if lo or hi are NULL, possibly similar for filter on multiple
39 : * columns */
40 : #define comp_single_column(c) (!c->f)
41 :
42 : static sql_exp *
43 8835698 : rel_propagate_column_ref_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
44 : {
45 8917555 : assert(e->type == e_column);
46 8917555 : if (rel) {
47 8917474 : switch(rel->op) {
48 4134447 : case op_left:
49 : case op_right:
50 : case op_full:
51 : case op_join:
52 : case op_select:
53 : case op_anti:
54 : case op_semi: {
55 4134447 : bool found_without_semantics = false, found_left = false, found_right = false, still_unique = false;
56 :
57 4134447 : if ((is_innerjoin(rel->op) || is_select(rel->op)) && list_length(rel->exps) == 1 && exp_is_false(rel->exps->h->data))
58 : return NULL; /* nothing will pass, skip */
59 :
60 : /* propagate from the bottom first */
61 4113809 : if (rel_propagate_column_ref_statistics(sql, rel->l, e))
62 : found_left = true;
63 2599437 : if (!found_left && is_join(rel->op) && rel_propagate_column_ref_statistics(sql, rel->r, e))
64 4113809 : found_right = true;
65 :
66 4113809 : if (!found_left && !found_right)
67 : return NULL;
68 1812470 : if (!list_empty(rel->exps) && rel->op != op_anti) { /* if there's an or, the MIN and MAX get difficult to propagate */
69 3565423 : for (node *n = rel->exps->h ; n ; n = n->next) {
70 1849380 : sql_exp *comp = n->data, *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL, *fe = comp->f, *fne = NULL;
71 :
72 1849380 : if (comp->type == e_cmp) {
73 1848512 : if (is_theta_exp(comp->flag) && ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) || (fe && (fne = comparison_find_column(fe, e))))) {
74 127726 : atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
75 127726 : *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
76 :
77 : /* not semantics found or if explicitly filtering not null values from the column */
78 127726 : found_without_semantics |= (!is_semantics(comp) && comp_single_column(comp)) || (comp->flag == cmp_equal && lne && is_anti(comp) && exp_is_null(re));
79 127726 : still_unique |= comp->flag == cmp_equal && is_unique(le) && is_unique(re); /* unique if only equi-joins on unique columns are there */
80 127726 : if (is_full(rel->op) || (is_left(rel->op) && found_left) || (is_right(rel->op) && found_right)) /* on outer joins, min and max cannot be propagated on some cases */
81 22721 : continue;
82 : /* if (end2 >= start1 && start2 <= end1) then the 2 intervals are intersected */
83 105005 : if (fe && lval_min && lval_max) { /* range case, the middle expression must intersect the other two */
84 4618 : atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
85 4618 : int int1 = rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0,
86 4618 : int2 = fval_min && fval_max && atom_cmp(fval_max, lval_min) >= 0 && atom_cmp(fval_min, lval_max) <= 0,
87 4618 : symmetric = is_symmetric(comp);
88 :
89 4618 : if (is_anti(comp) || (!symmetric && fval_min && rval_max && atom_cmp(fval_min, rval_max) < 0)) /* for asymmetric case the re range must be after the fe range */
90 1815 : continue;
91 2803 : if (lne && int1 && int2) {
92 103 : if (symmetric) {
93 0 : prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
94 0 : atom *nmin = statistics_atom_min(sql, rval_min, fval_min), *nmax = statistics_atom_max(sql, rval_max, fval_max);
95 : /* max is min from le and (max from re and fe max) */
96 0 : set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, nmax, p2->value.pval) : nmax);
97 : /* min is max from le and (min from re and fe min) */
98 0 : set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, nmin, p1->value.pval) : nmin);
99 : } else {
100 103 : prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
101 : /* max is min from le and fe max */
102 103 : set_minmax_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, fval_max, p2->value.pval) : fval_max);
103 : /* min is max from le and re min */
104 103 : set_minmax_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, rval_min, p1->value.pval) : rval_min);
105 : }
106 2700 : } else if (rne) {
107 590 : if (symmetric && int1 && int2) { /* min is max from le and (min from re and fe min) */
108 0 : prop *p = find_prop(e->p, PROP_MIN);
109 0 : atom *nmin = p ? statistics_atom_min(sql, p->value.pval, fval_min) : fval_min;
110 0 : set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, nmin, lval_min));
111 590 : } else if (int1) { /* min is max from le and re min */
112 100 : prop *p = find_prop(e->p, PROP_MIN);
113 100 : set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
114 : }
115 2110 : } else if (fne) {
116 549 : if (symmetric && int1 && int2) { /* max is min from le and (max from re and fe max) */
117 0 : prop *p = find_prop(e->p, PROP_MAX);
118 0 : atom *nmax = p ? statistics_atom_max(sql, p->value.pval, rval_max) : rval_max;
119 0 : set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, nmax, lval_max) : nmax);
120 549 : } else if (int2) { /* max is min from le and fe max */
121 266 : prop *p = find_prop(e->p, PROP_MAX);
122 266 : set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
123 : }
124 : }
125 100387 : } else if (lval_min && lval_max && rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, lval_max) <= 0) {
126 : /* both min and max must be set and the intervals must overlap */
127 41977 : switch (comp->flag) {
128 28304 : case cmp_equal: /* for equality reduce */
129 28304 : set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_max(sql, lval_max, rval_max) : statistics_atom_min(sql, lval_max, rval_max));
130 28304 : set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_min(sql, lval_min, rval_min) : statistics_atom_max(sql, lval_min, rval_min));
131 28304 : break;
132 4357 : case cmp_notequal: /* for inequality expand */
133 4357 : set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_min(sql, lval_max, rval_max) : statistics_atom_max(sql, lval_max, rval_max));
134 4357 : set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_max(sql, lval_min, rval_min) : statistics_atom_min(sql, lval_min, rval_min));
135 4357 : break;
136 5310 : case cmp_gt:
137 : case cmp_gte:
138 9682 : if (!is_anti(comp) && lne) { /* min is max from both min */
139 4372 : prop *p = find_prop(e->p, PROP_MIN);
140 4372 : set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min, p->value.pval) : rval_min);
141 938 : } else if (!is_anti(comp)) { /* max is min from both max */
142 938 : prop *p = find_prop(e->p, PROP_MAX);
143 938 : set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value.pval) : lval_max);
144 : }
145 : break;
146 4006 : case cmp_lt:
147 : case cmp_lte:
148 7205 : if (!is_anti(comp) && lne) { /* max is min from both max */
149 3199 : prop *p = find_prop(e->p, PROP_MAX);
150 3199 : set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max, p->value.pval) : rval_max);
151 807 : } else if (!is_anti(comp)) { /* min is max from both min */
152 807 : prop *p = find_prop(e->p, PROP_MIN);
153 807 : set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value.pval) : lval_min);
154 : }
155 : break;
156 : default: /* Maybe later I can do cmp_in and cmp_notin */
157 : break;
158 : }
159 : }
160 : }
161 : }
162 : }
163 : }
164 1812470 : if (is_full(rel->op) || (is_left(rel->op) && found_right) || (is_right(rel->op) && found_left))
165 37037 : set_has_nil(e);
166 1812470 : if (!is_outerjoin(rel->op) && found_without_semantics) /* at an outer join, null values pass */
167 94143 : set_has_no_nil(e);
168 1812470 : if (is_join(rel->op) && is_unique(e) && !still_unique)
169 119978 : set_not_unique(e);
170 1812470 : return e;
171 : }
172 4681802 : case op_table:
173 : case op_basetable:
174 : case op_union:
175 : case op_except:
176 : case op_inter:
177 : case op_munion:
178 : case op_project:
179 : case op_groupby: {
180 4681802 : if (is_recursive(rel))
181 : return NULL;
182 4681605 : sql_exp *found;
183 4681605 : atom *fval;
184 4681605 : prop *est;
185 4681605 : if ((found = rel_find_exp(rel, e))) {
186 2201331 : if (rel->op != op_table) { /* At the moment don't propagate statistics for table relations */
187 2156026 : if ((fval = find_prop_and_get(found->p, PROP_MAX)))
188 1137611 : set_minmax_property(sql, e, PROP_MAX, fval);
189 2156024 : if ((fval = find_prop_and_get(found->p, PROP_MIN)))
190 1144655 : set_minmax_property(sql, e, PROP_MIN, fval);
191 2156024 : if (!has_nil(found))
192 1400115 : set_has_no_nil(e);
193 2156024 : if (is_unique(found) || (need_distinct(rel) && list_length(rel->exps) == 1) ||
194 1737700 : (is_groupby(rel->op) && (list_empty(rel->r) || (list_length(rel->r) == 1 && exps_find_exp(rel->r, e)))))
195 429264 : set_unique(e);
196 : /* propagate unique estimation for known cases */
197 2156025 : if (is_groupby(rel->op) && list_empty(rel->r) && !find_prop(e->p, PROP_NUNIQUES)) { /* global aggregate case */
198 7665 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
199 7664 : p->value.dval = 1;
200 2148359 : } else if (((is_basetable(rel->op) || is_except(rel->op) || is_inter(rel->op) || is_simple_project(rel->op) ||
201 72037 : (is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
202 2087683 : (est = find_prop(found->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES))) {
203 195334 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
204 195334 : p->value.dval = est->value.dval;
205 : }
206 : }
207 2201333 : return e;
208 : }
209 : return NULL;
210 : }
211 81857 : case op_topn:
212 : case op_sample:
213 81857 : return rel_propagate_column_ref_statistics(sql, rel->l, e);
214 : default:
215 : break;
216 : }
217 : }
218 : return NULL;
219 : }
220 :
221 : static atom *
222 4723352 : atom_from_valptr( allocator *sa, sql_subtype *tpe, ValRecord *v)
223 : {
224 4723352 : atom *a = SA_NEW(sa, atom);
225 :
226 4723361 : assert(!VALisnil(v));
227 4723384 : *a = (atom) {.tpe = *tpe,};
228 4723384 : SA_VALcopy(sa, &a->data, v);
229 4723524 : return a;
230 : }
231 :
232 : void
233 4383620 : sql_column_get_statistics(mvc *sql, sql_column *c, sql_exp *e)
234 : {
235 4383620 : bool nonil = false, unique = false;
236 4383620 : double unique_est = 0.0;
237 4383620 : ValRecord min, max;
238 4383620 : int ok = mvc_col_stats(sql, c, &nonil, &unique, &unique_est, &min, &max);
239 :
240 4384487 : if (has_nil(e) && nonil)
241 2926808 : set_has_no_nil(e);
242 4384487 : if (!is_unique(e) && unique)
243 1156174 : set_unique(e);
244 4384487 : if (unique_est != 0.0) {
245 3092566 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
246 3092385 : p->value.dval = unique_est;
247 : }
248 4384306 : unsigned int digits = 0;
249 4384306 : sql_subtype *et = exp_subtype(e);
250 4384549 : if (et->type->eclass == EC_DEC || et->type->eclass == EC_NUM)
251 2870291 : digits = et->digits;
252 4384549 : if ((ok & 2) == 2) {
253 2359024 : if (!VALisnil(&max)) {
254 2358938 : prop *p = e->p = prop_create(sql->sa, PROP_MAX, e->p);
255 2358875 : p->value.pval = atom_from_valptr(sql->sa, &c->type, &max);
256 2358852 : if (digits) {
257 1753262 : unsigned int nd = atom_digits(p->value.pval);
258 1753201 : if (nd < digits)
259 : digits = nd;
260 1753201 : if (!digits)
261 : digits = 1;
262 : }
263 : }
264 2358778 : VALclear(&max);
265 : }
266 4384293 : if ((ok & 1) == 1) {
267 2364915 : if (!VALisnil(&min)) {
268 2364924 : prop *p = e->p = prop_create(sql->sa, PROP_MIN, e->p);
269 2364964 : p->value.pval = atom_from_valptr(sql->sa, &c->type, &min);
270 2365116 : if (digits) {
271 1760522 : unsigned int nd = atom_digits(p->value.pval);
272 1760542 : if (nd > digits) /* possibly set to low by max value */
273 : digits = nd;
274 : if (!digits)
275 : digits = 1;
276 : }
277 : }
278 2365138 : VALclear(&min);
279 : }
280 4384523 : if (digits)
281 2870206 : et->digits = digits;
282 4384523 : if (et->type->eclass == EC_DEC && et->digits <= et->scale)
283 216 : et->digits = et->scale + 1;
284 4384523 : }
285 :
286 : static void
287 958035 : rel_basetable_column_get_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
288 : {
289 958035 : if (e->p)
290 : return;
291 308683 : sql_column *c = NULL;
292 :
293 308683 : if ((c = rel_base_find_column(rel, e->nid))) {
294 210336 : sql_column_get_statistics(sql, c, e);
295 : }
296 : }
297 :
298 : static bool
299 8876 : rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, sql_exp *e, int i)
300 : {
301 8876 : sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
302 8876 : atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
303 8876 : *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
304 8876 : prop *est;
305 :
306 : /* for the intersection, if both expressions don't overlap, it can be pruned */
307 8876 : if (is_inter(rel->op) && !has_nil(le) && !has_nil(re) &&
308 511 : ((rval_max && lval_min && atom_cmp(rval_max, lval_min) < 0) || (rval_min && lval_max && atom_cmp(rval_min, lval_max) > 0)))
309 26 : return true;
310 :
311 8850 : if (lval_max && rval_max) {
312 2557 : if (is_union(rel->op))
313 3 : set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
314 2554 : else if (is_inter(rel->op))
315 399 : set_minmax_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval_max, rval_max)); /* for intersect the new max will be the min of the two */
316 : else /* except */
317 2155 : set_minmax_property(sql, e, PROP_MAX, lval_max);
318 : }
319 8850 : if (lval_min && rval_min) {
320 2557 : if (is_union(rel->op))
321 3 : set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
322 2554 : else if (is_inter(rel->op))
323 399 : set_minmax_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval_min, rval_min)); /* for intersect the new min will be the max of the two */
324 : else /* except */
325 2155 : set_minmax_property(sql, e, PROP_MIN, lval_min);
326 : }
327 :
328 8850 : if (is_union(rel->op) || is_munion(rel->op)) {
329 5986 : if (!has_nil(le) && !has_nil(re))
330 879 : set_has_no_nil(e);
331 5986 : if (need_distinct(rel) && list_length(rel->exps) == 1)
332 0 : set_unique(e);
333 2864 : } else if (is_inter(rel->op)) {
334 564 : if (!has_nil(le) || !has_nil(re))
335 509 : set_has_no_nil(e);
336 564 : if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
337 345 : set_unique(e);
338 : } else {
339 2300 : assert(is_except(rel->op));
340 2300 : if (!has_nil(le))
341 2236 : set_has_no_nil(e);
342 2300 : if (is_unique(le) || (need_distinct(rel) && list_length(rel->exps) == 1))
343 2012 : set_unique(e);
344 : }
345 : /* propagate unique estimation for known cases */
346 8850 : if (!is_union(rel->op) && (est = find_prop(le->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES)) {
347 207 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
348 207 : p->value.dval = est->value.dval;
349 : }
350 : return false;
351 : }
352 :
353 :
354 : static void
355 64038 : rel_munion_get_statistics(mvc *sql, sql_rel *rel, list *rels, sql_exp *e, int i)
356 : {
357 64038 : if (is_recursive(rel))
358 : return ;
359 64038 : assert(is_munion(rel->op));
360 :
361 64038 : sql_rel *l = rels->h->data;
362 64038 : sql_exp *le = list_fetch(l->exps, i);
363 64038 : atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX);
364 64038 : bool has_nonil = !has_nil(le);
365 :
366 185274 : for(node *n = rels->h->next; n; n = n->next) {
367 121236 : sql_rel *r = n->data;
368 121236 : sql_exp *re = list_fetch(r->exps, i);
369 121236 : atom *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
370 :
371 121236 : if (lval_max && rval_max) {
372 44461 : set_minmax_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */
373 44461 : lval_max = find_prop_and_get(e->p, PROP_MAX);
374 : }
375 121236 : if (lval_min && rval_min) {
376 43912 : set_minmax_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */
377 43912 : lval_min = find_prop_and_get(e->p, PROP_MIN);
378 : }
379 121236 : has_nonil &= !has_nil(re);
380 :
381 : }
382 :
383 64038 : if (has_nonil)
384 43934 : set_has_no_nil(e);
385 :
386 64038 : if (need_distinct(rel) && list_length(rel->exps) == 1)
387 1597 : set_unique(e);
388 : }
389 :
390 :
391 : static sql_exp *
392 3571061 : rel_propagate_statistics(visitor *v, sql_rel *rel, sql_exp *e, int depth)
393 : {
394 3571061 : mvc *sql = v->sql;
395 3571061 : atom *lval;
396 :
397 3571061 : (void) depth;
398 3571061 : switch(e->type) {
399 2209738 : case e_column:
400 2209738 : switch (rel->op) { /* set relations don't call rel_propagate_statistics */
401 285520 : case op_join:
402 : case op_left:
403 : case op_right:
404 : case op_full:
405 : case op_semi:
406 : case op_anti: {
407 285520 : sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e);
408 285520 : if (!found)
409 143209 : (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
410 : break;
411 : }
412 1924218 : case op_select:
413 : case op_project:
414 : case op_groupby: {
415 1924218 : sql_exp *found = rel_propagate_column_ref_statistics(sql, rel->l, e); /* labels may be found on the same projection, ugh */
416 1924223 : if (!found && is_simple_project(rel->op))
417 134040 : (void) rel_propagate_column_ref_statistics(sql, rel, e);
418 : break;
419 : }
420 0 : case op_insert:
421 : case op_update:
422 : case op_delete:
423 0 : (void) rel_propagate_column_ref_statistics(sql, rel->r, e);
424 0 : break;
425 : default:
426 : break;
427 : }
428 : break;
429 102590 : case e_convert: {
430 102590 : sql_subtype *to = exp_totype(e), *from = exp_fromtype(e);
431 102590 : sql_exp *l = e->l;
432 102590 : sql_class fr = from->type->eclass, too = to->type->eclass;
433 102590 : prop *est;
434 :
435 102590 : if (fr == too) {
436 93407 : if ((lval = find_prop_and_get(l->p, PROP_MAX))) {
437 60427 : atom *res = atom_copy(sql->sa, lval);
438 60427 : if ((res = atom_cast(sql->sa, res, to)))
439 60399 : set_minmax_property(sql, e, PROP_MAX, res);
440 : }
441 93407 : if ((lval = find_prop_and_get(l->p, PROP_MIN))) {
442 61065 : atom *res = atom_copy(sql->sa, lval);
443 61065 : if ((res = atom_cast(sql->sa, res, to)))
444 61038 : set_minmax_property(sql, e, PROP_MIN, res);
445 : }
446 : }
447 : /* propagating nuniques through conversions is tricky. I am adding just the general cases */
448 102590 : if ((est = find_prop(l->p, PROP_NUNIQUES)) &&
449 63913 : (((fr == EC_TIME || fr == EC_TIME_TZ || fr == EC_DATE) && (too == EC_TIMESTAMP || too == EC_TIMESTAMP_TZ)) ||
450 63888 : ((fr == EC_NUM || fr == EC_BIT) && too == EC_NUM) || (fr == EC_DEC && too == EC_DEC) || (fr != EC_EXTERNAL && too == EC_STRING))) {
451 60448 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
452 60447 : p->value.dval = est->value.dval;
453 : }
454 102589 : if (!has_nil(l))
455 58177 : set_has_no_nil(e);
456 : break;
457 : }
458 380303 : case e_aggr:
459 : case e_func: {
460 380303 : BUN lv;
461 380303 : sql_subfunc *f = e->f;
462 :
463 380303 : if (!f->func->s) {
464 353078 : int key = hash_key(f->func->base.name); /* Using hash lookup */
465 353078 : sql_hash_e *he = sql_functions_lookup->buckets[key&(sql_functions_lookup->size-1)];
466 353078 : lookup_function look = NULL;
467 :
468 755753 : for (; he && !look; he = he->chain) {
469 402675 : struct function_properties* fp = (struct function_properties*) he->value;
470 :
471 402675 : if (!strcmp(f->func->base.name, fp->name))
472 108758 : look = fp->func;
473 : }
474 353078 : if (look)
475 108758 : look(sql, e);
476 : }
477 : /* for global aggregates with no semantics, if the left relation has values, then the output won't be NULL */
478 380303 : if (!is_semantics(e) && e->l && !have_nil(e->l) &&
479 109514 : (e->type != e_aggr || (is_groupby(rel->op) && list_length(rel->r)) || ((lv = get_rel_count(rel->l)) != BUN_NONE && lv > 0)))
480 109177 : set_has_no_nil(e);
481 : /* set properties for global aggregates */
482 380303 : if (e->type == e_aggr && is_groupby(rel->op) && list_empty(rel->r)) {
483 7970 : if (!find_prop(e->p, PROP_NUNIQUES)) {
484 7970 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
485 7970 : p->value.dval = 1;
486 : }
487 7970 : set_unique(e);
488 : }
489 : break;
490 : }
491 594596 : case e_atom:
492 594596 : if (e->l) {
493 576505 : atom *a = (atom*) e->l;
494 576505 : if (!a->isnull) {
495 507067 : set_minmax_property(sql, e, PROP_MAX, a);
496 507067 : set_minmax_property(sql, e, PROP_MIN, a);
497 : }
498 576504 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
499 576506 : p->value.dval = 1;
500 18091 : } else if (e->f) {
501 4314 : list *vals = (list *) e->f;
502 4314 : sql_exp *first = vals->h ? vals->h->data : NULL;
503 4314 : atom *max = NULL, *min = NULL; /* all child values must have a valid min/max */
504 4314 : int has_nil = 0;
505 :
506 4314 : if (first) {
507 4314 : max = ((lval = find_prop_and_get(first->p, PROP_MAX))) ? lval : NULL;
508 4314 : min = ((lval = find_prop_and_get(first->p, PROP_MIN))) ? lval : NULL;
509 4314 : has_nil |= has_nil(first);
510 : }
511 :
512 18497 : for (node *n = vals->h ? vals->h->next : NULL ; n ; n = n->next) {
513 9869 : sql_exp *ee = n->data;
514 :
515 9869 : if (min && max) {
516 9372 : if ((lval = find_prop_and_get(ee->p, PROP_MAX))) {
517 9326 : max = atom_cmp(lval, max) > 0 ? lval : max;
518 : } else {
519 : max = NULL;
520 : }
521 9372 : if ((lval = find_prop_and_get(ee->p, PROP_MIN))) {
522 9326 : min = atom_cmp(min, lval) > 0 ? lval : min;
523 : } else {
524 : min = NULL;
525 : }
526 : }
527 9869 : has_nil |= has_nil(ee);
528 : }
529 :
530 4314 : if (!has_nil)
531 3939 : set_has_no_nil(e);
532 4314 : if (min && max) {
533 3895 : set_minmax_property(sql, e, PROP_MAX, max);
534 3895 : set_minmax_property(sql, e, PROP_MIN, min);
535 : }
536 4314 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
537 4314 : p->value.dval = (dbl) list_length(vals);
538 : } else {
539 13777 : prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, e->p);
540 13777 : p->value.dval = 1;
541 : }
542 : break;
543 283834 : case e_cmp:
544 : /* TODO? propagating min/max/unique of booleans is not very worth it */
545 283834 : if (e->flag == cmp_or || e->flag == cmp_filter) {
546 15246 : if (!have_nil(e->l) && !have_nil(e->r))
547 10188 : set_has_no_nil(e);
548 268588 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
549 20212 : sql_exp *le = e->l;
550 20212 : if (!has_nil(le) && !have_nil(e->r))
551 16967 : set_has_no_nil(e);
552 : } else {
553 248376 : sql_exp *le = e->l, *re = e->r, *fe = e->f;
554 248376 : if (!has_nil(le) && !has_nil(re) && (!e->f || !has_nil(fe)))
555 175241 : set_has_no_nil(e);
556 : }
557 : break;
558 : case e_psm:
559 : break;
560 : }
561 :
562 : #ifndef NDEBUG
563 : {
564 : /* min and max cannot be NULL and min must be <= than max, if it happens the inner relation must be empty! */
565 3571066 : atom *min = find_prop_and_get(e->p, PROP_MIN), *max = find_prop_and_get(e->p, PROP_MAX);
566 :
567 3571068 : (void) min;
568 3571068 : (void) max;
569 3571068 : assert(!min || !min->isnull);
570 3571068 : assert(!max || !max->isnull);
571 3571068 : assert(!min || !max || (min && max)/* atom_cmp(min, max) <= 0*/);
572 : }
573 : #endif
574 3571068 : return e;
575 : }
576 :
577 : static list * /* Remove predicates always false from min/max values */
578 144472 : rel_prune_predicates(visitor *v, sql_rel *rel)
579 : {
580 144472 : if (rel->l) {
581 144472 : sql_rel *l = rel->l;
582 144472 : if (is_single(l))
583 3372 : return rel->exps;
584 : }
585 141100 : if (!list_empty(rel->attr))
586 3015 : return rel->exps;
587 291033 : for (node *n = rel->exps->h ; n ; n = n->next) {
588 152948 : sql_exp *e = n->data;
589 :
590 152948 : if (e->type == e_cmp && is_theta_exp(e->flag)) {
591 125918 : sql_exp *le = e->l, *re = e->r, *fe = e->f;
592 125918 : atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX),
593 125918 : *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
594 125918 : bool always_false = false, always_true = false;
595 :
596 125918 : if (fe && !is_symmetric(e)) {
597 3045 : atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
598 3045 : comp_type lower = range2lcompare(e->flag), higher = range2rcompare(e->flag);
599 3644 : int not_int1 = rval_min && lval_max && /* the middle and left intervals don't overlap */
600 1654 : (!is_anti(e) && (lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0)),
601 4059 : not_int2 = lval_min && fval_max && /* the middle and right intervals don't overlap */
602 2482 : (!is_anti(e) && (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0)),
603 3045 : not_int3 = rval_min && fval_max && /* the left interval is after the right one */
604 1285 : (!is_anti(e) && (atom_cmp(rval_min, fval_max) > 0));
605 :
606 3045 : always_false |= not_int1 || not_int2 || not_int3;
607 : /* for anti the middle must be before the left or after the right or the right after the left, for the other the middle must be always between the left and right intervals */
608 4089 : always_true |= !has_nil(le) && !has_nil(re) && !has_nil(fe) &&
609 3941 : lval_min && lval_max && rval_min && rval_max && fval_min && fval_max &&
610 573 : (is_anti(e) ? ((lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) || (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
611 488 : ((lower == cmp_gte ? atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0)));
612 : } else if (!fe) {
613 122857 : if (!is_semantics(e)) /* trivial not null cmp null case */
614 234248 : always_false |= !is_anti(e) && ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)));
615 122857 : switch (e->flag) {
616 109318 : case cmp_equal:
617 109318 : if (lval_min && lval_max && rval_min && rval_max && (!is_semantics(e) || !has_nil(le) || !has_nil(re)))
618 136810 : always_false |= (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
619 109318 : if (is_semantics(e)) { /* prune *= NULL cases */
620 5696 : always_false |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
621 11392 : always_true |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
622 : }
623 : break;
624 5347 : case cmp_notequal:
625 5347 : if (lval_min && lval_max && rval_min && rval_max)
626 7390 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? (atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0) : (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0));
627 5347 : if (is_semantics(e)) {
628 37 : always_true |= (is_anti(e) ? (exp_is_null(le) && exp_is_null(re)) : ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))));
629 74 : always_false |= (is_anti(e) ? ((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re))) : (exp_is_null(le) && exp_is_null(re)));
630 : }
631 : break;
632 5487 : case cmp_gt:
633 5487 : if (lval_max && rval_min)
634 1834 : always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
635 5487 : if (lval_min && rval_max)
636 3668 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
637 : break;
638 121 : case cmp_gte:
639 121 : if (lval_max && rval_min)
640 51 : always_false |= (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
641 121 : if (lval_min && rval_max)
642 102 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
643 : break;
644 2502 : case cmp_lt:
645 2502 : if (lval_min && rval_max)
646 1382 : always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
647 2502 : if (lval_max && rval_min)
648 2812 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
649 : break;
650 82 : case cmp_lte:
651 82 : if (lval_min && rval_max)
652 11 : always_false |= (is_anti(e) ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
653 82 : if (lval_max && rval_min)
654 22 : always_true |= !has_nil(le) && !has_nil(re) && (is_anti(e) ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
655 : break;
656 : default: /* Maybe later I can do cmp_in and cmp_notin but don't forget to remove is_theta_exp check up there */
657 : break;
658 : }
659 : }
660 125918 : assert(!always_false || !always_true);
661 125918 : if (always_false || always_true) {
662 3708 : sql_exp *ne = exp_atom_bool(v->sql->sa, always_true /* if always_true set then true else false */);
663 3708 : if (exp_name(e))
664 0 : exp_prop_alias(v->sql->sa, ne, e);
665 3708 : n->data = ne;
666 3708 : v->changes++;
667 : }
668 : }
669 : }
670 138085 : return rel->exps;
671 : }
672 :
673 : static sql_rel *
674 14 : set_setop_side(visitor *v, sql_rel *rel, sql_rel *side)
675 : {
676 14 : if (side == rel->r)
677 0 : rel->r = NULL;
678 : else
679 14 : rel->l = NULL;
680 14 : if (!is_simple_project(side->op) || rel_is_ref(side) || !list_empty(side->r)) {
681 0 : side = rel_project(v->sql->sa, side, rel_projections(v->sql, side, NULL, 0, 1));
682 0 : set_count_prop(v->sql->sa, side, get_rel_count(side->l));
683 0 : side->exps = exps_exp_visitor_bottomup(v, side, side->exps, 0, &rel_propagate_statistics, false);
684 : }
685 35 : for (node *n = rel->exps->h, *m = side->exps->h ; n && m ; n = n->next, m = m->next) {
686 21 : sql_exp *e1 = n->data, *e2 = m->data;
687 21 : exp_prop_alias(v->sql->sa, e2, e1);
688 : }
689 14 : list_hash_clear(side->exps);
690 :
691 14 : if (need_distinct(rel))
692 0 : set_distinct(side);
693 14 : side->p = prop_copy(v->sql->sa, rel->p);
694 14 : rel_destroy(rel);
695 14 : return side;
696 : }
697 :
698 : static BUN
699 26619 : trivial_project_exp_card(sql_exp *e)
700 : {
701 26990 : if (e->type == e_convert)
702 371 : return trivial_project_exp_card(e->l);
703 26619 : return e->type == e_atom && e->f ? (BUN) list_length(e->f) : 1;
704 : }
705 :
706 : static BUN
707 26223 : rel_calc_nuniques(mvc *sql, sql_rel *l, list *exps)
708 : {
709 26223 : BUN lv = get_rel_count(l);
710 :
711 26223 : if (lv == 0)
712 : return 0;
713 23481 : if (!list_empty(exps)) {
714 23481 : BUN nuniques = 0;
715 : /* compute the highest number of unique values */
716 58709 : for (node *n = exps->h ; n && nuniques != BUN_NONE ; n = n->next) {
717 35228 : sql_exp *e = n->data;
718 35228 : sql_rel *bt = NULL;
719 35228 : prop *p = NULL;
720 35228 : BUN euniques = BUN_NONE;
721 35228 : atom *min, *max, *sub = NULL;
722 35228 : sql_subtype *tp = exp_subtype(e);
723 35228 : sql_class ec = tp ? tp->type->eclass : EC_STRING; /* if 'e' has no type (eg parameter), use a non-number type to fail condition */
724 :
725 35228 : if ((p = find_prop(e->p, PROP_NUNIQUES))) {
726 25435 : euniques = (BUN) p->value.dval;
727 9793 : } else if (e->type == e_column && e->nid && rel_find_exp_and_corresponding_rel(l, e, false, &bt, NULL) && bt && (p = find_prop(bt->p, PROP_COUNT))) {
728 7685 : euniques = (BUN) p->value.lval;
729 : }
730 : /* use min to max range to compute number of possible values in the domain for number types */
731 35228 : if ((EC_TEMP(ec)||ec==EC_NUM||ec==EC_MONTH||ec==EC_POS) &&
732 24295 : (min = find_prop_and_get(e->p, PROP_MIN)) && (max = find_prop_and_get(e->p, PROP_MAX))) {
733 : /* the range includes min and max, so the atom_inc call is needed */
734 : /* if 'euniques' has number of distinct values, compute min between both */
735 19341 : if ((sub = atom_sub(sql->sa, max, min)) && (sub = atom_inc(sql->sa, sub)) && (sub = atom_cast(sql->sa, sub, sql_bind_localtype("oid"))))
736 0 : euniques = MIN(euniques, (BUN) sub->data.val.oval);
737 : }
738 35228 : if (euniques != BUN_NONE)
739 33120 : nuniques = MAX(nuniques, euniques); /* the highest cardinality sets the estimation */
740 : else
741 : nuniques = BUN_NONE;
742 : }
743 23481 : if (nuniques != BUN_NONE)
744 : return nuniques;
745 : }
746 : return lv;
747 : }
748 :
749 : static sql_rel *
750 1369029 : rel_get_statistics_(visitor *v, sql_rel *rel)
751 : {
752 : /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
753 1369029 : uint8_t has_special_modify = *(uint8_t*) v->data;
754 1369029 : bool can_be_pruned = !has_special_modify && v->storage_based_opt;
755 :
756 : /* Don't look at the same relation twice */
757 1369029 : if (are_statistics_gathered(rel->used))
758 : return rel;
759 1368930 : rel->used |= statistics_gathered;
760 :
761 1368930 : switch (rel->op) {
762 319473 : case op_basetable: {
763 319473 : sql_table *t = (sql_table *) rel->l;
764 319473 : sqlstore *store = v->sql->session->tr->store;
765 :
766 319473 : if (!list_empty(rel->exps)) {
767 1277785 : for (node *n = rel->exps->h ; n ; n = n->next)
768 958115 : rel_basetable_column_get_statistics(v->sql, rel, n->data);
769 : }
770 : /* Set table row count. TODO? look for remote tables. Don't look at storage for declared tables, because it won't be cleaned */
771 319674 : if (isTable(t) && t->s && !isDeclaredTable(t)) /* count active rows only */
772 264778 : set_count_prop(v->sql->sa, rel, (BUN)store->storage_api.count_del(v->sql->session->tr, t, CNT_ACTIVE));
773 : break;
774 : }
775 2794 : case op_union:
776 : case op_inter:
777 : case op_except: {
778 2794 : bool empty_cross = false;
779 2794 : int i = 0;
780 2794 : sql_rel *l = rel->l, *pl = rel->l, *r = rel->r, *pr = rel->r;
781 :
782 2794 : while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
783 0 : pl = pl->l;
784 2794 : while (is_sample(pr->op) || is_topn(pr->op))
785 0 : pr = pr->l;
786 : /* if it's not a projection, then project and propagate statistics */
787 2794 : if (!is_project(pl->op) && !is_base(pl->op)) {
788 0 : pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
789 0 : set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
790 0 : pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
791 : }
792 2794 : if (!is_project(pr->op) && !is_base(pr->op)) {
793 0 : pr = rel_project(v->sql->sa, pr, rel_projections(v->sql, pr, NULL, 0, 1));
794 0 : set_count_prop(v->sql->sa, pr, get_rel_count(pr->l));
795 0 : pr->exps = exps_exp_visitor_bottomup(v, pr, pr->exps, 0, &rel_propagate_statistics, false);
796 : }
797 :
798 11670 : for (node *n = rel->exps->h ; n ; n = n->next) {
799 8876 : empty_cross |= rel_setop_get_statistics(v->sql, rel, pl->exps, pr->exps, n->data, i);
800 8876 : i++;
801 : }
802 :
803 : /* propagate row count */
804 2794 : if (is_union(rel->op)) {
805 277 : BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
806 277 : rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
807 :
808 277 : if (lv == 0 && rv == 0) { /* both sides empty */
809 2 : if (can_be_pruned)
810 : empty_cross = true;
811 : else
812 2 : set_count_prop(v->sql->sa, rel, 0);
813 275 : } else if (can_be_pruned && lv == 0 && !rel_is_ref(rel)) { /* left side empty */
814 0 : rel = set_setop_side(v, rel, r);
815 0 : empty_cross = false; /* don't rewrite again */
816 275 : } else if (can_be_pruned && rv == 0 && !rel_is_ref(rel)) { /* right side empty */
817 0 : rel = set_setop_side(v, rel, l);
818 0 : empty_cross = false; /* don't rewrite again */
819 275 : } else if (lv != BUN_NONE && rv != BUN_NONE) {
820 7 : set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
821 : }
822 2517 : } else if (is_inter(rel->op) || is_except(rel->op)) {
823 2517 : BUN lv = need_distinct(rel) ? rel_calc_nuniques(v->sql, l, l->exps) : get_rel_count(l),
824 2517 : rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
825 :
826 2517 : if (lv == 0) { /* left side empty */
827 63 : if (can_be_pruned)
828 : empty_cross = true;
829 : else
830 5 : set_count_prop(v->sql->sa, rel, 0);
831 2454 : } else if (rv == 0) { /* right side empty */
832 17 : if (is_inter(rel->op)) {
833 3 : if (can_be_pruned)
834 : empty_cross = true;
835 : else
836 0 : set_count_prop(v->sql->sa, rel, 0);
837 14 : } else if (can_be_pruned && !rel_is_ref(rel)) {
838 14 : rel = set_setop_side(v, rel, l);
839 14 : empty_cross = false; /* don't rewrite again */
840 : } else {
841 0 : set_count_prop(v->sql->sa, rel, lv);
842 : }
843 : } else {
844 2437 : set_count_prop(v->sql->sa, rel, lv);
845 : }
846 : }
847 2794 : if (can_be_pruned && empty_cross) {
848 81 : rel_destroy(rel->l);
849 81 : rel->l = NULL;
850 81 : rel_destroy(rel->r);
851 81 : rel->r = NULL;
852 246 : for (node *n = rel->exps->h ; n ; n = n->next) {
853 165 : sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
854 165 : exp_prop_alias(v->sql->sa, a, e);
855 165 : n->data = a;
856 : }
857 81 : list_hash_clear(rel->exps);
858 81 : sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
859 81 : set_count_prop(v->sql->sa, l, 1);
860 81 : l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
861 81 : set_count_prop(v->sql->sa, l, 0);
862 81 : rel->op = op_project;
863 81 : rel->l = l;
864 81 : rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
865 81 : set_count_prop(v->sql->sa, rel, 0);
866 81 : set_nodistinct(rel); /* set relations may have distinct flag set */
867 81 : v->changes++;
868 : }
869 : break;
870 : }
871 13507 : case op_munion: {
872 13507 : list *l = rel->l, *nrels = sa_list(v->sql->sa);
873 13507 : BUN cnt = 0;
874 13507 : bool needs_pruning = false;
875 :
876 13507 : if (is_recursive(rel))
877 : break;
878 51358 : for (node *n = l->h; n; n = n->next) {
879 37922 : sql_rel *r = n->data, *pl = r;
880 :
881 37922 : while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and sample relations in the middle */
882 0 : pl = pl->l;
883 : /* if it's not a projection, then project and propagate statistics */
884 37922 : if (!is_project(pl->op) && !is_base(pl->op)) {
885 0 : pl = rel_project(v->sql->sa, pl, rel_projections(v->sql, pl, NULL, 0, 1));
886 0 : set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
887 0 : pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 0, &rel_propagate_statistics, false);
888 : }
889 37922 : nrels = append(nrels, pl);
890 : /* we need new munion statistics */
891 : /* propagate row count */
892 37922 : BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
893 : /* if PROP_COUNT does not exist we assume at least a row (see get_rel_count def) */
894 37922 : if (rv == BUN_NONE) {
895 1224 : cnt++;
896 1224 : continue;
897 : }
898 36698 : if (!rv && can_be_pruned)
899 6738 : needs_pruning = true;
900 : /* overflow check */
901 36698 : if (rv > (BUN_MAX - cnt))
902 37922 : rv = BUN_MAX;
903 : else
904 36698 : cnt += rv;
905 : }
906 13436 : int i = 0;
907 77474 : for (node *n = rel->exps->h ; n ; n = n->next, i++)
908 64038 : rel_munion_get_statistics(v->sql, rel, nrels, n->data, i);
909 :
910 13436 : if (needs_pruning && !rel_is_ref(rel)) {
911 4576 : v->changes++;
912 4576 : list *nl = sa_list(l->sa);
913 :
914 16871 : for (node *n = nrels->h; n; n = n->next) {
915 12295 : sql_rel *r = n->data;
916 12295 : BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
917 :
918 12295 : if (!rv) { /* keep last for now */
919 6267 : rel_destroy(r);
920 6267 : continue;
921 : }
922 6028 : nl = append(nl, r);
923 : }
924 4576 : rel->l = nl;
925 4576 : if (list_length(nl) == 1) {
926 4240 : sql_rel *l = rel->l = nl->h->data; /* ugh */
927 4240 : rel->r = NULL;
928 4240 : rel->op = op_project;
929 :
930 20979 : for (node *n = rel->exps->h, *m = l->exps->h ; n && m ; n = n->next, m = m->next) {
931 16739 : sql_exp *pe = n->data, *ie = m->data;
932 16739 : sql_exp *ne = exp_ref(v->sql, ie);
933 16739 : exp_prop_alias(v->sql->sa, ne, pe);
934 16739 : n->data = ne;
935 : }
936 4240 : list_hash_clear(rel->exps);
937 336 : } else if (list_empty(nl)) {
938 : /* empty select (project [ nils ] ) */
939 445 : for (node *n = rel->exps->h ; n ; n = n->next) {
940 345 : sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
941 345 : exp_prop_alias(v->sql->sa, a, e);
942 345 : n->data = a;
943 : }
944 100 : list_hash_clear(rel->exps);
945 100 : sql_rel *l = rel_project(v->sql->sa, NULL, rel->exps);
946 100 : set_count_prop(v->sql->sa, l, 1);
947 100 : l = rel_select(v->sql->sa, l, exp_atom_bool(v->sql->sa, 0));
948 100 : set_count_prop(v->sql->sa, l, 0);
949 100 : rel->op = op_project;
950 100 : rel->r = NULL;
951 100 : rel->l = l;
952 100 : rel->exps = rel_projections(v->sql, l, NULL, 1, 1);
953 100 : set_count_prop(v->sql->sa, rel, 0);
954 100 : set_nodistinct(rel); /* set relations may have distinct flag set */
955 : }
956 : } else {
957 8860 : set_count_prop(v->sql->sa, rel, cnt);
958 : }
959 : break;
960 : }
961 549025 : case op_join:
962 : case op_left:
963 : case op_right:
964 : case op_full:
965 : case op_semi:
966 : case op_anti:
967 : case op_select:
968 : case op_project:
969 : case op_groupby: {
970 549025 : if (is_groupby(rel->op) && !list_empty(rel->r))
971 16842 : rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
972 549023 : rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, &rel_propagate_statistics, false);
973 549007 : if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r))
974 41964 : rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false);
975 : /* The following optimizations can only be applied after propagating the statistics to rel->exps */
976 549010 : if (can_be_pruned && (is_join(rel->op) || is_select(rel->op)) && !list_empty(rel->exps)) {
977 144472 : int changes = v->changes;
978 144472 : rel->exps = rel_prune_predicates(v, rel);
979 144472 : if (v->changes > changes)
980 3675 : rel = rewrite_simplify(v, 0, v->value_based_opt, rel);
981 : }
982 :
983 : /* propagate row count */
984 549010 : sql_rel *l = rel->l, *r = rel->r;
985 549010 : switch (rel->op) {
986 137465 : case op_join:
987 : case op_left:
988 : case op_right:
989 : case op_full: {
990 137465 : BUN lv = get_rel_count(l), rv = get_rel_count(r), uniques_estimate = BUN_MAX, join_idx_estimate = BUN_MAX;
991 :
992 137465 : if (!list_empty(rel->exps) && !is_single(rel)) {
993 251415 : for (node *n = rel->exps->h ; n ; n = n->next) {
994 128189 : sql_exp *e = n->data, *el = e->l, *er = e->r;
995 :
996 128189 : if (find_prop(e->p, PROP_JOINIDX)) {
997 670 : join_idx_estimate = lv>rv?lv:rv;
998 127519 : } else if (e->type == e_cmp && e->flag == cmp_equal) {
999 : /* if one of the sides is unique, the cardinality will be that exact number, but look for nulls */
1000 123637 : if (!is_semantics(e) || !has_nil(el) || !has_nil(er)) {
1001 123461 : BUN lu = 0, ru = 0;
1002 123461 : prop *p = NULL;
1003 123461 : if ((p = find_prop(el->p, PROP_NUNIQUES)))
1004 91543 : lu = (BUN) p->value.dval;
1005 123461 : if ((p = find_prop(er->p, PROP_NUNIQUES)))
1006 106297 : ru = (BUN) p->value.dval;
1007 123461 : if (is_unique(el) || lu > lv) {
1008 74787 : BUN ncount = (is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv;
1009 74787 : uniques_estimate = MIN(uniques_estimate, ncount);
1010 48674 : } else if (is_unique(er) || ru > rv) {
1011 30619 : BUN ncount = (is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv;
1012 30619 : uniques_estimate = MIN(uniques_estimate, ncount);
1013 : }
1014 : }
1015 : }
1016 : }
1017 : }
1018 137465 : if (is_single(rel)) {
1019 4806 : set_count_prop(v->sql->sa, rel, lv);
1020 132659 : } else if (join_idx_estimate != BUN_MAX) {
1021 668 : set_count_prop(v->sql->sa, rel, join_idx_estimate);
1022 131991 : } else if (uniques_estimate != BUN_MAX) {
1023 98787 : set_count_prop(v->sql->sa, rel, uniques_estimate);
1024 33204 : } else if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
1025 : /* corner cases for outer joins */
1026 123 : if (is_left(rel->op)) {
1027 111 : set_count_prop(v->sql->sa, rel, lv);
1028 12 : } else if (is_right(rel->op)) {
1029 3 : set_count_prop(v->sql->sa, rel, rv);
1030 9 : } else if (is_full(rel->op) && lv != BUN_NONE && rv != BUN_NONE) {
1031 9 : set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX - lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
1032 0 : } else if (lv != BUN_NONE && rv != BUN_NONE) {
1033 0 : set_count_prop(v->sql->sa, rel, 0);
1034 : }
1035 33081 : } else if (lv == 0) {
1036 1206 : set_count_prop(v->sql->sa, rel, (is_right(rel->op) || is_full(rel->op)) ? rv : 0);
1037 32464 : } else if (rv == 0) {
1038 1576 : set_count_prop(v->sql->sa, rel, (is_left(rel->op) || is_full(rel->op)) ? lv : 0);
1039 31382 : } else if (lv != BUN_NONE && rv != BUN_NONE) {
1040 18544 : set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX / lv)) ? BUN_MAX : (lv * rv)); /* overflow check */
1041 : }
1042 : break;
1043 : }
1044 2941 : case op_anti:
1045 2941 : set_count_prop(v->sql->sa, rel, get_rel_count(l));
1046 2941 : break;
1047 113712 : case op_semi:
1048 : case op_select:
1049 : /* TODO calculate cardinalities using selectivities */
1050 113712 : if (list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
1051 5160 : set_count_prop(v->sql->sa, rel, 0);
1052 : } else {
1053 213812 : if (!list_empty(rel->exps) && !is_single(rel)) {
1054 105260 : BUN cnt = get_rel_count(l), u = 1;
1055 171338 : for (node *n = rel->exps->h ; n ; n = n->next) {
1056 112434 : sql_exp *e = n->data, *el = e->l, *er = e->r;
1057 :
1058 : /* simple expressions first */
1059 112434 : if (e->type == e_cmp && e->flag == cmp_equal && exp_is_atom(er)) {
1060 : /* use selectivity */
1061 61863 : prop *p;
1062 61863 : if ((p = find_prop(el->p, PROP_NUNIQUES))) {
1063 46356 : u = (BUN) p->value.dval;
1064 46356 : break;
1065 : }
1066 : }
1067 : }
1068 : /* u is an *estimate*, so don't set count_prop to 0 unless cnt is 0 */
1069 105260 : set_count_prop(v->sql->sa, rel, cnt == 0 ? 0 : u == 0 || u > cnt ? 1 : cnt/u);
1070 : } else {
1071 3292 : set_count_prop(v->sql->sa, rel, get_rel_count(l));
1072 : }
1073 : }
1074 : break;
1075 270121 : case op_project:
1076 270121 : if (l) {
1077 257398 : if (need_distinct(rel)) {
1078 114 : set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->exps));
1079 : } else {
1080 257284 : set_count_prop(v->sql->sa, rel, get_rel_count(l));
1081 : }
1082 : } else {
1083 12723 : BUN card = 1;
1084 :
1085 12723 : if (!list_empty(rel->exps)) {
1086 37990 : for (node *n = rel->exps->h ; n ; n = n->next) {
1087 25267 : sql_exp *e = n->data;
1088 25267 : card = MAX(card, trivial_project_exp_card(e));
1089 : }
1090 : }
1091 12723 : set_count_prop(v->sql->sa, rel, card);
1092 : }
1093 : break;
1094 24359 : case op_groupby:
1095 24359 : if (list_empty(rel->r)) {
1096 7517 : set_count_prop(v->sql->sa, rel, 1);
1097 : } else {
1098 16842 : set_count_prop(v->sql->sa, rel, rel_calc_nuniques(v->sql, l, rel->r));
1099 : }
1100 : break;
1101 : default:
1102 : break;
1103 : }
1104 : break;
1105 : }
1106 16653 : case op_topn: {
1107 16653 : BUN lv = get_rel_count(rel->l);
1108 :
1109 16653 : if (lv != BUN_NONE) {
1110 16630 : sql_exp *le = rel->exps->h->data, *oe = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
1111 84 : if (oe && oe->l && exp_is_not_null(oe)) { /* no parameters */
1112 84 : BUN offset = (BUN) ((atom*)oe->l)->data.val.lval;
1113 84 : lv = offset >= lv ? 0 : lv - offset;
1114 : }
1115 16630 : if (le->l && exp_is_not_null(le)) {
1116 16595 : BUN limit = (BUN) ((atom*)le->l)->data.val.lval;
1117 16595 : lv = MIN(lv, limit);
1118 : }
1119 16630 : set_count_prop(v->sql->sa, rel, lv);
1120 : }
1121 : break;
1122 : }
1123 22 : case op_sample: {
1124 22 : BUN lv = get_rel_count(rel->l);
1125 :
1126 22 : if (lv != BUN_NONE) {
1127 4 : sql_exp *se = rel->exps->h->data;
1128 4 : sql_subtype *tp = exp_subtype(se);
1129 :
1130 4 : if (se->l && tp->type->eclass == EC_NUM) { /* sample is a number of rows */
1131 4 : BUN sample = (BUN) ((atom*)se->l)->data.val.lval;
1132 4 : lv = MIN(lv, sample);
1133 0 : } else if (se->l) { /* sample is a percentage of rows */
1134 0 : dbl percent = ((atom*)se->l)->data.val.dval;
1135 0 : assert(tp->type->eclass == EC_FLT);
1136 0 : lv = (BUN) ceil((dbl)lv * percent);
1137 : }
1138 4 : set_count_prop(v->sql->sa, rel, lv);
1139 : }
1140 : break;
1141 : }
1142 6225 : case op_table: {
1143 6225 : sql_exp *op = rel->r;
1144 6225 : if (rel->flag != TRIGGER_WRAPPER && op) {
1145 5913 : sql_subfunc *f = op->f;
1146 5913 : if (f->func->lang == FUNC_LANG_SQL) {
1147 97 : set_count_prop(v->sql->sa, rel, 1000 /* just some fallback value */);
1148 5816 : } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "storage") == 0) {
1149 839 : set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of catalog */);
1150 4977 : } else if (f->func->lang == FUNC_LANG_MAL && strcmp(f->func->base.name, "db_users") == 0) {
1151 0 : set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of users */);
1152 4977 : } else if (f->func->lang == FUNC_LANG_MAL && strncmp(f->func->base.name, "querylog", 8) == 0) {
1153 1105 : set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of querylog */);
1154 3872 : } else if (f->func->lang == FUNC_LANG_MAL &&
1155 3697 : (strcmp(f->func->base.name, "queue") == 0 ||
1156 3428 : strcmp(f->func->base.name, "optimizers") == 0 ||
1157 3111 : strcmp(f->func->base.name, "env") == 0 ||
1158 2744 : strcmp(f->func->base.name, "keywords") == 0 ||
1159 2744 : strcmp(f->func->base.name, "statistics") == 0 ||
1160 2065 : strcmp(f->func->base.name, "rejects") == 0 ||
1161 1806 : strcmp(f->func->base.name, "schemastorage") == 0 ||
1162 1806 : strncmp(f->func->base.name, "storage", 7) == 0 ||
1163 1806 : strcmp(f->func->base.name, "sessions") == 0) ) {
1164 2186 : set_count_prop(v->sql->sa, rel, 1000 /* TODO get size of queue */);
1165 : }
1166 : /* else {
1167 : printf("%%func needs stats : %s\n", f->func->base.name);
1168 : } */
1169 : }
1170 : break;
1171 : }
1172 : /*These relations are less important for now
1173 : TODO later we can tune it */
1174 : case op_insert:
1175 : case op_update:
1176 : case op_delete:
1177 : case op_truncate:
1178 : case op_ddl:
1179 : default:
1180 : break;
1181 : }
1182 :
1183 : return rel;
1184 : }
1185 :
1186 : static sql_rel *
1187 557072 : rel_get_statistics(visitor *v, global_props *gp, sql_rel *rel)
1188 : {
1189 : /* Don't prune updates as pruning will possibly result in removing the joins which therefore cannot be used for constraint checking */
1190 557072 : uint8_t has_special_modify = (uint8_t) gp->has_special_modify;
1191 557072 : v->data = &has_special_modify;
1192 557072 : rel = rel_visitor_bottomup(v, rel, &rel_get_statistics_);
1193 557504 : v->data = gp;
1194 557504 : return rel;
1195 : }
1196 :
1197 : run_optimizer
1198 734184 : bind_get_statistics(visitor *v, global_props *gp)
1199 : {
1200 734184 : (void) v;
1201 734184 : return (gp->opt_level == 1 && !gp->cnt[op_insert]) ? rel_get_statistics : NULL;
1202 : }
1203 :
1204 :
1205 : static bool
1206 45468 : point_select_on_unique_column(sql_rel *rel)
1207 : {
1208 45468 : if (is_select(rel->op) && !list_empty(rel->exps)) {
1209 38569 : for (node *n = rel->exps->h; n ; n = n->next) {
1210 20067 : sql_exp *e = n->data, *el = e->l, *er = e->r, *found = NULL;
1211 :
1212 20067 : if (is_compare(e->type) && e->flag == cmp_equal) {
1213 15239 : if (is_numeric_upcast(el))
1214 0 : el = el->l;
1215 15239 : if (is_numeric_upcast(er))
1216 0 : er = er->l;
1217 15239 : if (is_alias(el->type) && exp_is_atom(er) && (found = rel_find_exp(rel->l, el)) &&
1218 764 : is_unique(found) && (!is_semantics(e) || !has_nil(found) || !has_nil(er)))
1219 : return true;
1220 14475 : if (is_alias(er->type) && exp_is_atom(el) && (found = rel_find_exp(rel->l, er)) &&
1221 0 : is_unique(found) && (!is_semantics(e) || !has_nil(el) || !has_nil(found)))
1222 : return true;
1223 : }
1224 : }
1225 : }
1226 : return false;
1227 : }
1228 :
1229 : /*
1230 : * A point select on an unique column reduces the number of rows to 1. If the same select is under a
1231 : * join, the opposite side's select can be pushed above the join.
1232 : */
1233 : static inline sql_rel *
1234 698658 : rel_push_select_up(visitor *v, sql_rel *rel)
1235 : {
1236 698658 : if ((is_innerjoin(rel->op) || is_left(rel->op) || is_right(rel->op) || is_semi(rel->op)) && !is_single(rel)) {
1237 123001 : sql_rel *l = rel->l, *r = rel->r;
1238 123001 : bool can_pushup_left = is_select(l->op) && !rel_is_ref(l) && !is_single(l) && (is_innerjoin(rel->op) || is_left(rel->op) || is_semi(rel->op)),
1239 123001 : can_pushup_right = is_select(r->op) && !rel_is_ref(r) && !is_single(r) && (is_innerjoin(rel->op) || is_right(rel->op));
1240 :
1241 123001 : if (can_pushup_left || can_pushup_right) {
1242 35961 : if (can_pushup_left)
1243 20713 : can_pushup_left = point_select_on_unique_column(r);
1244 35961 : if (can_pushup_right)
1245 24755 : can_pushup_right = point_select_on_unique_column(l);
1246 :
1247 : /* if both selects retrieve one row each, it's not worth it to push both up */
1248 35961 : if (can_pushup_left && !can_pushup_right) {
1249 698 : sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
1250 698 : nrel->l = l->l;
1251 698 : rel = rel_inplace_select(rel, nrel, l->exps);
1252 698 : assert(is_select(rel->op));
1253 698 : v->changes++;
1254 35263 : } else if (!can_pushup_left && can_pushup_right) {
1255 14 : sql_rel *nrel = rel_dup_copy(v->sql->sa, rel);
1256 14 : nrel->r = r->l;
1257 14 : rel = rel_inplace_select(rel, nrel, r->exps);
1258 14 : assert(is_select(rel->op));
1259 14 : v->changes++;
1260 : }
1261 : }
1262 : }
1263 698658 : return rel;
1264 : }
1265 :
1266 : static int
1267 39115 : sql_class_base_score(visitor *v, sql_column *c, sql_subtype *t, bool equality_based)
1268 : {
1269 39115 : int de;
1270 :
1271 39115 : if (!t)
1272 : return 0;
1273 39115 : switch (ATOMstorage(t->type->localtype)) {
1274 : case TYPE_bte:
1275 : return 150 - 8;
1276 1607 : case TYPE_sht:
1277 1607 : return 150 - 16;
1278 16484 : case TYPE_int:
1279 16484 : return 150 - 32;
1280 471 : case TYPE_void:
1281 : case TYPE_lng:
1282 471 : return 150 - 64;
1283 : case TYPE_uuid:
1284 : #ifdef HAVE_HGE
1285 : case TYPE_hge:
1286 : #endif
1287 : return 150 - 128;
1288 2 : case TYPE_flt:
1289 2 : return 75 - 24;
1290 : case TYPE_dbl:
1291 : return 75 - 53;
1292 16220 : default:
1293 16220 : if (equality_based && c && v->storage_based_opt && (de = mvc_is_duplicate_eliminated(v->sql, c)))
1294 1495 : return 150 - de * 8;
1295 : /* strings and blobs not duplicate eliminated don't get any points here */
1296 : return 0;
1297 : }
1298 : }
1299 :
1300 : static int
1301 15137 : score_se_base(visitor *v, sql_rel *rel, sql_exp *e)
1302 : {
1303 15137 : int res = 0;
1304 15137 : sql_subtype *t = exp_subtype(e);
1305 15137 : sql_column *c = NULL;
1306 :
1307 15137 : if (e->type == e_convert) /* keep unsafes at the end (TODO improve) */
1308 : return -1000;
1309 : /* can we find out if the underlying table is sorted */
1310 14729 : if ((c = exp_find_column(rel, e, -2)) && v->storage_based_opt && mvc_is_sorted(v->sql, c))
1311 14729 : res += 600;
1312 :
1313 : /* prefer the shorter var types over the longer ones */
1314 14729 : res += sql_class_base_score(v, c, t, is_equality_or_inequality_exp(e->flag)); /* smaller the type, better */
1315 14729 : return res;
1316 : }
1317 :
1318 : static int
1319 18526 : score_se(visitor *v, sql_rel *rel, sql_exp *e)
1320 : {
1321 18526 : int score = 0;
1322 18526 : if (e->type == e_cmp && !is_complex_exp(e->flag)) {
1323 15137 : sql_exp *l = e->l;
1324 :
1325 15137 : while (l->type == e_cmp) { /* go through nested comparisons */
1326 2 : sql_exp *ll;
1327 :
1328 2 : if (l->flag == cmp_filter || l->flag == cmp_or)
1329 0 : ll = ((list*)l->l)->h->data;
1330 : else
1331 2 : ll = l->l;
1332 2 : if (ll->type != e_cmp)
1333 : break;
1334 : l = ll;
1335 : }
1336 15137 : score += score_se_base(v, rel, l);
1337 : }
1338 18526 : score += exp_keyvalue(e);
1339 18526 : return score;
1340 : }
1341 :
1342 : static inline sql_rel *
1343 698659 : rel_select_order(visitor *v, sql_rel *rel)
1344 : {
1345 698659 : int *scores = NULL;
1346 698659 : sql_exp **exps = NULL;
1347 :
1348 698659 : if (is_select(rel->op) && list_length(rel->exps) > 1) {
1349 7920 : node *n;
1350 7920 : int i, nexps = list_length(rel->exps);
1351 7920 : scores = SA_NEW_ARRAY(v->sql->ta, int, nexps);
1352 7920 : exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, nexps);
1353 :
1354 26446 : for (i = 0, n = rel->exps->h; n; i++, n = n->next) {
1355 18555 : exps[i] = n->data;
1356 18555 : if (find_prop(exps[i]->p, PROP_HASHCOL))
1357 : return rel;
1358 18526 : scores[i] = score_se(v, rel, n->data);
1359 : }
1360 7891 : GDKqsort(scores, exps, NULL, nexps, sizeof(int), sizeof(void *), TYPE_int, true, true);
1361 :
1362 26417 : for (i = 0, n = rel->exps->h; n; i++, n = n->next)
1363 18526 : n->data = exps[i];
1364 : }
1365 :
1366 : return rel;
1367 : }
1368 :
1369 : /* Compute the efficiency of using this expression early in a group by list */
1370 : static int
1371 24386 : score_gbe(visitor *v, sql_rel *rel, sql_exp *e)
1372 : {
1373 24386 : int res = 0;
1374 24386 : sql_subtype *t = exp_subtype(e);
1375 24386 : sql_column *c = exp_find_column(rel, e, -2);
1376 :
1377 24386 : if (e->card == CARD_ATOM) /* constants are trivial to group */
1378 38 : res += 1000;
1379 : /* can we find out if the underlying table is sorted */
1380 24386 : if (is_unique(e) || find_prop(e->p, PROP_HASHCOL) || (c && v->storage_based_opt && mvc_is_unique(v->sql, c))) /* distinct columns */
1381 2466 : res += 700;
1382 21132 : if (c && v->storage_based_opt && mvc_is_sorted(v->sql, c))
1383 2484 : res += 500;
1384 24386 : if (find_prop(e->p, PROP_HASHIDX)) /* has hash index */
1385 0 : res += 200;
1386 :
1387 : /* prefer the shorter var types over the longer ones */
1388 24386 : res += sql_class_base_score(v, c, t, true); /* smaller the type, better */
1389 24386 : return res;
1390 : }
1391 :
1392 : /* reorder group by expressions */
1393 : static inline sql_rel *
1394 698659 : rel_groupby_order(visitor *v, sql_rel *rel)
1395 : {
1396 698659 : int *scores = NULL;
1397 698659 : sql_exp **exps = NULL;
1398 :
1399 698659 : if (is_groupby(rel->op) && list_length(rel->r) > 1) {
1400 7998 : node *n;
1401 7998 : list *gbe = rel->r;
1402 7998 : int i, ngbe = list_length(gbe);
1403 7998 : scores = SA_NEW_ARRAY(v->sql->ta, int, ngbe);
1404 7998 : exps = SA_NEW_ARRAY(v->sql->ta, sql_exp*, ngbe);
1405 :
1406 : /* first sorting step, give priority for integers and sorted columns */
1407 32384 : for (i = 0, n = gbe->h; n; i++, n = n->next) {
1408 24386 : exps[i] = n->data;
1409 24386 : scores[i] = score_gbe(v, rel, exps[i]);
1410 : }
1411 7998 : GDKqsort(scores, exps, NULL, ngbe, sizeof(int), sizeof(void *), TYPE_int, true, true);
1412 :
1413 : /* second sorting step, give priority to strings with lower number of digits */
1414 17496 : for (i = ngbe - 1; i && !scores[i]; i--); /* find expressions with no score from the first round */
1415 7998 : if (scores[i])
1416 7002 : i++;
1417 7998 : if (ngbe - i > 1) {
1418 12055 : for (int j = i; j < ngbe; j++) {
1419 9065 : sql_subtype *t = exp_subtype(exps[j]);
1420 9065 : scores[j] = t ? t->digits : 0;
1421 : }
1422 : /* the less number of digits the better, order ascending */
1423 2990 : GDKqsort(scores + i, exps + i, NULL, ngbe - i, sizeof(int), sizeof(void *), TYPE_int, false, true);
1424 : }
1425 :
1426 32384 : for (i = 0, n = gbe->h; n; i++, n = n->next)
1427 24386 : n->data = exps[i];
1428 : }
1429 :
1430 698660 : return rel;
1431 : }
1432 :
1433 : /* This optimization loop contains optimizations that can potentially use statistics */
1434 : static sql_rel *
1435 698659 : rel_final_optimization_loop_(visitor *v, sql_rel *rel)
1436 : {
1437 : /* Run rel_push_select_up only once at the end to avoid an infinite optimization loop */
1438 698659 : rel = rel_push_select_up(v, rel);
1439 698658 : rel = rel_select_order(v, rel);
1440 :
1441 : /* TODO? Maybe later add rel_simplify_count, rel_join2semijoin, rel_simplify_fk_joins,
1442 : rel_distinct_project2groupby, rel_simplify_predicates, rel_simplify_math,
1443 : rel_distinct_aggregate_on_unique_values */
1444 :
1445 698659 : rel = rel_groupby_order(v, rel);
1446 698659 : return rel;
1447 : }
1448 :
1449 : static sql_rel *
1450 70855 : rel_final_optimization_loop(visitor *v, global_props *gp, sql_rel *rel)
1451 : {
1452 70855 : (void) gp;
1453 70855 : return rel_visitor_bottomup(v, rel, &rel_final_optimization_loop_);
1454 : }
1455 :
1456 : run_optimizer
1457 734656 : bind_final_optimization_loop(visitor *v, global_props *gp)
1458 : {
1459 734656 : int flag = v->sql->sql_optimizer;
1460 : /* At the moment, this optimizer has dependency on 3 flags */
1461 565428 : return gp->opt_level == 1 && (gp->cnt[op_groupby] || gp->cnt[op_select]) &&
1462 805514 : (flag & push_select_up) && (flag & optimize_select_and_joins_topdown) && (flag & optimize_projections) ? rel_final_optimization_loop : NULL;
1463 : }
|