Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer.h"
15 : #include "rel_optimizer_private.h"
16 : #include "rel_exp.h"
17 : #include "rel_select.h"
18 :
19 : static void
20 0 : rel_no_rename_exps( list *exps )
21 : {
22 0 : for (node *n = exps->h; n; n = n->next) {
23 0 : sql_exp *e = n->data;
24 :
25 0 : exp_setalias(e, e->alias.label, e->l, e->r);
26 : }
27 0 : list_hash_clear(exps);
28 0 : }
29 :
30 : /* positional renames ! */
31 : /* get the names (aka aliases) from the src_exps list and rename the dest_exps list */
32 : void
33 70267 : rel_rename_exps( mvc *sql, list *src_exps, list *dest_exps)
34 : {
35 70267 : node *n, *m;
36 :
37 70267 : (void)sql;
38 : /* check if a column uses an alias earlier in the list */
39 :
40 70267 : assert(list_length(src_exps) <= list_length(dest_exps));
41 585345 : for (n = src_exps->h, m = dest_exps->h; n && m; n = n->next, m = m->next) {
42 515078 : sql_exp *s = n->data;
43 515078 : sql_exp *d = m->data;
44 515078 : const char *rname = exp_relname(s);
45 :
46 515078 : exp_setalias(d, s->alias.label, rname, exp_name(s));
47 : }
48 70267 : list_hash_clear(dest_exps);
49 70267 : }
50 :
51 : sql_rel *
52 206999 : rel_find_ref( sql_rel *r)
53 : {
54 626678 : while (!rel_is_ref(r) && r->l &&
55 539524 : (is_project(r->op) || is_select(r->op) /*|| is_join(r->op)*/))
56 : r = r->l;
57 206999 : if (rel_is_ref(r))
58 73023 : return r;
59 : return NULL;
60 : }
61 :
62 : /* merge projection */
63 :
64 : /* push an expression through a projection.
65 : * The result should again be used in a projection.
66 : */
67 : static list *
68 54618 : exps_push_down_prj(mvc *sql, list *exps, sql_rel *f, sql_rel *t, bool keepalias)
69 : {
70 54618 : node *n;
71 54618 : list *nl = new_exp_list(sql->sa);
72 :
73 190683 : for(n = exps->h; n; n = n->next) {
74 138416 : sql_exp *arg = n->data, *narg = NULL;
75 :
76 138416 : narg = exp_push_down_prj(sql, arg, f, t);
77 138416 : if (!narg)
78 : return NULL;
79 136065 : narg = exp_propagate(sql->sa, narg, arg);
80 136065 : if (!keepalias && narg->type == e_column)
81 39483 : exp_setalias(narg, arg->alias.label, narg->l, narg->r);
82 136065 : append(nl, narg);
83 : }
84 : return nl;
85 : }
86 :
87 : sql_exp *
88 2307776 : exp_push_down_prj(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t)
89 : {
90 2307776 : sql_exp *ne = NULL, *l = NULL, *r = NULL, *r2 = NULL;
91 :
92 2307776 : assert(is_project(f->op));
93 :
94 2307776 : switch(e->type) {
95 1928502 : case e_column:
96 1928502 : assert(e->nid);
97 1928502 : ne = exps_bind_nid(f->exps, e->nid);
98 1928501 : if (!ne || (ne->type != e_column && (ne->type != e_atom || ne->f)))
99 : return NULL;
100 1650128 : while (ne && (has_label(ne) || is_selfref(ne) /*inside this list */) && is_simple_project(f->op) && ne->type == e_column) {
101 11689 : sql_exp *oe = e, *one = ne;
102 :
103 11689 : e = ne;
104 11689 : ne = NULL;
105 11689 : if (e->nid)
106 11689 : ne = exps_bind_nid(f->exps, e->nid);
107 11689 : if (ne && ne != one && list_position(f->exps, ne) >= list_position(f->exps, one))
108 11689 : ne = NULL;
109 11689 : if (!ne || ne == one) {
110 : ne = one;
111 1649729 : e = oe;
112 : break;
113 : }
114 399 : if (ne->type != e_column && (ne->type != e_atom || ne->f))
115 : return NULL;
116 : }
117 : /* possibly a groupby/project column is renamed */
118 1649729 : if (is_groupby(f->op) && !list_empty(f->r) && ne->type == e_column) {
119 135 : sql_exp *gbe = NULL;
120 135 : if (ne->nid)
121 135 : gbe = exps_bind_nid(f->exps, ne->nid);
122 135 : ne = gbe;
123 135 : if (!ne || (ne->type != e_column && (ne->type != e_atom || ne->f)))
124 0 : return NULL;
125 : }
126 1649729 : return exp_copy(sql, ne);
127 52273 : case e_cmp:
128 52273 : if (e->flag == cmp_or || e->flag == cmp_filter) {
129 3045 : list *l = NULL, *r = NULL;
130 :
131 3045 : if (!(l = exps_push_down_prj(sql, e->l, f, t, true)) || !(r = exps_push_down_prj(sql, e->r, f, t, true)))
132 1194 : return NULL;
133 1851 : if (e->flag == cmp_filter) {
134 757 : ne = exp_filter(sql->sa, l, r, e->f, is_anti(e));
135 : } else {
136 1094 : ne = exp_or(sql->sa, l, r, is_anti(e));
137 : }
138 49228 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
139 4184 : list *r = NULL;
140 :
141 4184 : if (!(l = exp_push_down_prj(sql, e->l, f, t)) || !(r = exps_push_down_prj(sql, e->r, f, t, true)))
142 3717 : return NULL;
143 467 : ne = exp_in(sql->sa, l, r, e->flag);
144 : } else {
145 45044 : if (!(l = exp_push_down_prj(sql, e->l, f, t)) || !(r = exp_push_down_prj(sql, e->r, f, t)) || (e->f && !(r2 = exp_push_down_prj(sql, e->f, f, t))))
146 21719 : return NULL;
147 23325 : if (e->f) {
148 741 : ne = exp_compare2(sql->sa, l, r, r2, e->flag, is_symmetric(e));
149 : } else {
150 22584 : ne = exp_compare(sql->sa, l, r, e->flag);
151 : }
152 : }
153 25643 : if (!ne)
154 : return NULL;
155 25643 : return exp_propagate(sql->sa, ne, e);
156 180370 : case e_convert:
157 180370 : if (!(l = exp_push_down_prj(sql, e->l, f, t)))
158 : return NULL;
159 122349 : ne = exp_convert(sql, l, exp_fromtype(e), exp_totype(e));
160 122349 : return exp_propagate(sql->sa, ne, e);
161 48351 : case e_aggr:
162 : case e_func: {
163 48351 : list *l = e->l, *nl = NULL;
164 48351 : sql_exp *ne = NULL;
165 :
166 48351 : if (e->type == e_func && exp_unsafe(e, false, false))
167 : return NULL;
168 48335 : if (!list_empty(l)) {
169 48335 : nl = exps_push_down_prj(sql, l, f, t, false);
170 48335 : if (!nl)
171 : return NULL;
172 : }
173 47188 : if (e->type == e_func)
174 47188 : ne = exp_op(sql->sa, nl, e->f);
175 : else
176 0 : ne = exp_aggr(sql->sa, nl, e->f, need_distinct(e), need_no_nil(e), e->card, has_nil(e));
177 47188 : return exp_propagate(sql->sa, ne, e);
178 : }
179 98280 : case e_atom: {
180 98280 : list *l = e->f, *nl = NULL;
181 :
182 98280 : if (!list_empty(l)) {
183 766 : nl = exps_push_down_prj(sql, l, f, t, false);
184 766 : if (!nl)
185 : return NULL;
186 756 : ne = exp_values(sql->sa, nl);
187 : } else {
188 97514 : ne = exp_copy(sql, e);
189 : }
190 98270 : return exp_propagate(sql->sa, ne, e);
191 : }
192 : case e_psm:
193 : if (e->type == e_atom && e->f) /* value list */
194 : return NULL;
195 : return e;
196 : }
197 : return NULL;
198 : }
199 :
200 : atom *
201 628 : exp_flatten(mvc *sql, bool value_based_opt, sql_exp *e)
202 : {
203 628 : if (e->type == e_atom) {
204 379 : return value_based_opt ? exp_value(sql, e) : (atom *) e->l;
205 249 : } else if (e->type == e_convert) {
206 51 : atom *v = exp_flatten(sql, value_based_opt, e->l);
207 :
208 51 : if (v)
209 24 : return atom_cast(sql->sa, v, exp_subtype(e));
210 198 : } else if (e->type == e_func) {
211 198 : sql_subfunc *f = e->f;
212 198 : list *l = e->l;
213 198 : sql_arg *res = (f->func->res)?(f->func->res->h->data):NULL;
214 :
215 : /* TODO handle date + x months */
216 198 : if (!f->func->s && strcmp(f->func->base.name, "sql_add") == 0 && list_length(l) == 2 && res && EC_NUMBER(res->type.type->eclass)) {
217 24 : atom *l1 = exp_flatten(sql, value_based_opt, l->h->data);
218 24 : atom *l2 = exp_flatten(sql, value_based_opt, l->h->next->data);
219 24 : if (l1 && l2)
220 3 : return atom_add(sql->sa, l1, l2);
221 174 : } else if (!f->func->s && strcmp(f->func->base.name, "sql_sub") == 0 && list_length(l) == 2 && res && EC_NUMBER(res->type.type->eclass)) {
222 10 : atom *l1 = exp_flatten(sql, value_based_opt, l->h->data);
223 10 : atom *l2 = exp_flatten(sql, value_based_opt, l->h->next->data);
224 10 : if (l1 && l2)
225 9 : return atom_sub(sql->sa, l1, l2);
226 : }
227 : }
228 : return NULL;
229 : }
230 :
231 : int
232 150 : exp_range_overlap(atom *min, atom *max, atom *emin, atom *emax, bool min_exclusive, bool max_exclusive)
233 : {
234 150 : if (!min || !max || !emin || !emax || min->isnull || max->isnull || emin->isnull || emax->isnull)
235 : return 0;
236 :
237 150 : if ((!min_exclusive && VALcmp(&(emax->data), &(min->data)) < 0) || (min_exclusive && VALcmp(&(emax->data), &(min->data)) <= 0))
238 29 : return 0;
239 121 : if ((!max_exclusive && VALcmp(&(emin->data), &(max->data)) > 0) || (max_exclusive && VALcmp(&(emin->data), &(max->data)) >= 0))
240 34 : return 0;
241 : return 1;
242 : }
243 :
244 :
245 : /* if local_proj is >= -1, the current expression is from the same projection
246 : if local_proj is -1, then we don't care about self references (eg used to check for order by exps) */
247 : static int exp_mark_used(sql_rel *subrel, sql_exp *e, int local_proj);
248 :
249 : static int
250 775722 : exps_mark_used(sql_rel *subrel, list *l, int local_proj)
251 : {
252 775722 : int nr = 0;
253 775722 : if (list_empty(l))
254 : return nr;
255 :
256 2499500 : for (node *n = l->h; n != NULL; n = n->next)
257 1723840 : nr += exp_mark_used(subrel, n->data, local_proj);
258 : return nr;
259 : }
260 :
261 : static int
262 6436180 : exp_mark_used(sql_rel *subrel, sql_exp *e, int local_proj)
263 : {
264 6635317 : int nr = 0;
265 6635317 : sql_exp *ne = NULL;
266 :
267 6635317 : switch(e->type) {
268 4350643 : case e_column:
269 4350643 : ne = rel_find_exp(subrel, e);
270 : /* if looking in the same projection, make sure 'ne' is projected before the searched column */
271 4350669 : if (ne && local_proj > -1 && list_position(subrel->exps, ne) >= local_proj)
272 5499961 : ne = NULL;
273 : break;
274 199137 : case e_convert:
275 199137 : return exp_mark_used(subrel, e->l, local_proj);
276 736859 : case e_aggr:
277 : case e_func: {
278 736859 : if (e->l)
279 707184 : nr += exps_mark_used(subrel, e->l, local_proj);
280 736857 : if (e->r) {
281 1844 : list *r = e->r;
282 1844 : list *obes = r->h->data;
283 1844 : nr += exps_mark_used(subrel, obes, local_proj);
284 1844 : if (r->h->next) {
285 0 : list *exps = r->h->next->data;
286 0 : nr += exps_mark_used(subrel, exps, local_proj);
287 : }
288 : }
289 : break;
290 : }
291 412439 : case e_cmp:
292 412439 : if (e->flag == cmp_or || e->flag == cmp_filter) {
293 20832 : nr += exps_mark_used(subrel, e->l, local_proj);
294 20832 : nr += exps_mark_used(subrel, e->r, local_proj);
295 391607 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
296 20585 : nr += exp_mark_used(subrel, e->l, local_proj);
297 20585 : nr += exps_mark_used(subrel, e->r, local_proj);
298 : } else {
299 371022 : nr += exp_mark_used(subrel, e->l, local_proj);
300 371022 : nr += exp_mark_used(subrel, e->r, local_proj);
301 371022 : if (e->f)
302 6432 : nr += exp_mark_used(subrel, e->f, local_proj);
303 : }
304 : break;
305 936239 : case e_atom:
306 : /* atoms are used in e_cmp */
307 936239 : e->used = 1;
308 : /* return 0 as constants may require a full column ! */
309 936239 : if (e->f)
310 4446 : nr += exps_mark_used(subrel, e->f, local_proj);
311 : return nr;
312 0 : case e_psm:
313 0 : if (e->flag & PSM_SET || e->flag & PSM_RETURN || e->flag & PSM_EXCEPTION) {
314 0 : nr += exp_mark_used(subrel, e->l, local_proj);
315 0 : } else if (e->flag & PSM_WHILE || e->flag & PSM_IF) {
316 0 : nr += exp_mark_used(subrel, e->l, local_proj);
317 0 : nr += exps_mark_used(subrel, e->r, local_proj);
318 0 : if (e->flag == PSM_IF && e->f)
319 0 : nr += exps_mark_used(subrel, e->f, local_proj);
320 : }
321 0 : e->used = 1;
322 0 : break;
323 : }
324 5499961 : if (ne && e != ne) {
325 2364545 : if (local_proj == -2 || ne->type != e_column || (has_label(ne) || (ne->alias.rname && ne->alias.rname[0] == '%')) || (subrel->l && !is_munion(subrel->op) && !rel_find_exp(subrel->l, e)))
326 2256976 : ne->used = 1;
327 2364545 : return ne->used;
328 : }
329 : return nr;
330 : }
331 :
332 : static void
333 40095 : positional_exps_mark_used( sql_rel *rel, sql_rel *subrel )
334 : {
335 40095 : assert(rel->exps);
336 :
337 40095 : if ((is_topn(subrel->op) || is_sample(subrel->op)) && subrel->l)
338 40095 : subrel = subrel->l;
339 : /* everything is used within the set operation */
340 40095 : if (rel->exps && subrel->exps) {
341 40095 : node *m;
342 340049 : for (m=subrel->exps->h; m; m = m->next) {
343 299954 : sql_exp *se = m->data;
344 :
345 299954 : se->used = 1;
346 : }
347 : }
348 40095 : }
349 :
350 : static void
351 710813 : rel_exps_mark_used(allocator *sa, sql_rel *rel, sql_rel *subrel)
352 : {
353 710813 : int nr = 0;
354 :
355 710813 : if (rel->r && (is_simple_project(rel->op) || is_groupby(rel->op))) {
356 28939 : list *l = rel->r;
357 28939 : node *n;
358 :
359 101388 : for (n=l->h; n; n = n->next) {
360 72450 : sql_exp *e = n->data;
361 :
362 72450 : e->used = 1;
363 72450 : exp_mark_used(rel, e, -1);
364 : }
365 : }
366 710812 : if (rel->attr) {
367 26215 : for (node *n = rel->attr->h; n; n = n->next) {
368 13106 : sql_exp *e = n->data;
369 :
370 13106 : if (e->type != e_aggr) /* keep all group by's */
371 13106 : e->used = 1;
372 13106 : if (e->used)
373 13106 : nr += exp_mark_used(subrel, e, -2);
374 : }
375 : }
376 710815 : if (rel->exps) {
377 674620 : node *n;
378 674620 : int len = list_length(rel->exps), i;
379 674621 : sql_exp **exps = SA_NEW_ARRAY(sa, sql_exp*, len);
380 :
381 3289094 : for (n=rel->exps->h, i = 0; n; n = n->next, i++) {
382 2614468 : sql_exp *e = exps[i] = n->data;
383 :
384 2614468 : nr += e->used;
385 : }
386 :
387 674626 : if (!nr && is_project(rel->op) && len > 0) /* project at least one column if exists */
388 6497 : exps[0]->used = 1;
389 :
390 3289132 : for (i = len-1; i >= 0; i--) {
391 2614501 : sql_exp *e = exps[i];
392 :
393 2614501 : if (!is_project(rel->op) || (e->used && !is_set(rel->op))) {
394 2086647 : if (is_project(rel->op))
395 1698500 : nr += exp_mark_used(rel, e, i);
396 2086645 : nr += exp_mark_used(subrel, e, -2);
397 : }
398 : }
399 : }
400 : /* for count/rank we need at least one column */
401 710826 : if (!nr && subrel && (is_project(subrel->op) || is_base(subrel->op)) && !list_empty(subrel->exps) &&
402 21541 : (is_simple_project(rel->op) && project_unsafe(rel, false))) {
403 34 : sql_exp *e = subrel->exps->h->data;
404 34 : e->used = 1;
405 : }
406 710826 : if (rel->r && (is_simple_project(rel->op) || is_groupby(rel->op))) {
407 28939 : list *l = rel->r;
408 28939 : node *n;
409 :
410 101388 : for (n=l->h; n; n = n->next) {
411 72449 : sql_exp *e = n->data;
412 :
413 72449 : e->used = 1;
414 : /* possibly project/groupby uses columns from the inner */
415 72449 : exp_mark_used(subrel, e, -2);
416 : }
417 : }
418 710826 : }
419 :
420 : static void exps_used(list *l);
421 :
422 : static void
423 3484429 : exp_used(sql_exp *e)
424 : {
425 3515685 : if (e) {
426 3497353 : e->used = 1;
427 :
428 3497353 : switch (e->type) {
429 30060 : case e_convert:
430 30060 : exp_used(e->l);
431 30060 : break;
432 96202 : case e_func:
433 : case e_aggr:
434 96202 : exps_used(e->l);
435 96202 : break;
436 7895 : case e_cmp:
437 7895 : if (e->flag == cmp_or || e->flag == cmp_filter) {
438 1042 : exps_used(e->l);
439 1042 : exps_used(e->r);
440 6853 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
441 232 : exp_used(e->l);
442 232 : exps_used(e->r);
443 : } else {
444 6621 : exp_used(e->l);
445 6621 : exp_used(e->r);
446 6621 : if (e->f)
447 : exp_used(e->f);
448 : }
449 : break;
450 : default:
451 : break;
452 : }
453 : }
454 3484429 : }
455 :
456 : static void
457 875012 : exps_used(list *l)
458 : {
459 875012 : if (l) {
460 4344814 : for (node *n = l->h; n; n = n->next)
461 3469647 : exp_used(n->data);
462 : }
463 875290 : }
464 :
465 : static void
466 814740 : rel_used(sql_rel *rel)
467 : {
468 814740 : if (!rel)
469 : return;
470 771222 : if (is_join(rel->op) || is_set(rel->op) || is_semi(rel->op) || is_modify(rel->op)) {
471 71976 : rel_used(rel->l);
472 72215 : rel_used(rel->r);
473 : } else if (rel->op == op_munion) {
474 12634 : list *l = rel->l;
475 38317 : for(node *n = l->h; n; n = n->next)
476 25683 : rel_used(n->data);
477 : } else if (is_topn(rel->op) || is_select(rel->op) || is_sample(rel->op)) {
478 20593 : rel_used(rel->l);
479 20599 : rel = rel->l;
480 : } else if (is_ddl(rel->op)) {
481 402593 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
482 36801 : rel_used(rel->l);
483 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
484 282 : rel_used(rel->l);
485 282 : rel_used(rel->r);
486 : } else if (rel->flag == ddl_psm) {
487 7 : exps_used(rel->exps);
488 : }
489 : } else if (rel->op == op_table) {
490 1090 : if (IS_TABLE_PROD_FUNC(rel->flag) || rel->flag == TABLE_FROM_RELATION)
491 1090 : rel_used(rel->l);
492 1090 : exp_used(rel->r);
493 : }
494 881932 : if (rel && rel->exps) {
495 758678 : exps_used(rel->exps);
496 758959 : if (rel->r && (is_simple_project(rel->op) || is_groupby(rel->op)))
497 17805 : exps_used(rel->r);
498 : }
499 : }
500 :
501 : static void
502 1208916 : rel_mark_used(mvc *sql, sql_rel *rel, int proj)
503 : {
504 1780856 : if (proj && (need_distinct(rel)))
505 10884 : rel_used(rel);
506 :
507 1780843 : switch(rel->op) {
508 : case op_basetable:
509 : case op_truncate:
510 : case op_insert:
511 : break;
512 :
513 12180 : case op_table:
514 :
515 12180 : if (rel->l && rel->flag != TRIGGER_WRAPPER) {
516 144 : rel_used(rel);
517 144 : if (rel->r)
518 144 : exp_mark_used(rel->l, rel->r, -2);
519 144 : rel_mark_used(sql, rel->l, proj);
520 : }
521 : break;
522 :
523 16613 : case op_topn:
524 : case op_sample:
525 16613 : if (proj) {
526 16530 : rel = rel ->l;
527 16530 : rel_mark_used(sql, rel, proj);
528 16530 : break;
529 : }
530 : /* fall through */
531 : case op_project:
532 : case op_groupby:
533 475451 : if (proj && rel->l) {
534 273273 : rel_exps_mark_used(sql->sa, rel, rel->l);
535 273279 : rel_mark_used(sql, rel->l, 0);
536 9662 : } else if (proj) {
537 9579 : rel_exps_mark_used(sql->sa, rel, NULL);
538 : }
539 : break;
540 7516 : case op_update:
541 : case op_delete:
542 7516 : if (proj && rel->r) {
543 5962 : rel_used(rel);
544 5962 : sql_rel *r = rel->r;
545 :
546 5962 : if (!list_empty(r->exps)) {
547 7062 : for (node *n = r->exps->h; n; n = n->next) {
548 5963 : sql_exp *e = n->data;
549 5963 : const char *nname = exp_name(e);
550 :
551 5963 : if (nname && nname[0] == '%' && strcmp(nname, TID) == 0) { /* TID is used */
552 4863 : e->used = 1;
553 4863 : break;
554 : }
555 : }
556 : }
557 5962 : rel_exps_mark_used(sql->sa, rel, rel->r);
558 5962 : rel_mark_used(sql, rel->r, 0);
559 : }
560 : break;
561 :
562 398793 : case op_ddl:
563 398793 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
564 33001 : if (rel->l)
565 : rel_mark_used(sql, rel->l, 0);
566 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
567 282 : if (rel->l)
568 282 : rel_mark_used(sql, rel->l, 0);
569 282 : if (rel->r)
570 : rel_mark_used(sql, rel->r, 0);
571 : }
572 : break;
573 :
574 98735 : case op_select:
575 98735 : if (rel->l) {
576 98735 : rel_exps_mark_used(sql->sa, rel, rel->l);
577 98735 : rel_mark_used(sql, rel->l, 0);
578 : }
579 : break;
580 :
581 5144 : case op_union:
582 : case op_inter:
583 : case op_except:
584 : /* For now we mark all union expression as used */
585 :
586 : /* Later we should (in case of union all) remove unused
587 : * columns from the projection.
588 : *
589 : * Project part of union is based on column position.
590 : */
591 5144 : if (proj && (need_distinct(rel) || !rel->exps)) {
592 2334 : rel_used(rel);
593 2334 : if (!rel->exps) {
594 0 : rel_used(rel->l);
595 0 : rel_used(rel->r);
596 : }
597 2334 : rel_mark_used(sql, rel->l, 0);
598 2334 : rel_mark_used(sql, rel->r, 0);
599 482 : } else if (proj && !need_distinct(rel)) {
600 482 : sql_rel *l = rel->l;
601 :
602 482 : positional_exps_mark_used(rel, l);
603 482 : rel_exps_mark_used(sql->sa, rel, l);
604 482 : rel_mark_used(sql, rel->l, 0);
605 : /* based on child check set expression list */
606 482 : if (is_project(l->op) && need_distinct(l))
607 0 : positional_exps_mark_used(l, rel);
608 482 : positional_exps_mark_used(rel, rel->r);
609 482 : rel_exps_mark_used(sql->sa, rel, rel->r);
610 482 : rel_mark_used(sql, rel->r, 0);
611 : }
612 : break;
613 :
614 44303 : case op_munion:
615 44303 : assert(rel->l);
616 : // TODO: here we blindly follow the same logic as op_union. RE-evaluate
617 44303 : if (proj && (need_distinct(rel) || !rel->exps)) {
618 2221 : rel_used(rel);
619 2221 : if (!rel->exps) {
620 0 : for (node *n = ((list*)rel->l)->h; n; n = n->next) {
621 0 : rel_used(n->data);
622 0 : rel_mark_used(sql, n->data, 0);
623 : }
624 : }
625 19410 : } else if (proj && !need_distinct(rel)) {
626 19410 : bool first = true;
627 58541 : for (node *n = ((list*)rel->l)->h; n; n = n->next) {
628 39131 : sql_rel *l = n->data;
629 :
630 39131 : positional_exps_mark_used(rel, l);
631 39131 : rel_exps_mark_used(sql->sa, rel, l);
632 39131 : rel_mark_used(sql, l, 0);
633 : /* based on child check set expression list */
634 39131 : if (first && is_project(l->op) && need_distinct(l))
635 0 : positional_exps_mark_used(l, rel);
636 39131 : first = false;
637 : }
638 : }
639 : break;
640 141585 : case op_join:
641 : case op_left:
642 : case op_right:
643 : case op_full:
644 : case op_semi:
645 : case op_anti:
646 : case op_merge:
647 141585 : rel_exps_mark_used(sql->sa, rel, rel->l);
648 141585 : rel_exps_mark_used(sql->sa, rel, rel->r);
649 141585 : rel_mark_used(sql, rel->l, 0);
650 141585 : rel_mark_used(sql, rel->r, 0);
651 141585 : break;
652 : }
653 1208909 : }
654 :
655 : static sql_rel * rel_dce_sub(mvc *sql, sql_rel *rel);
656 :
657 : static sql_rel *
658 1080652 : rel_remove_unused(mvc *sql, sql_rel *rel)
659 : {
660 1080652 : int needed = 0;
661 :
662 1080652 : if (!rel)
663 : return rel;
664 :
665 1074409 : switch(rel->op) {
666 287606 : case op_basetable: {
667 287606 : sql_table *t = rel->l;
668 :
669 287606 : if (t && isReplicaTable(t)) /* TODO fix rewriting in rel_distribute.c */
670 : return rel;
671 : }
672 : /* fall through */
673 : case op_table:
674 293826 : if (rel->exps && (rel->op != op_table || !IS_TABLE_PROD_FUNC(rel->flag))) {
675 1273315 : for(node *n=rel->exps->h; n && !needed; n = n->next) {
676 985732 : sql_exp *e = n->data;
677 :
678 985732 : if (!e->used)
679 212615 : needed = 1;
680 : }
681 :
682 287583 : if (!needed)
683 : return rel;
684 :
685 1254834 : for(node *n=rel->exps->h; n;) {
686 1042221 : node *next = n->next;
687 1042221 : sql_exp *e = n->data;
688 :
689 : /* at least one (needed for crossproducts, count(*), rank() and single value projections) !, handled by rel_exps_mark_used */
690 1042221 : if (!e->used && list_length(rel->exps) > 1)
691 352863 : list_remove_node(rel->exps, NULL, n);
692 : n = next;
693 : }
694 : }
695 218856 : if (rel->op == op_table && (IS_TABLE_PROD_FUNC(rel->flag) || rel->flag == TABLE_FROM_RELATION))
696 6243 : rel->l = rel_remove_unused(sql, rel->l);
697 : return rel;
698 :
699 16528 : case op_topn:
700 : case op_sample:
701 :
702 16528 : if (rel->l)
703 16528 : rel->l = rel_remove_unused(sql, rel->l);
704 : return rel;
705 :
706 282863 : case op_project:
707 : case op_groupby:
708 :
709 282863 : if (/*rel->l &&*/ rel->exps) {
710 1770860 : for(node *n=rel->exps->h; n && !needed; n = n->next) {
711 1487997 : sql_exp *e = n->data;
712 :
713 1487997 : if (!e->used)
714 20293 : needed = 1;
715 : }
716 282863 : if (!needed)
717 : return rel;
718 :
719 513074 : for(node *n=rel->exps->h; n;) {
720 492781 : node *next = n->next;
721 492781 : sql_exp *e = n->data;
722 :
723 : /* at least one (needed for crossproducts, count(*), rank() and single value projections) */
724 492781 : if (!e->used && list_length(rel->exps) > 1)
725 393034 : list_remove_node(rel->exps, NULL, n);
726 : n = next;
727 : }
728 : }
729 : return rel;
730 :
731 3150 : case op_join:
732 : case op_left:
733 : case op_right:
734 : case op_full:
735 3150 : if (list_length(rel->attr) > 1) {
736 0 : for(node *n=rel->attr->h; n && !needed; n = n->next) {
737 0 : sql_exp *e = n->data;
738 :
739 0 : if (!e->used)
740 0 : needed = 1;
741 : }
742 0 : if (!needed)
743 : return rel;
744 :
745 0 : for(node *n=rel->attr->h; n;) {
746 0 : node *next = n->next;
747 0 : sql_exp *e = n->data;
748 :
749 0 : if (!e->used)
750 0 : list_remove_node(rel->attr, NULL, n);
751 : n = next;
752 : }
753 : }
754 : return rel;
755 :
756 : case op_union:
757 : case op_inter:
758 : case op_except:
759 : case op_munion:
760 :
761 : case op_insert:
762 : case op_update:
763 : case op_delete:
764 : case op_truncate:
765 : case op_merge:
766 :
767 : case op_select:
768 :
769 : case op_semi:
770 : case op_anti:
771 : return rel;
772 398477 : case op_ddl:
773 398477 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
774 32729 : if (rel->l)
775 32369 : rel->l = rel_remove_unused(sql, rel->l);
776 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
777 238 : if (rel->l)
778 238 : rel->l = rel_remove_unused(sql, rel->l);
779 238 : if (rel->r)
780 238 : rel->r = rel_remove_unused(sql, rel->r);
781 : }
782 : return rel;
783 : }
784 : return rel;
785 : }
786 :
787 : static void
788 1244340 : rel_dce_refs(mvc *sql, sql_rel *rel, list *refs)
789 : {
790 1244340 : if (!rel || (rel_is_ref(rel) && list_find(refs, rel, NULL)))
791 13549 : return ;
792 :
793 1230791 : switch(rel->op) {
794 366344 : case op_table:
795 : case op_topn:
796 : case op_sample:
797 : case op_project:
798 : case op_groupby:
799 : case op_select:
800 :
801 366344 : if (rel->l && (rel->op != op_table || rel->flag != TRIGGER_WRAPPER))
802 352277 : rel_dce_refs(sql, rel->l, refs);
803 : break;
804 :
805 : case op_basetable:
806 : case op_insert:
807 : case op_truncate:
808 : break;
809 :
810 6314 : case op_update:
811 : case op_delete:
812 :
813 6314 : if (rel->r)
814 5962 : rel_dce_refs(sql, rel->r, refs);
815 : break;
816 :
817 134261 : case op_union:
818 : case op_inter:
819 : case op_except:
820 : case op_join:
821 : case op_left:
822 : case op_right:
823 : case op_full:
824 : case op_semi:
825 : case op_anti:
826 : case op_merge:
827 :
828 134261 : if (rel->l)
829 134261 : rel_dce_refs(sql, rel->l, refs);
830 134261 : if (rel->r)
831 134261 : rel_dce_refs(sql, rel->r, refs);
832 : break;
833 20780 : case op_munion:
834 20780 : assert(rel->l);
835 62944 : for (node *n = ((list*)rel->l)->h; n; n = n->next)
836 42164 : rel_dce_refs(sql, n->data, refs);
837 : break;
838 399892 : case op_ddl:
839 :
840 399892 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
841 34100 : if (rel->l)
842 33740 : rel_dce_refs(sql, rel->l, refs);
843 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
844 282 : if (rel->l)
845 282 : rel_dce_refs(sql, rel->l, refs);
846 282 : if (rel->r)
847 248 : rel_dce_refs(sql, rel->r, refs);
848 : } break;
849 : }
850 :
851 1230803 : if (rel_is_ref(rel) && !list_find(refs, rel, NULL))
852 15818 : list_prepend(refs, rel);
853 : }
854 :
855 : static sql_rel *
856 1770748 : rel_dce_down(mvc *sql, sql_rel *rel, int skip_proj)
857 : {
858 1770748 : if (!rel)
859 : return rel;
860 :
861 1770748 : if (!skip_proj && rel_is_ref(rel))
862 : return rel;
863 :
864 1741874 : switch(rel->op) {
865 522777 : case op_basetable:
866 : case op_table:
867 :
868 522777 : if (skip_proj && rel->l && rel->op == op_table && rel->flag != TRIGGER_WRAPPER)
869 83 : rel->l = rel_dce_down(sql, rel->l, 0);
870 261138 : if (!skip_proj)
871 261138 : rel_dce_sub(sql, rel);
872 : /* fall through */
873 :
874 : case op_truncate:
875 : return rel;
876 :
877 7297 : case op_insert:
878 7297 : rel_used(rel->r);
879 7297 : rel_dce_sub(sql, rel->r);
880 7297 : return rel;
881 :
882 7516 : case op_update:
883 : case op_delete:
884 :
885 7516 : if (skip_proj && rel->r)
886 5962 : rel->r = rel_dce_down(sql, rel->r, 0);
887 1202 : if (!skip_proj)
888 1202 : rel_dce_sub(sql, rel);
889 : return rel;
890 :
891 488402 : case op_topn:
892 : case op_sample:
893 : case op_project:
894 : case op_groupby:
895 :
896 488402 : if (skip_proj && rel->l)
897 289764 : rel->l = rel_dce_down(sql, rel->l, is_topn(rel->op) || is_sample(rel->op));
898 189081 : if (!skip_proj)
899 189081 : rel_dce_sub(sql, rel);
900 : return rel;
901 :
902 5137 : case op_union:
903 : case op_inter:
904 : case op_except:
905 5137 : if (skip_proj) {
906 2816 : if (rel->l)
907 2816 : rel->l = rel_dce_down(sql, rel->l, 0);
908 2816 : if (rel->r)
909 2816 : rel->r = rel_dce_down(sql, rel->r, 0);
910 : }
911 2321 : if (!skip_proj)
912 2321 : rel_dce_sub(sql, rel);
913 : return rel;
914 :
915 41383 : case op_munion:
916 41383 : if (skip_proj) {
917 65202 : for (node *n = ((list*)rel->l)->h; n; n = n->next)
918 43573 : n->data = rel_dce_down(sql, n->data, 0);
919 : }
920 19754 : if (!skip_proj)
921 19754 : rel_dce_sub(sql, rel);
922 : return rel;
923 91232 : case op_select:
924 91232 : if (rel->l)
925 91232 : rel->l = rel_dce_down(sql, rel->l, 0);
926 : return rel;
927 :
928 138154 : case op_join:
929 : case op_left:
930 : case op_right:
931 : case op_full:
932 : case op_semi:
933 : case op_anti:
934 : case op_merge:
935 138154 : if (rel->l)
936 138154 : rel->l = rel_dce_down(sql, rel->l, 0);
937 138154 : if (rel->r)
938 138154 : rel->r = rel_dce_down(sql, rel->r, 0);
939 138154 : if (!skip_proj && !list_empty(rel->attr))
940 3150 : rel_dce_sub(sql, rel);
941 : return rel;
942 :
943 398599 : case op_ddl:
944 398599 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
945 32807 : if (rel->l)
946 32641 : rel->l = rel_dce_down(sql, rel->l, 0);
947 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
948 282 : if (rel->l)
949 282 : rel->l = rel_dce_down(sql, rel->l, 0);
950 282 : if (rel->r)
951 248 : rel->r = rel_dce_down(sql, rel->r, 0);
952 : }
953 : return rel;
954 : }
955 : return rel;
956 : }
957 :
958 : /* DCE
959 : *
960 : * Based on top relation expressions mark sub expressions as used.
961 : * Then recurse down until the projections. Clean them up and repeat.
962 : */
963 :
964 : static sql_rel *
965 1025164 : rel_dce_sub(mvc *sql, sql_rel *rel)
966 : {
967 1025164 : if (!rel)
968 : return rel;
969 :
970 : /*
971 : * Mark used up until the next project
972 : * For setops we need to first mark, then remove
973 : * because of positional dependency
974 : */
975 1025164 : rel_mark_used(sql, rel, 1);
976 1025147 : rel = rel_remove_unused(sql, rel);
977 1025142 : rel_dce_down(sql, rel, 1);
978 1025142 : return rel;
979 : }
980 :
981 : /* add projects under set ops */
982 : static sql_rel *
983 2009538 : rel_add_projects(mvc *sql, sql_rel *rel)
984 : {
985 2009538 : if (!rel)
986 : return rel;
987 :
988 2009538 : switch(rel->op) {
989 : case op_basetable:
990 : case op_truncate:
991 : return rel;
992 13611 : case op_insert:
993 : case op_update:
994 : case op_delete:
995 13611 : if (rel->r)
996 13259 : rel->r = rel_add_projects(sql, rel->r);
997 : return rel;
998 2912 : case op_union:
999 : case op_inter:
1000 : case op_except:
1001 : /* We can only reduce the list of expressions of an set op
1002 : * if the projection under it can also be reduced.
1003 : */
1004 2912 : if (rel->l) {
1005 2912 : sql_rel *l = rel->l;
1006 :
1007 2912 : if (!is_project(l->op) && !need_distinct(rel))
1008 1 : l = rel_project(sql->sa, l, rel_projections(sql, l, NULL, 1, 1));
1009 2912 : rel->l = rel_add_projects(sql, l);
1010 : }
1011 2912 : if (rel->r) {
1012 2912 : sql_rel *r = rel->r;
1013 :
1014 2912 : if (!is_project(r->op) && !need_distinct(rel))
1015 1 : r = rel_project(sql->sa, r, rel_projections(sql, r, NULL, 1, 1));
1016 2912 : rel->r = rel_add_projects(sql, r);
1017 : }
1018 : return rel;
1019 31841 : case op_munion:
1020 31841 : assert(rel->l);
1021 145242 : for (node *n = ((list*)rel->l)->h; n; n = n->next) {
1022 113401 : sql_rel* r = n->data;
1023 113401 : if (!is_project(r->op) && !need_distinct(rel))
1024 15 : r = rel_project(sql->sa, r, rel_projections(sql, r, NULL, 1, 1));
1025 113401 : r = rel_add_projects(sql, r);
1026 113401 : n->data = r;
1027 : }
1028 : return rel;
1029 751726 : case op_topn:
1030 : case op_sample:
1031 : case op_project:
1032 : case op_groupby:
1033 : case op_select:
1034 : case op_table:
1035 751726 : if (rel->l && (rel->op != op_table || rel->flag != TRIGGER_WRAPPER))
1036 723283 : rel->l = rel_add_projects(sql, rel->l);
1037 : return rel;
1038 289066 : case op_join:
1039 : case op_left:
1040 : case op_right:
1041 : case op_full:
1042 : case op_semi:
1043 : case op_anti:
1044 : case op_merge:
1045 289066 : if (rel->l)
1046 289066 : rel->l = rel_add_projects(sql, rel->l);
1047 289066 : if (rel->r)
1048 289066 : rel->r = rel_add_projects(sql, rel->r);
1049 : return rel;
1050 400160 : case op_ddl:
1051 400160 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
1052 34368 : if (rel->l)
1053 34008 : rel->l = rel_add_projects(sql, rel->l);
1054 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
1055 282 : if (rel->l)
1056 282 : rel->l = rel_add_projects(sql, rel->l);
1057 282 : if (rel->r)
1058 248 : rel->r = rel_add_projects(sql, rel->r);
1059 : }
1060 : return rel;
1061 : }
1062 : return rel;
1063 : }
1064 :
1065 : static sql_rel *
1066 541131 : rel_dce_(mvc *sql, sql_rel *rel)
1067 : {
1068 541131 : list *refs = sa_list(sql->sa);
1069 :
1070 541268 : rel_dce_refs(sql, rel, refs);
1071 541223 : if (refs) {
1072 556955 : for(node *n = refs->h; n; n = n->next) {
1073 15818 : sql_rel *i = n->data;
1074 :
1075 15818 : while (!rel_is_ref(i) && i->l && !is_base(i->op))
1076 : i = i->l;
1077 15818 : if (i)
1078 15818 : rel_used(i);
1079 : }
1080 : }
1081 541137 : rel = rel_add_projects(sql, rel);
1082 541116 : rel_used(rel);
1083 541577 : rel_dce_sub(sql, rel);
1084 541124 : return rel;
1085 : }
1086 :
1087 :
1088 : /* Remove unused expressions */
1089 : static sql_rel *
1090 541169 : rel_dce(visitor *v, global_props *gp, sql_rel *rel)
1091 : {
1092 541169 : (void) gp;
1093 541169 : return rel_dce_(v->sql, rel);
1094 : }
1095 :
1096 : /* keep export for other projects */
1097 : sql_rel *
1098 0 : rel_deadcode_elimination(mvc *sql, sql_rel *rel)
1099 : {
1100 0 : return rel_dce_(sql, rel);
1101 : }
1102 :
1103 : run_optimizer
1104 611477 : bind_dce(visitor *v, global_props *gp)
1105 : {
1106 611477 : int flag = v->sql->sql_optimizer;
1107 611477 : return gp->opt_cycle == 0 && gp->opt_level == 1 && (flag & dce) ? rel_dce : NULL;
1108 : }
1109 :
1110 :
1111 : static int
1112 84238 : topn_sample_safe_exps( list *exps, bool nil_limit )
1113 : {
1114 : /* Limit only expression lists are always save */
1115 84238 : if (list_length(exps) == 1)
1116 : return 1;
1117 1352 : for (node *n = exps->h; n; n = n->next ) {
1118 920 : sql_exp *e = n->data;
1119 :
1120 920 : if (!e || e->type != e_atom || (!nil_limit && exp_is_null(e)))
1121 59 : return 0;
1122 : }
1123 : return 1;
1124 : }
1125 :
1126 : static list *
1127 129 : sum_limit_offset(mvc *sql, sql_rel *rel)
1128 : {
1129 : /* for sample we always propagate, or if the expression list only consists of a limit expression, we copy it */
1130 129 : if (is_sample(rel->op) || list_length(rel->exps) == 1)
1131 126 : return exps_copy(sql, rel->exps);
1132 3 : assert(list_length(rel->exps) == 2);
1133 3 : sql_subtype *lng = sql_bind_localtype("lng");
1134 3 : sql_exp *add = rel_binop_(sql, NULL, exp_copy(sql, rel->exps->h->data), exp_copy(sql, rel->exps->h->next->data), "sys", "sql_add", card_value, true);
1135 : /* for remote plans, make sure the output type is a bigint */
1136 3 : if (subtype_cmp(lng, exp_subtype(add)) != 0)
1137 3 : add = exp_convert(sql, add, exp_subtype(add), lng);
1138 3 : return list_append(sa_list(sql->sa), add);
1139 : }
1140 :
1141 : /*
1142 : * Push TopN (only LIMIT, no ORDER BY) down through projections underneath crossproduct, i.e.,
1143 : *
1144 : * topn( topn(
1145 : * project( project(
1146 : * crossproduct( crossproduct(
1147 : * L, => topn( L )[ n ],
1148 : * R topn( R )[ n ]
1149 : * ) )
1150 : * )[ Cs ]* )[ Cs ]*
1151 : * )[ n ] )[ n ]
1152 : *
1153 : * (TODO: in case of n==1 we can omit the original top-level TopN)
1154 : *
1155 : * also push topn under (non reordering) projections.
1156 : */
1157 : static sql_rel *
1158 137071 : rel_push_topn_and_sample_down_(visitor *v, sql_rel *rel)
1159 : {
1160 137071 : sql_rel *rp = NULL, *r = rel->l, *rpp = NULL;
1161 :
1162 137071 : if (is_topn(rel->op) && !rel_is_ref(rel) &&
1163 50162 : r && r->op == op_table && r->flag != TRIGGER_WRAPPER && !rel_is_ref(r) && r->r) {
1164 3 : sql_exp *op = r->r;
1165 3 : sql_subfunc *f = op->f;
1166 :
1167 3 : if (is_func(op->type) && strcmp(f->func->base.name, "file_loader") == 0 && !sql_func_mod(f->func)[0] && !sql_func_imp(f->func)[0]) {
1168 : /* push limit, to arguments of file_loader */
1169 0 : list *args = op->l;
1170 0 : if (list_length(args) == 2) {
1171 0 : sql_exp *topN = rel->exps->h->data;
1172 0 : sql_exp *offset = rel->exps->h->next? rel->exps->h->next->data:NULL;
1173 0 : atom *topn = topN->l;
1174 0 : if (offset) {
1175 0 : atom *b1 = (atom *)offset->l, *c = atom_add(v->sql->sa, b1, topn);
1176 :
1177 0 : if (!c)
1178 : return rel;
1179 0 : if (atom_cmp(c, topn) < 0) /* overflow */
1180 : return rel;
1181 : topn = c;
1182 : }
1183 0 : append(args, exp_atom(v->sql->sa, topn));
1184 0 : v->changes++;
1185 : }
1186 : }
1187 : }
1188 :
1189 137071 : if ((is_topn(rel->op) || is_sample(rel->op)) && topn_sample_safe_exps(rel->exps, true)) {
1190 50255 : sql_rel *(*func) (allocator *, sql_rel *, list *) = is_topn(rel->op) ? rel_topn : rel_sample;
1191 :
1192 : /* nested topN relations */
1193 50255 : if (r && is_topn(rel->op) && is_topn(r->op) && !rel_is_ref(r)) {
1194 0 : sql_exp *topN1 = rel->exps->h->data, *topN2 = r->exps->h->data;
1195 0 : sql_exp *offset1 = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
1196 0 : sql_exp *offset2 = list_length(r->exps) > 1 ? r->exps->h->next->data : NULL;
1197 :
1198 0 : if (topN1->l && topN2->l && (!offset1 || offset1->l) && (!offset2 || offset2->l)) { /* no parameters */
1199 0 : bool changed = false;
1200 :
1201 0 : if ((!offset1 || (offset1->type == e_atom && offset1->l)) && (!offset2 || (offset2->type == e_atom && offset2->l))) { /* only atoms */
1202 0 : if (!offset1 && offset2) {
1203 0 : list_append(rel->exps, exp_copy(v->sql, offset2));
1204 0 : changed = true;
1205 0 : } else if (offset1 && offset2) { /* sum offsets */
1206 0 : atom *b1 = (atom *)offset1->l, *b2 = (atom *)offset2->l, *c = atom_add(v->sql->sa, b1, b2);
1207 :
1208 0 : if (!c) /* error, don't apply optimization, WARNING because of this the offset optimization must come before the limit one */
1209 : return rel;
1210 0 : if (atom_cmp(c, b2) < 0) /* overflow */
1211 0 : c = atom_int(v->sql->sa, sql_bind_localtype("lng"), GDK_lng_max);
1212 0 : offset1->l = c;
1213 0 : changed = true;
1214 : }
1215 : }
1216 :
1217 0 : if (topN1->type == e_atom && topN1->l && topN2->type == e_atom && topN2->l) { /* only atoms */
1218 0 : atom *a1 = (atom *)topN1->l, *a2 = (atom *)topN2->l;
1219 :
1220 0 : if (!a2->isnull && (a1->isnull || atom_cmp(a1, a2) >= 0)) { /* topN1 is not set or is larger than topN2 */
1221 0 : rel->exps->h->data = exp_copy(v->sql, topN2);
1222 0 : changed = true;
1223 : }
1224 : }
1225 :
1226 0 : if (changed) {
1227 0 : rel->l = r->l;
1228 0 : r->l = NULL;
1229 0 : rel_destroy(r);
1230 0 : v->changes++;
1231 0 : return rel;
1232 : }
1233 : }
1234 : }
1235 :
1236 50255 : if (r && is_simple_project(r->op) && need_distinct(r))
1237 : return rel;
1238 :
1239 : /* push topn/sample under projections */
1240 50250 : if (!rel_is_ref(rel) && r && is_simple_project(r->op) && !need_distinct(r) && !rel_is_ref(r) && r->l && list_empty(r->r)) {
1241 : sql_rel *x = r, *px = x;
1242 :
1243 32526 : while (is_simple_project(x->op) && !need_distinct(x) && !rel_is_ref(x) && x->l && list_empty(x->r)) {
1244 16276 : px = x;
1245 16276 : x = x->l;
1246 : }
1247 : /* only push topn once */
1248 16242 : if (x && x->op == rel->op)
1249 : return rel;
1250 :
1251 16236 : rel->l = x;
1252 16236 : px->l = rel;
1253 16236 : rel = r;
1254 16236 : v->changes++;
1255 16236 : return rel;
1256 : }
1257 :
1258 34003 : if (!topn_sample_safe_exps(rel->exps, false))
1259 : return rel;
1260 :
1261 : /* duplicate topn/sample direct under union or crossproduct */
1262 33936 : if (r && !rel_is_ref(r) && r->l && r->r && ((is_union(r->op) && r->exps) || (r->op == op_join && list_empty(r->exps)))) {
1263 150 : sql_rel *u = r, *x;
1264 150 : sql_rel *ul = u->l;
1265 150 : sql_rel *ur = u->r;
1266 150 : bool changed = false;
1267 :
1268 150 : x = ul;
1269 156 : while (is_simple_project(x->op) && !need_distinct(x) && !rel_is_ref(x) && x->l && list_empty(x->r))
1270 6 : x = x->l;
1271 150 : if (x && x->op != rel->op) { /* only push topn once */
1272 60 : ul = func(v->sql->sa, ul, sum_limit_offset(v->sql, rel));
1273 60 : set_processed(ul);
1274 60 : u->l = ul;
1275 60 : changed = true;
1276 : }
1277 :
1278 150 : x = ur;
1279 158 : while (is_simple_project(x->op) && !need_distinct(x) && !rel_is_ref(x) && x->l && list_empty(x->r))
1280 8 : x = x->l;
1281 150 : if (x && x->op != rel->op) { /* only push topn once */
1282 61 : ur = func(v->sql->sa, ur, sum_limit_offset(v->sql, rel));
1283 61 : set_processed(ur);
1284 61 : u->r = ur;
1285 61 : changed = true;
1286 : }
1287 :
1288 150 : if (changed)
1289 61 : v->changes++;
1290 150 : return rel;
1291 : }
1292 :
1293 : /* duplicate topn/sample + [ project-order ] under union */
1294 : if (r && !rp)
1295 33781 : rp = r->l;
1296 33781 : if (r && r->exps && is_simple_project(r->op) && !rel_is_ref(r) && !list_empty(r->r) && r->l && is_union(rp->op)) {
1297 0 : sql_rel *u = rp, *ou = u, *x, *ul = u->l, *ur = u->r;
1298 0 : list *rcopy = NULL;
1299 :
1300 : /* only push topn/sample once */
1301 0 : x = ul;
1302 0 : while (is_simple_project(x->op) && !need_distinct(x) && !rel_is_ref(x) && x->l && list_empty(x->r))
1303 0 : x = x->l;
1304 0 : if (x && x->op == rel->op)
1305 : return rel;
1306 : x = ur;
1307 0 : while (is_simple_project(x->op) && !need_distinct(x) && !rel_is_ref(x) && x->l && list_empty(x->r))
1308 0 : x = x->l;
1309 0 : if (x && x->op == rel->op)
1310 : return rel;
1311 :
1312 0 : rcopy = exps_copy(v->sql, r->r);
1313 0 : for (node *n = rcopy->h ; n ; n = n->next) {
1314 0 : sql_exp *e = n->data;
1315 0 : set_descending(e); /* remove ordering properties for projected columns */
1316 0 : set_nulls_first(e);
1317 : }
1318 0 : ul = rel_dup(ul);
1319 0 : ur = rel_dup(ur);
1320 0 : if (!is_project(ul->op))
1321 0 : ul = rel_project(v->sql->sa, ul,
1322 : rel_projections(v->sql, ul, NULL, 1, 1));
1323 0 : if (!is_project(ur->op))
1324 0 : ur = rel_project(v->sql->sa, ur,
1325 : rel_projections(v->sql, ur, NULL, 1, 1));
1326 0 : rel_rename_exps(v->sql, u->exps, ul->exps);
1327 0 : rel_rename_exps(v->sql, u->exps, ur->exps);
1328 :
1329 : /* introduce projects under the set */
1330 0 : ul = rel_project(v->sql->sa, ul, NULL);
1331 0 : ul->exps = exps_copy(v->sql, r->exps);
1332 : /* possibly add order by column */
1333 0 : ul->exps = list_distinct(list_merge(ul->exps, exps_copy(v->sql, rcopy), NULL), (fcmp) exp_equal, (fdup) NULL);
1334 0 : ul->nrcols = list_length(ul->exps);
1335 0 : ul->r = exps_copy(v->sql, r->r);
1336 0 : set_processed(ul);
1337 0 : ul = func(v->sql->sa, ul, sum_limit_offset(v->sql, rel));
1338 0 : set_processed(ul);
1339 :
1340 0 : ur = rel_project(v->sql->sa, ur, NULL);
1341 0 : ur->exps = exps_copy(v->sql, r->exps);
1342 : /* possibly add order by column */
1343 0 : ur->exps = list_distinct(list_merge(ur->exps, exps_copy(v->sql, rcopy), NULL), (fcmp) exp_equal, (fdup) NULL);
1344 0 : ur->nrcols = list_length(ur->exps);
1345 0 : ur->r = exps_copy(v->sql, r->r);
1346 0 : set_processed(ur);
1347 0 : ur = func(v->sql->sa, ur, sum_limit_offset(v->sql, rel));
1348 0 : set_processed(ur);
1349 :
1350 0 : u = rel_setop(v->sql->sa, ul, ur, op_union);
1351 0 : u->exps = exps_alias(v->sql, r->exps);
1352 0 : u->nrcols = list_length(u->exps);
1353 0 : set_processed(u);
1354 : /* possibly add order by column */
1355 0 : u->exps = list_distinct(list_merge(u->exps, rcopy, NULL), (fcmp) exp_equal, (fdup) NULL);
1356 0 : if (need_distinct(r)) {
1357 0 : set_distinct(ul);
1358 0 : set_distinct(ur);
1359 : }
1360 :
1361 : /* zap names */
1362 0 : rel_no_rename_exps(u->exps);
1363 0 : rel_destroy(ou);
1364 :
1365 0 : ur = rel_project(v->sql->sa, u, exps_alias(v->sql, r->exps));
1366 0 : ur->r = r->r;
1367 0 : r->l = NULL;
1368 :
1369 0 : if (need_distinct(r))
1370 0 : set_distinct(ur);
1371 :
1372 0 : rel_destroy(r);
1373 0 : rel->l = ur;
1374 0 : v->changes++;
1375 0 : return rel;
1376 : }
1377 : /* a left outer join b order by a.* limit L, can be copied into a */
1378 : /* topn ( project (orderby)( optional project ( left ())
1379 : * rel r rp */
1380 33786 : if (r && !rp)
1381 66 : rp = r->l;
1382 33786 : if (r && rp && is_simple_project(rp->op) && !rp->r && rp->l)
1383 33786 : rpp = rp->l;
1384 33786 : if (r && r->exps && is_simple_project(r->op) && !rel_is_ref(r) && r->r && r->l && ((!rpp && is_left(rp->op)) ||
1385 300 : (rpp && is_left(rpp->op)))) {
1386 24 : sql_rel *lj = rpp?rpp:rp;
1387 24 : sql_rel *l = lj->l;
1388 24 : list *obes = r->r, *nobes = sa_list(v->sql->sa);
1389 24 : int fnd = 1;
1390 63 : for (node *n = obes->h; n && fnd; n = n->next) {
1391 41 : sql_exp *obe = n->data;
1392 41 : int part = is_partitioning(obe);
1393 41 : int asc = is_ascending(obe);
1394 41 : int nl = nulls_last(obe);
1395 : /* only simple rename expressions */
1396 41 : sql_exp *pe = exps_find_exp(r->exps, obe);
1397 41 : if (pe && rpp)
1398 33 : pe = exps_find_exp(rp->exps, pe);
1399 41 : if (pe)
1400 41 : pe = rel_find_exp(l, pe);
1401 41 : if (pe) {
1402 41 : if (exp_is_atom(pe))
1403 : return rel;
1404 39 : pe = exp_ref(v->sql, pe);
1405 39 : if (part)
1406 0 : set_partitioning(pe);
1407 39 : if (asc)
1408 29 : set_ascending(pe);
1409 39 : if (nl)
1410 5 : set_nulls_last(pe);
1411 39 : append(nobes, pe);
1412 : }
1413 : else
1414 : fnd = 0;
1415 : }
1416 22 : if (fnd && ((is_topn(rel->op) && !is_topn(l->op)) || (is_sample(rel->op) && !is_sample(l->op)))) {
1417 : /* inject topn */
1418 : /* Todo add order by */
1419 8 : sql_rel *ob = lj->l = rel_project(v->sql->sa, lj->l, rel_projections(v->sql, lj->l, NULL, 1, 1));
1420 8 : ob->r = nobes;
1421 8 : lj->l = func(v->sql->sa, lj->l, sum_limit_offset(v->sql, rel));
1422 8 : v->changes++;
1423 8 : return rel;
1424 : }
1425 : }
1426 : }
1427 : return rel;
1428 : }
1429 :
1430 : static sql_rel *
1431 33506 : rel_push_topn_and_sample_down(visitor *v, global_props *gp, sql_rel *rel)
1432 : {
1433 33506 : (void) gp;
1434 33506 : return rel_visitor_topdown(v, rel, &rel_push_topn_and_sample_down_);
1435 : }
1436 :
1437 : run_optimizer
1438 611695 : bind_push_topn_and_sample_down(visitor *v, global_props *gp)
1439 : {
1440 611695 : int flag = v->sql->sql_optimizer;
1441 611476 : return gp->opt_level == 1 && (gp->cnt[op_topn] || gp->cnt[op_sample]) &&
1442 645057 : (flag & push_topn_and_sample_down) ? rel_push_topn_and_sample_down : NULL;
1443 : }
|