Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024, 2025 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer.h"
15 : #include "rel_optimizer_private.h"
16 : #include "rel_exp.h"
17 : #include "rel_select.h"
18 :
19 : static void
20 0 : rel_no_rename_exps( list *exps )
21 : {
22 0 : for (node *n = exps->h; n; n = n->next) {
23 0 : sql_exp *e = n->data;
24 :
25 0 : exp_setalias(e, e->alias.label, e->l, e->r);
26 : }
27 0 : list_hash_clear(exps);
28 0 : }
29 :
30 : /* positional renames ! */
31 : /* get the names (aka aliases) from the src_exps list and rename the dest_exps list */
32 : void
33 82490 : rel_rename_exps( mvc *sql, list *src_exps, list *dest_exps)
34 : {
35 82490 : node *n, *m;
36 :
37 82490 : (void)sql;
38 : /* check if a column uses an alias earlier in the list */
39 :
40 82490 : assert(list_length(src_exps) <= list_length(dest_exps));
41 695844 : for (n = src_exps->h, m = dest_exps->h; n && m; n = n->next, m = m->next) {
42 613354 : sql_exp *s = n->data;
43 613354 : sql_exp *d = m->data;
44 613354 : sql_alias *rname = exp_relname(s);
45 :
46 613354 : exp_setalias(d, s->alias.label, rname, exp_name(s));
47 : }
48 82490 : list_hash_clear(dest_exps);
49 82490 : }
50 :
51 : sql_rel *
52 100766 : rel_find_ref( sql_rel *r)
53 : {
54 313242 : while (!rel_is_ref(r) && r->l &&
55 270879 : (is_project(r->op) || is_select(r->op) /*|| is_join(r->op)*/))
56 : r = r->l;
57 100766 : if (rel_is_ref(r))
58 28033 : return r;
59 : return NULL;
60 : }
61 :
62 : /* merge projection */
63 :
64 : /* push an expression through a projection.
65 : * The result should again be used in a projection.
66 : */
67 : static list *
68 91075 : exps_push_down_prj(mvc *sql, list *exps, sql_rel *f, sql_rel *t, bool keepalias)
69 : {
70 91075 : node *n;
71 91075 : list *nl = new_exp_list(sql->sa);
72 :
73 310890 : for(n = exps->h; n; n = n->next) {
74 222821 : sql_exp *arg = n->data, *narg = NULL;
75 :
76 222821 : narg = exp_push_down_prj(sql, arg, f, t);
77 222821 : if (!narg)
78 : return NULL;
79 219815 : narg = exp_propagate(sql->sa, narg, arg);
80 219815 : if (!keepalias && narg->type == e_column)
81 65546 : exp_setalias(narg, arg->alias.label, narg->l, narg->r);
82 219815 : append(nl, narg);
83 : }
84 : return nl;
85 : }
86 :
87 : sql_exp *
88 2716425 : exp_push_down_prj(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t)
89 : {
90 2716425 : sql_exp *ne = NULL, *l = NULL, *r = NULL, *r2 = NULL;
91 :
92 2716425 : assert(is_project(f->op));
93 :
94 2716425 : switch(e->type) {
95 2212088 : case e_column:
96 2212088 : assert(e->nid);
97 2212088 : ne = exps_bind_nid(f->exps, e->nid);
98 2212088 : if (!ne || (ne->type != e_column && (ne->type != e_atom || ne->f)))
99 : return NULL;
100 1896383 : while (ne && (has_label(ne) || is_selfref(ne) /*inside this list */) && is_simple_project(f->op) && ne->type == e_column) {
101 12144 : sql_exp *oe = e, *one = ne;
102 :
103 12144 : e = ne;
104 12144 : ne = NULL;
105 12144 : if (e->nid)
106 12144 : ne = exps_bind_nid(f->exps, e->nid);
107 12144 : if (ne && ne != one && list_position(f->exps, ne) >= list_position(f->exps, one))
108 12144 : ne = NULL;
109 12144 : if (!ne || ne == one) {
110 : ne = one;
111 1896092 : e = oe;
112 : break;
113 : }
114 291 : if (ne->type != e_column && (ne->type != e_atom || ne->f))
115 : return NULL;
116 : }
117 : /* possibly a groupby/project column is renamed */
118 1896092 : if (is_groupby(f->op) && !list_empty(f->r) && ne->type == e_column) {
119 140 : sql_exp *gbe = NULL;
120 140 : if (ne->nid)
121 140 : gbe = exps_bind_nid(f->exps, ne->nid);
122 140 : ne = gbe;
123 140 : if (!ne || (ne->type != e_column && (ne->type != e_atom || ne->f)))
124 0 : return NULL;
125 : }
126 1896092 : return exp_copy(sql, ne);
127 65414 : case e_cmp:
128 65414 : if (e->flag == cmp_or || e->flag == cmp_filter) {
129 4118 : list *l = NULL, *r = NULL;
130 :
131 4118 : if (!(l = exps_push_down_prj(sql, e->l, f, t, true)) || !(r = exps_push_down_prj(sql, e->r, f, t, true)))
132 975 : return NULL;
133 3143 : if (e->flag == cmp_filter) {
134 1858 : ne = exp_filter(sql->sa, l, r, e->f, is_anti(e));
135 : } else {
136 1285 : ne = exp_or(sql->sa, l, r, is_anti(e));
137 : }
138 61296 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
139 5030 : list *r = NULL;
140 :
141 5030 : if (!(l = exp_push_down_prj(sql, e->l, f, t)) || !(r = exps_push_down_prj(sql, e->r, f, t, true)))
142 3867 : return NULL;
143 1163 : ne = exp_in(sql->sa, l, r, e->flag);
144 : } else {
145 56266 : if (!(l = exp_push_down_prj(sql, e->l, f, t)) || !(r = exp_push_down_prj(sql, e->r, f, t)) || (e->f && !(r2 = exp_push_down_prj(sql, e->f, f, t))))
146 20391 : return NULL;
147 35875 : if (e->f) {
148 740 : ne = exp_compare2(sql->sa, l, r, r2, e->flag, is_symmetric(e));
149 : } else {
150 35135 : ne = exp_compare(sql->sa, l, r, e->flag);
151 : }
152 : }
153 40181 : if (!ne)
154 : return NULL;
155 40181 : return exp_propagate(sql->sa, ne, e);
156 214444 : case e_convert:
157 214444 : if (e->f || !(l = exp_push_down_prj(sql, e->l, f, t)))
158 67394 : return NULL;
159 147050 : ne = exp_convert(sql, l, exp_fromtype(e), exp_totype(e));
160 147050 : return exp_propagate(sql->sa, ne, e);
161 81011 : case e_aggr:
162 : case e_func: {
163 81011 : list *l = e->l, *nl = NULL;
164 81011 : sql_exp *ne = NULL;
165 :
166 81011 : if (e->type == e_func && exp_unsafe(e, false, false))
167 : return NULL;
168 81003 : if (!list_empty(l)) {
169 81003 : nl = exps_push_down_prj(sql, l, f, t, false);
170 81003 : if (!nl)
171 : return NULL;
172 : }
173 78982 : if (e->type == e_func)
174 78982 : ne = exp_op(sql->sa, nl, e->f);
175 : else
176 0 : ne = exp_aggr(sql->sa, nl, e->f, need_distinct(e), need_no_nil(e), e->card, has_nil(e));
177 78982 : return exp_propagate(sql->sa, ne, e);
178 : }
179 143468 : case e_atom: {
180 143468 : list *l = e->f, *nl = NULL;
181 :
182 143468 : if (!list_empty(l)) {
183 1546 : nl = exps_push_down_prj(sql, l, f, t, false);
184 1546 : if (!nl)
185 : return NULL;
186 1536 : ne = exp_values(sql->sa, nl);
187 : } else {
188 141922 : ne = exp_copy(sql, e);
189 : }
190 143458 : return exp_propagate(sql->sa, ne, e);
191 : }
192 : case e_psm:
193 : if (e->type == e_atom && e->f) /* value list */
194 : return NULL;
195 : return e;
196 : }
197 : return NULL;
198 : }
199 :
200 : atom *
201 583 : exp_flatten(mvc *sql, bool value_based_opt, sql_exp *e)
202 : {
203 583 : if (e->type == e_atom) {
204 373 : return value_based_opt ? exp_value(sql, e) : (atom *) e->l;
205 210 : } else if (e->type == e_convert) {
206 44 : atom *v = exp_flatten(sql, value_based_opt, e->l);
207 :
208 44 : if (v)
209 22 : return atom_cast(sql->sa, v, exp_subtype(e));
210 166 : } else if (e->type == e_func) {
211 166 : sql_subfunc *f = e->f;
212 166 : list *l = e->l;
213 166 : sql_arg *res = (f->func->res)?(f->func->res->h->data):NULL;
214 :
215 : /* TODO handle date + x months */
216 166 : if (!f->func->s && strcmp(f->func->base.name, "sql_add") == 0 && list_length(l) == 2 && res && EC_NUMBER(res->type.type->eclass)) {
217 18 : atom *l1 = exp_flatten(sql, value_based_opt, l->h->data);
218 18 : atom *l2 = exp_flatten(sql, value_based_opt, l->h->next->data);
219 18 : if (l1 && l2)
220 2 : return atom_add(sql->sa, l1, l2);
221 148 : } else if (!f->func->s && strcmp(f->func->base.name, "sql_sub") == 0 && list_length(l) == 2 && res && EC_NUMBER(res->type.type->eclass)) {
222 10 : atom *l1 = exp_flatten(sql, value_based_opt, l->h->data);
223 10 : atom *l2 = exp_flatten(sql, value_based_opt, l->h->next->data);
224 10 : if (l1 && l2)
225 9 : return atom_sub(sql->sa, l1, l2);
226 : }
227 : }
228 : return NULL;
229 : }
230 :
231 : int
232 150 : exp_range_overlap(atom *min, atom *max, atom *emin, atom *emax, bool min_exclusive, bool max_exclusive)
233 : {
234 150 : if (!min || !max || !emin || !emax || min->isnull || max->isnull || emin->isnull || emax->isnull)
235 : return 0;
236 :
237 150 : if ((!min_exclusive && VALcmp(&(emax->data), &(min->data)) < 0) || (min_exclusive && VALcmp(&(emax->data), &(min->data)) <= 0))
238 29 : return 0;
239 121 : if ((!max_exclusive && VALcmp(&(emin->data), &(max->data)) > 0) || (max_exclusive && VALcmp(&(emin->data), &(max->data)) >= 0))
240 34 : return 0;
241 : return 1;
242 : }
243 :
244 :
245 : /* if local_proj is > -1, the current expression is from the same projection
246 : if local_proj is -1, then we don't care about self references (eg used to check for order by exps) */
247 : static int exp_mark_used(sql_rel *subrel, sql_exp *e, int local_proj);
248 :
249 : static int
250 846555 : exps_mark_used(sql_rel *subrel, list *l, int local_proj)
251 : {
252 846555 : int nr = 0;
253 846555 : if (list_empty(l))
254 : return nr;
255 :
256 2718638 : for (node *n = l->h; n != NULL; n = n->next)
257 1872805 : nr += exp_mark_used(subrel, n->data, local_proj);
258 : return nr;
259 : }
260 :
261 : /* mark all expression related to this nid */
262 : static int
263 6997746 : exps_mark_all_used(list *exps, int nid, int local_proj)
264 : {
265 6997746 : if (!list_empty(exps)) {
266 6997755 : int i = 0;
267 86780574 : for(node *n = exps->h; n; n = n->next, i++) {
268 82403908 : sql_exp *e = n->data;
269 :
270 82403908 : if (e->alias.label == nid) {
271 3921791 : if (local_proj <= -1 || i < local_proj) {
272 2667297 : if (local_proj < 0 || e->nid != e->alias.label) {
273 2621024 : e->used = 1;
274 2621024 : return 1;
275 : }
276 : }
277 : }
278 79782884 : if (e->f && e->type == e_column && (local_proj <= -1 || i < local_proj)) {
279 175 : if (exps_mark_all_used(e->f, nid, -2)) {
280 65 : e->used = 1;
281 65 : return 1;
282 : }
283 : }
284 : }
285 : }
286 : return 0;
287 : }
288 :
289 : static int
290 8039460 : rel_mark_all_used(sql_rel *r, int nid, int local_proj)
291 : {
292 10855828 : if (is_project(r->op) || (is_base(r->op) && r->exps))
293 6997602 : return exps_mark_all_used(r->exps, nid, local_proj);
294 3858226 : if (is_select(r->op) || is_semi(r->op))
295 780840 : return rel_mark_all_used(r->l, nid, local_proj);
296 3077386 : if (is_join(r->op)) {
297 3075745 : if (r->l && rel_mark_all_used(r->l, nid, local_proj))
298 : return 1;
299 2035528 : else if (r->r)
300 : return rel_mark_all_used(r->r, nid, local_proj);
301 : }
302 : return 0;
303 : }
304 :
305 : static int
306 7309694 : exp_mark_used(sql_rel *subrel, sql_exp *e, int local_proj)
307 : {
308 7528910 : int nr = 0;
309 7528910 : sql_exp *ne = NULL;
310 :
311 7528910 : switch(e->type) {
312 5013297 : case e_column:
313 5013297 : if (e->nid && subrel && subrel->exps && rel_mark_all_used(subrel, e->nid, local_proj))
314 : nr++;
315 : else
316 2392305 : ne = rel_find_exp(subrel, e);
317 : /* if looking in the same projection, make sure 'ne' is projected before the searched column */
318 5013344 : if (ne && local_proj > -1 && list_position(subrel->exps, ne) >= local_proj)
319 6271717 : ne = NULL;
320 : break;
321 219216 : case e_convert:
322 219216 : return exp_mark_used(subrel, e->l, local_proj);
323 813513 : case e_aggr:
324 : case e_func: {
325 813513 : if (e->l)
326 780707 : nr += exps_mark_used(subrel, e->l, local_proj);
327 813512 : if (e->r) {
328 2004 : list *r = e->r;
329 2004 : list *obes = r->h->data;
330 2004 : nr += exps_mark_used(subrel, obes, local_proj);
331 2004 : if (r->h->next) {
332 0 : list *exps = r->h->next->data;
333 0 : nr += exps_mark_used(subrel, exps, local_proj);
334 : }
335 : }
336 : break;
337 : }
338 444869 : case e_cmp:
339 444869 : if (e->flag == cmp_or || e->flag == cmp_filter) {
340 19131 : nr += exps_mark_used(subrel, e->l, local_proj);
341 19131 : nr += exps_mark_used(subrel, e->r, local_proj);
342 425738 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
343 19593 : nr += exp_mark_used(subrel, e->l, local_proj);
344 19593 : nr += exps_mark_used(subrel, e->r, local_proj);
345 : } else {
346 406145 : nr += exp_mark_used(subrel, e->l, local_proj);
347 406145 : nr += exp_mark_used(subrel, e->r, local_proj);
348 406145 : if (e->f)
349 6439 : nr += exp_mark_used(subrel, e->f, local_proj);
350 : }
351 : break;
352 1038015 : case e_atom:
353 : /* atoms are used in e_cmp */
354 1038015 : e->used = 1;
355 : /* return 0 as constants may require a full column ! */
356 1038015 : if (e->f)
357 5990 : nr += exps_mark_used(subrel, e->f, local_proj);
358 : return nr;
359 0 : case e_psm:
360 0 : if (e->flag & PSM_SET || e->flag & PSM_RETURN || e->flag & PSM_EXCEPTION) {
361 0 : nr += exp_mark_used(subrel, e->l, local_proj);
362 0 : } else if (e->flag & PSM_WHILE || e->flag & PSM_IF) {
363 0 : nr += exp_mark_used(subrel, e->l, local_proj);
364 0 : nr += exps_mark_used(subrel, e->r, local_proj);
365 0 : if (e->flag == PSM_IF && e->f)
366 0 : nr += exps_mark_used(subrel, e->f, local_proj);
367 : }
368 0 : e->used = 1;
369 0 : break;
370 : }
371 6271717 : if (ne && e != ne) {
372 109754 : if (local_proj == -2 || ne->type != e_column || (has_label(ne) || (ne->alias.parent && ne->alias.parent->name && ne->alias.parent->name[0] == '%')) || (subrel->l && !is_munion(subrel->op) && !rel_find_exp(subrel->l, e)))
373 53502 : ne->used = 1;
374 109754 : return ne->used;
375 : }
376 : return nr;
377 : }
378 :
379 : static void
380 45624 : positional_exps_mark_used( sql_rel *rel, sql_rel *subrel )
381 : {
382 45624 : assert(rel->exps);
383 :
384 45624 : if ((is_topn(subrel->op) || is_sample(subrel->op)) && subrel->l)
385 45624 : subrel = subrel->l;
386 : /* everything is used within the set operation */
387 45624 : if (rel->exps && subrel->exps) {
388 45624 : node *m;
389 386550 : for (m=subrel->exps->h; m; m = m->next) {
390 340926 : sql_exp *se = m->data;
391 :
392 340926 : se->used = 1;
393 : }
394 : }
395 45624 : }
396 :
397 : static void
398 802836 : rel_exps_mark_used(allocator *sa, sql_rel *rel, sql_rel *subrel)
399 : {
400 802836 : int nr = 0;
401 :
402 802836 : if (rel->l && rel->r && (is_simple_project(rel->op) || is_groupby(rel->op))) {
403 34799 : list *l = rel->r;
404 34799 : node *n;
405 :
406 117292 : for (n=l->h; n; n = n->next) {
407 82491 : sql_exp *e = n->data;
408 :
409 82491 : e->used = 1;
410 82491 : exp_mark_used(rel->l, e, -1);
411 : }
412 : }
413 802838 : if (rel->attr) {
414 26633 : for (node *n = rel->attr->h; n; n = n->next) {
415 13314 : sql_exp *e = n->data;
416 :
417 13314 : if (e->type != e_aggr) /* keep all group by's */
418 13314 : e->used = 1;
419 13314 : if (e->used)
420 13314 : nr += exp_mark_used(subrel, e, -2);
421 : }
422 : }
423 802843 : if (rel->exps) {
424 762746 : node *n;
425 762746 : int len = list_length(rel->exps), i;
426 762744 : sql_exp **exps = SA_NEW_ARRAY(sa, sql_exp*, len);
427 :
428 3759868 : for (n=rel->exps->h, i = 0; n; n = n->next, i++) {
429 2997126 : sql_exp *e = exps[i] = n->data;
430 :
431 2997126 : nr += e->used;
432 : }
433 :
434 762742 : if (!nr && is_project(rel->op) && len > 0) /* project at least one column if exists */
435 6784 : exps[0]->used = 1;
436 :
437 3759937 : for (i = len-1; i >= 0; i--) {
438 2997183 : sql_exp *e = exps[i];
439 :
440 2997183 : if (!is_project(rel->op) || (e->used && !is_set(rel->op))) {
441 2425310 : if (is_project(rel->op))
442 1994856 : nr += exp_mark_used(rel, e, i);
443 2425305 : nr += exp_mark_used(subrel, e, -2);
444 : }
445 : }
446 : }
447 : /* for count/rank we need at least one column */
448 802851 : if (!nr && subrel && (is_project(subrel->op) || is_base(subrel->op)) && !list_empty(subrel->exps) &&
449 27419 : (is_simple_project(rel->op) && project_unsafe(rel, false))) {
450 34 : sql_exp *e = subrel->exps->h->data;
451 34 : e->used = 1;
452 : }
453 802851 : if (rel->r && (is_simple_project(rel->op) || is_groupby(rel->op))) {
454 34799 : list *l = rel->r;
455 34799 : node *n;
456 :
457 117292 : for (n=l->h; n; n = n->next) {
458 82492 : sql_exp *e = n->data;
459 :
460 82492 : e->used = 1;
461 : /* possibly project/groupby uses columns from the inner */
462 82492 : exp_mark_used(subrel, e, -2);
463 : }
464 : }
465 802852 : }
466 :
467 : static void exps_used(list *l);
468 :
469 : static void
470 3632404 : exp_used(sql_exp *e)
471 : {
472 3665218 : if (e) {
473 3646354 : e->used = 1;
474 :
475 3646354 : switch (e->type) {
476 31618 : case e_convert:
477 31618 : exp_used(e->l);
478 31618 : break;
479 99048 : case e_func:
480 : case e_aggr:
481 99048 : exps_used(e->l);
482 99048 : break;
483 8031 : case e_cmp:
484 8031 : if (e->flag == cmp_or || e->flag == cmp_filter) {
485 1043 : exps_used(e->l);
486 1043 : exps_used(e->r);
487 6988 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
488 252 : exp_used(e->l);
489 252 : exps_used(e->r);
490 : } else {
491 6736 : exp_used(e->l);
492 6736 : exp_used(e->r);
493 6736 : if (e->f)
494 : exp_used(e->f);
495 : }
496 : break;
497 : default:
498 : break;
499 : }
500 : }
501 3632404 : }
502 :
503 : static void
504 911972 : exps_used(list *l)
505 : {
506 911972 : if (l) {
507 4529536 : for (node *n = l->h; n; n = n->next)
508 3617514 : exp_used(n->data);
509 : }
510 912106 : }
511 :
512 : static void
513 845172 : rel_used(sql_rel *rel)
514 : {
515 845172 : if (!rel)
516 : return;
517 801523 : if (is_join(rel->op) || is_set(rel->op) || is_semi(rel->op) || is_modify(rel->op)) {
518 73736 : rel_used(rel->l);
519 73906 : rel_used(rel->r);
520 : } else if (rel->op == op_munion) {
521 13172 : list *l = rel->l;
522 39679 : for(node *n = l->h; n; n = n->next)
523 26507 : rel_used(n->data);
524 : } else if (is_topn(rel->op) || is_select(rel->op) || is_sample(rel->op)) {
525 20830 : rel_used(rel->l);
526 20843 : rel = rel->l;
527 : } else if (is_ddl(rel->op)) {
528 413506 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
529 37606 : rel_used(rel->l);
530 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
531 285 : rel_used(rel->l);
532 285 : rel_used(rel->r);
533 : } else if (rel->flag == ddl_psm) {
534 7 : exps_used(rel->exps);
535 : }
536 : } else if (rel->op == op_table) {
537 1093 : if (IS_TABLE_PROD_FUNC(rel->flag) || rel->flag == TABLE_FROM_RELATION)
538 1093 : rel_used(rel->l);
539 1093 : exp_used(rel->r);
540 : }
541 914783 : if (rel && rel->exps) {
542 788349 : exps_used(rel->exps);
543 788508 : if (rel->r && (is_simple_project(rel->op) || is_groupby(rel->op)))
544 22200 : exps_used(rel->r);
545 : }
546 : }
547 :
548 : static void
549 1309077 : rel_mark_used(mvc *sql, sql_rel *rel, int proj)
550 : {
551 1950934 : if (proj && (need_distinct(rel)))
552 11040 : rel_used(rel);
553 :
554 1950921 : switch(rel->op) {
555 : case op_basetable:
556 : case op_truncate:
557 : case op_insert:
558 : break;
559 :
560 13868 : case op_table:
561 :
562 13868 : if (rel->l && rel->flag != TRIGGER_WRAPPER) {
563 147 : rel_used(rel);
564 147 : if (rel->r)
565 147 : exp_mark_used(rel->l, rel->r, -2);
566 147 : rel_mark_used(sql, rel->l, proj);
567 : }
568 : break;
569 :
570 16824 : case op_topn:
571 : case op_sample:
572 16824 : if (proj) {
573 16724 : rel = rel ->l;
574 16724 : rel_mark_used(sql, rel, proj);
575 16724 : break;
576 : }
577 : /* fall through */
578 : case op_project:
579 : case op_groupby:
580 549526 : if (proj && rel->l) {
581 311968 : rel_exps_mark_used(sql->sa, rel, rel->l);
582 311982 : rel_mark_used(sql, rel->l, 0);
583 13547 : } else if (proj) {
584 13447 : rel_exps_mark_used(sql->sa, rel, NULL);
585 : }
586 : break;
587 7981 : case op_update:
588 : case op_delete:
589 7981 : if (proj && rel->r) {
590 6418 : rel_used(rel);
591 6418 : sql_rel *r = rel->r;
592 :
593 6418 : if (!list_empty(r->exps)) {
594 7520 : for (node *n = r->exps->h; n; n = n->next) {
595 6420 : sql_exp *e = n->data;
596 6420 : const char *nname = exp_name(e);
597 :
598 6420 : if (nname && nname[0] == '%' && strcmp(nname, TID) == 0) { /* TID is used */
599 5318 : e->used = 1;
600 5318 : break;
601 : }
602 : }
603 : }
604 6418 : rel_exps_mark_used(sql->sa, rel, rel->r);
605 6418 : rel_mark_used(sql, rel->r, 0);
606 : }
607 : break;
608 :
609 409692 : case op_ddl:
610 409692 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
611 33792 : if (rel->l)
612 : rel_mark_used(sql, rel->l, 0);
613 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
614 285 : if (rel->l)
615 285 : rel_mark_used(sql, rel->l, 0);
616 285 : if (rel->r)
617 : rel_mark_used(sql, rel->r, 0);
618 : }
619 : break;
620 :
621 114788 : case op_select:
622 114788 : if (rel->l) {
623 114788 : rel_exps_mark_used(sql->sa, rel, rel->l);
624 114788 : rel_mark_used(sql, rel->l, 0);
625 : }
626 : break;
627 :
628 5154 : case op_union:
629 : case op_inter:
630 : case op_except:
631 : /* For now we mark all union expression as used */
632 :
633 : /* Later we should (in case of union all) remove unused
634 : * columns from the projection.
635 : *
636 : * Project part of union is based on column position.
637 : */
638 5154 : if (proj && (need_distinct(rel) || !rel->exps)) {
639 2337 : rel_used(rel);
640 2337 : if (!rel->exps) {
641 0 : rel_used(rel->l);
642 0 : rel_used(rel->r);
643 : }
644 2337 : rel_mark_used(sql, rel->l, 0);
645 2337 : rel_mark_used(sql, rel->r, 0);
646 485 : } else if (proj && !need_distinct(rel)) {
647 485 : sql_rel *l = rel->l;
648 :
649 485 : positional_exps_mark_used(rel, l);
650 485 : rel_exps_mark_used(sql->sa, rel, l);
651 485 : rel_mark_used(sql, rel->l, 0);
652 : /* based on child check set expression list */
653 485 : if (is_project(l->op) && need_distinct(l))
654 0 : positional_exps_mark_used(l, rel);
655 485 : positional_exps_mark_used(rel, rel->r);
656 485 : rel_exps_mark_used(sql->sa, rel, rel->r);
657 485 : rel_mark_used(sql, rel->r, 0);
658 : }
659 : break;
660 :
661 49869 : case op_munion:
662 49869 : assert(rel->l);
663 : // TODO: here we blindly follow the same logic as op_union. RE-evaluate
664 49869 : if (proj && (need_distinct(rel) || !rel->exps)) {
665 2280 : rel_used(rel);
666 2280 : if (!rel->exps) {
667 0 : for (node *n = ((list*)rel->l)->h; n; n = n->next) {
668 0 : rel_used(n->data);
669 0 : rel_mark_used(sql, n->data, 0);
670 : }
671 : }
672 22171 : } else if (proj && !need_distinct(rel)) {
673 22171 : bool first = true;
674 66825 : for (node *n = ((list*)rel->l)->h; n; n = n->next) {
675 44654 : sql_rel *l = n->data;
676 :
677 44654 : positional_exps_mark_used(rel, l);
678 44654 : rel_exps_mark_used(sql->sa, rel, l);
679 44654 : rel_mark_used(sql, l, 0);
680 : /* based on child check set expression list */
681 44654 : if (first && is_project(l->op) && need_distinct(l))
682 0 : positional_exps_mark_used(l, rel);
683 44654 : first = false;
684 : }
685 : }
686 : break;
687 155296 : case op_join:
688 : case op_left:
689 : case op_right:
690 : case op_full:
691 : case op_semi:
692 : case op_anti:
693 : case op_merge:
694 155296 : rel_exps_mark_used(sql->sa, rel, rel->l);
695 155296 : rel_exps_mark_used(sql->sa, rel, rel->r);
696 155296 : rel_mark_used(sql, rel->l, 0);
697 155296 : rel_mark_used(sql, rel->r, 0);
698 155296 : break;
699 : }
700 1309079 : }
701 :
702 : static sql_rel * rel_dce_sub(mvc *sql, sql_rel *rel);
703 :
704 : static sql_rel *
705 1163431 : rel_remove_unused(mvc *sql, sql_rel *rel)
706 : {
707 1163431 : int needed = 0;
708 :
709 1163431 : if (!rel)
710 : return rel;
711 :
712 1156347 : switch(rel->op) {
713 310980 : case op_basetable: {
714 310980 : sql_table *t = rel->l;
715 :
716 310980 : if (t && isReplicaTable(t)) /* TODO fix rewriting in rel_distribute.c */
717 : return rel;
718 : }
719 : /* fall through */
720 : case op_table:
721 318048 : if (rel->exps && (rel->op != op_table || !IS_TABLE_PROD_FUNC(rel->flag))) {
722 1389570 : for(node *n=rel->exps->h; n && !needed; n = n->next) {
723 1078606 : sql_exp *e = n->data;
724 :
725 1078606 : if (!e->used)
726 234236 : needed = 1;
727 : }
728 :
729 310964 : if (!needed)
730 : return rel;
731 :
732 1394032 : for(node *n=rel->exps->h; n;) {
733 1159805 : node *next = n->next;
734 1159805 : sql_exp *e = n->data;
735 :
736 : /* at least one (needed for crossproducts, count(*), rank() and single value projections) !, handled by rel_exps_mark_used */
737 1159805 : if (!e->used && list_length(rel->exps) > 1)
738 395573 : list_remove_node(rel->exps, NULL, n);
739 : n = next;
740 : }
741 : }
742 241311 : if (rel->op == op_table && (IS_TABLE_PROD_FUNC(rel->flag) || rel->flag == TABLE_FROM_RELATION))
743 7084 : rel->l = rel_remove_unused(sql, rel->l);
744 : return rel;
745 :
746 16722 : case op_topn:
747 : case op_sample:
748 :
749 16722 : if (rel->l)
750 16722 : rel->l = rel_remove_unused(sql, rel->l);
751 : return rel;
752 :
753 325432 : case op_project:
754 : case op_groupby:
755 :
756 325432 : if (/*rel->l &&*/ rel->exps) {
757 2072582 : for(node *n=rel->exps->h; n && !needed; n = n->next) {
758 1747150 : sql_exp *e = n->data;
759 :
760 1747150 : if (!e->used)
761 24121 : needed = 1;
762 : }
763 325432 : if (!needed)
764 : return rel;
765 :
766 566063 : for(node *n=rel->exps->h; n;) {
767 541942 : node *next = n->next;
768 541942 : sql_exp *e = n->data;
769 :
770 : /* at least one (needed for crossproducts, count(*), rank() and single value projections) */
771 541942 : if (!e->used && list_length(rel->exps) > 1)
772 423899 : list_remove_node(rel->exps, NULL, n);
773 : n = next;
774 : }
775 : }
776 : return rel;
777 :
778 3201 : case op_join:
779 : case op_left:
780 : case op_right:
781 : case op_full:
782 3201 : if (list_length(rel->attr) > 1) {
783 0 : for(node *n=rel->attr->h; n && !needed; n = n->next) {
784 0 : sql_exp *e = n->data;
785 :
786 0 : if (!e->used)
787 0 : needed = 1;
788 : }
789 0 : if (!needed)
790 : return rel;
791 :
792 0 : for(node *n=rel->attr->h; n;) {
793 0 : node *next = n->next;
794 0 : sql_exp *e = n->data;
795 :
796 0 : if (!e->used)
797 0 : list_remove_node(rel->attr, NULL, n);
798 : n = next;
799 : }
800 : }
801 : return rel;
802 :
803 : case op_union:
804 : case op_inter:
805 : case op_except:
806 : case op_munion:
807 :
808 : case op_insert:
809 : case op_update:
810 : case op_delete:
811 : case op_truncate:
812 : case op_merge:
813 :
814 : case op_select:
815 :
816 : case op_semi:
817 : case op_anti:
818 : return rel;
819 409374 : case op_ddl:
820 409374 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
821 33518 : if (rel->l)
822 33155 : rel->l = rel_remove_unused(sql, rel->l);
823 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
824 241 : if (rel->l)
825 241 : rel->l = rel_remove_unused(sql, rel->l);
826 241 : if (rel->r)
827 241 : rel->r = rel_remove_unused(sql, rel->r);
828 : }
829 : return rel;
830 : }
831 : return rel;
832 : }
833 :
834 : static void
835 1348811 : rel_dce_refs(mvc *sql, sql_rel *rel, list *refs)
836 : {
837 1348811 : if (!rel || (rel_is_ref(rel) && list_find(refs, rel, NULL)))
838 14139 : return ;
839 :
840 1334672 : switch(rel->op) {
841 419178 : case op_table:
842 : case op_topn:
843 : case op_sample:
844 : case op_project:
845 : case op_groupby:
846 : case op_select:
847 :
848 419178 : if (rel->l && (rel->op != op_table || rel->flag != TRIGGER_WRAPPER))
849 400348 : rel_dce_refs(sql, rel->l, refs);
850 : break;
851 :
852 : case op_basetable:
853 : case op_insert:
854 : case op_truncate:
855 : break;
856 :
857 6774 : case op_update:
858 : case op_delete:
859 :
860 6774 : if (rel->r)
861 6418 : rel_dce_refs(sql, rel->r, refs);
862 : break;
863 :
864 147396 : case op_union:
865 : case op_inter:
866 : case op_except:
867 : case op_join:
868 : case op_left:
869 : case op_right:
870 : case op_full:
871 : case op_semi:
872 : case op_anti:
873 : case op_merge:
874 :
875 147396 : if (rel->l)
876 147396 : rel_dce_refs(sql, rel->l, refs);
877 147396 : if (rel->r)
878 147396 : rel_dce_refs(sql, rel->r, refs);
879 : break;
880 23954 : case op_munion:
881 23954 : assert(rel->l);
882 72206 : for (node *n = ((list*)rel->l)->h; n; n = n->next)
883 48252 : rel_dce_refs(sql, n->data, refs);
884 : break;
885 410792 : case op_ddl:
886 :
887 410792 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
888 34892 : if (rel->l)
889 34529 : rel_dce_refs(sql, rel->l, refs);
890 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
891 285 : if (rel->l)
892 285 : rel_dce_refs(sql, rel->l, refs);
893 285 : if (rel->r)
894 251 : rel_dce_refs(sql, rel->r, refs);
895 : } break;
896 : }
897 :
898 1334680 : if (rel_is_ref(rel) && !list_find(refs, rel, NULL))
899 16867 : list_prepend(refs, rel);
900 : }
901 :
902 : static sql_rel *
903 1940536 : rel_dce_down(mvc *sql, sql_rel *rel, int skip_proj)
904 : {
905 1940536 : if (!rel)
906 : return rel;
907 :
908 1940536 : if (!skip_proj && rel_is_ref(rel))
909 : return rel;
910 :
911 1909971 : switch(rel->op) {
912 569658 : case op_basetable:
913 : case op_table:
914 :
915 569658 : if (skip_proj && rel->l && rel->op == op_table && rel->flag != TRIGGER_WRAPPER)
916 86 : rel->l = rel_dce_down(sql, rel->l, 0);
917 284578 : if (!skip_proj)
918 284578 : rel_dce_sub(sql, rel);
919 : /* fall through */
920 :
921 : case op_truncate:
922 : return rel;
923 :
924 7878 : case op_insert:
925 7878 : rel_used(rel->r);
926 7879 : rel_dce_sub(sql, rel->r);
927 7879 : return rel;
928 :
929 7981 : case op_update:
930 : case op_delete:
931 :
932 7981 : if (skip_proj && rel->r)
933 6418 : rel->r = rel_dce_down(sql, rel->r, 0);
934 1207 : if (!skip_proj)
935 1207 : rel_dce_sub(sql, rel);
936 : return rel;
937 :
938 562511 : case op_topn:
939 : case op_sample:
940 : case op_project:
941 : case op_groupby:
942 :
943 562511 : if (skip_proj && rel->l)
944 328652 : rel->l = rel_dce_down(sql, rel->l, is_topn(rel->op) || is_sample(rel->op));
945 220436 : if (!skip_proj)
946 220436 : rel_dce_sub(sql, rel);
947 : return rel;
948 :
949 5147 : case op_union:
950 : case op_inter:
951 : case op_except:
952 5147 : if (skip_proj) {
953 2822 : if (rel->l)
954 2822 : rel->l = rel_dce_down(sql, rel->l, 0);
955 2822 : if (rel->r)
956 2822 : rel->r = rel_dce_down(sql, rel->r, 0);
957 : }
958 2325 : if (!skip_proj)
959 2325 : rel_dce_sub(sql, rel);
960 : return rel;
961 :
962 46932 : case op_munion:
963 46932 : if (skip_proj) {
964 73664 : for (node *n = ((list*)rel->l)->h; n; n = n->next)
965 49215 : n->data = rel_dce_down(sql, n->data, 0);
966 : }
967 22483 : if (!skip_proj)
968 22483 : rel_dce_sub(sql, rel);
969 : return rel;
970 107101 : case op_select:
971 107101 : if (rel->l)
972 107101 : rel->l = rel_dce_down(sql, rel->l, 0);
973 : return rel;
974 :
975 151728 : case op_join:
976 : case op_left:
977 : case op_right:
978 : case op_full:
979 : case op_semi:
980 : case op_anti:
981 : case op_merge:
982 151728 : if (rel->l)
983 151728 : rel->l = rel_dce_down(sql, rel->l, 0);
984 151728 : if (rel->r)
985 151728 : rel->r = rel_dce_down(sql, rel->r, 0);
986 151728 : if (!skip_proj && !list_empty(rel->attr))
987 3200 : rel_dce_sub(sql, rel);
988 : return rel;
989 :
990 409494 : case op_ddl:
991 409494 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
992 33594 : if (rel->l)
993 33428 : rel->l = rel_dce_down(sql, rel->l, 0);
994 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
995 285 : if (rel->l)
996 285 : rel->l = rel_dce_down(sql, rel->l, 0);
997 285 : if (rel->r)
998 251 : rel->r = rel_dce_down(sql, rel->r, 0);
999 : }
1000 : return rel;
1001 : }
1002 : return rel;
1003 : }
1004 :
1005 : /* DCE
1006 : *
1007 : * Based on top relation expressions mark sub expressions as used.
1008 : * Then recurse down until the projections. Clean them up and repeat.
1009 : */
1010 :
1011 : static sql_rel *
1012 1106074 : rel_dce_sub(mvc *sql, sql_rel *rel)
1013 : {
1014 1106074 : if (!rel)
1015 : return rel;
1016 :
1017 : /*
1018 : * Mark used up until the next project
1019 : * For setops we need to first mark, then remove
1020 : * because of positional dependency
1021 : */
1022 1106074 : rel_mark_used(sql, rel, 1);
1023 1106017 : rel = rel_remove_unused(sql, rel);
1024 1106046 : rel_dce_down(sql, rel, 1);
1025 1106046 : return rel;
1026 : }
1027 :
1028 : /* add projects under set ops */
1029 : static sql_rel *
1030 2176735 : rel_add_projects(mvc *sql, sql_rel *rel)
1031 : {
1032 2176735 : if (!rel)
1033 : return rel;
1034 :
1035 2176735 : switch(rel->op) {
1036 : case op_basetable:
1037 : case op_truncate:
1038 : return rel;
1039 14652 : case op_insert:
1040 : case op_update:
1041 : case op_delete:
1042 14652 : if (rel->r)
1043 14296 : rel->r = rel_add_projects(sql, rel->r);
1044 : return rel;
1045 2918 : case op_union:
1046 : case op_inter:
1047 : case op_except:
1048 : /* We can only reduce the list of expressions of an set op
1049 : * if the projection under it can also be reduced.
1050 : */
1051 2918 : if (rel->l) {
1052 2918 : sql_rel *l = rel->l;
1053 :
1054 2918 : if (!is_project(l->op) && !need_distinct(rel))
1055 3 : l = rel_project(sql->sa, l, rel_projections(sql, l, NULL, 1, 1));
1056 2918 : rel->l = rel_add_projects(sql, l);
1057 : }
1058 2918 : if (rel->r) {
1059 2918 : sql_rel *r = rel->r;
1060 :
1061 2918 : if (!is_project(r->op) && !need_distinct(rel))
1062 4 : r = rel_project(sql->sa, r, rel_projections(sql, r, NULL, 1, 1));
1063 2918 : rel->r = rel_add_projects(sql, r);
1064 : }
1065 : return rel;
1066 84061 : case op_munion:
1067 84061 : assert(rel->l);
1068 252942 : for (node *n = ((list*)rel->l)->h; n; n = n->next) {
1069 168881 : sql_rel* r = n->data;
1070 168881 : if (!is_project(r->op) && !need_distinct(rel))
1071 0 : r = rel_project(sql->sa, r, rel_projections(sql, r, NULL, 1, 1));
1072 168881 : r = rel_add_projects(sql, r);
1073 168881 : n->data = r;
1074 : }
1075 : return rel;
1076 811501 : case op_topn:
1077 : case op_sample:
1078 : case op_project:
1079 : case op_groupby:
1080 : case op_select:
1081 : case op_table:
1082 811501 : if (rel->l && (rel->op != op_table || rel->flag != TRIGGER_WRAPPER))
1083 777943 : rel->l = rel_add_projects(sql, rel->l);
1084 : return rel;
1085 305275 : case op_join:
1086 : case op_left:
1087 : case op_right:
1088 : case op_full:
1089 : case op_semi:
1090 : case op_anti:
1091 : case op_merge:
1092 305275 : if (rel->l)
1093 305275 : rel->l = rel_add_projects(sql, rel->l);
1094 305275 : if (rel->r)
1095 305275 : rel->r = rel_add_projects(sql, rel->r);
1096 : return rel;
1097 411059 : case op_ddl:
1098 411059 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
1099 35159 : if (rel->l)
1100 34796 : rel->l = rel_add_projects(sql, rel->l);
1101 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
1102 285 : if (rel->l)
1103 285 : rel->l = rel_add_projects(sql, rel->l);
1104 285 : if (rel->r)
1105 251 : rel->r = rel_add_projects(sql, rel->r);
1106 : }
1107 : return rel;
1108 : }
1109 : return rel;
1110 : }
1111 :
1112 : static sql_rel *
1113 563931 : rel_dce_(mvc *sql, sql_rel *rel)
1114 : {
1115 563931 : list *refs = sa_list(sql->sa);
1116 :
1117 563981 : rel_dce_refs(sql, rel, refs);
1118 564003 : if (refs) {
1119 580897 : for(node *n = refs->h; n; n = n->next) {
1120 16867 : sql_rel *i = n->data;
1121 :
1122 16867 : while (!rel_is_ref(i) && i->l && !is_base(i->op))
1123 : i = i->l;
1124 16867 : if (i)
1125 16867 : rel_used(i);
1126 : }
1127 : }
1128 564030 : rel = rel_add_projects(sql, rel);
1129 563921 : rel_used(rel);
1130 564282 : rel_dce_sub(sql, rel);
1131 563927 : return rel;
1132 : }
1133 :
1134 :
1135 : /* Remove unused expressions */
1136 : static sql_rel *
1137 563939 : rel_dce(visitor *v, global_props *gp, sql_rel *rel)
1138 : {
1139 563939 : (void) gp;
1140 563939 : return rel_dce_(v->sql, rel);
1141 : }
1142 :
1143 : /* keep export for other projects */
1144 : sql_rel *
1145 0 : rel_deadcode_elimination(mvc *sql, sql_rel *rel)
1146 : {
1147 0 : return rel_dce_(sql, rel);
1148 : }
1149 :
1150 : run_optimizer
1151 643716 : bind_dce(visitor *v, global_props *gp)
1152 : {
1153 643716 : int flag = v->sql->sql_optimizer;
1154 643716 : return gp->opt_cycle == 0 && gp->opt_level == 1 && (flag & dce) ? rel_dce : NULL;
1155 : }
1156 :
1157 :
1158 : static int
1159 85113 : topn_sample_safe_exps( list *exps, bool nil_limit )
1160 : {
1161 : /* Limit only expression lists are always save */
1162 85113 : if (list_length(exps) == 1)
1163 : return 1;
1164 1188 : for (node *n = exps->h; n; n = n->next ) {
1165 803 : sql_exp *e = n->data;
1166 :
1167 803 : if (!e || e->type != e_atom || (!nil_limit && exp_is_null(e)))
1168 51 : return 0;
1169 : }
1170 : return 1;
1171 : }
1172 :
1173 : static list *
1174 139 : sum_limit_offset(mvc *sql, sql_rel *rel)
1175 : {
1176 : /* for sample we always propagate, or if the expression list only consists of a limit expression, we copy it */
1177 139 : if (is_sample(rel->op) || list_length(rel->exps) == 1)
1178 136 : return exps_copy(sql, rel->exps);
1179 3 : assert(list_length(rel->exps) == 2);
1180 3 : sql_subtype *lng = sql_bind_localtype("lng");
1181 3 : sql_exp *add = rel_binop_(sql, NULL, exp_copy(sql, rel->exps->h->data), exp_copy(sql, rel->exps->h->next->data), "sys", "sql_add", card_value, true);
1182 : /* for remote plans, make sure the output type is a bigint */
1183 3 : if (subtype_cmp(lng, exp_subtype(add)) != 0)
1184 3 : add = exp_convert(sql, add, exp_subtype(add), lng);
1185 3 : return list_append(sa_list(sql->sa), add);
1186 : }
1187 :
1188 : /*
1189 : * Push TopN (only LIMIT, no ORDER BY) down through projections underneath crossproduct, i.e.,
1190 : *
1191 : * topn( topn(
1192 : * project( project(
1193 : * crossproduct( crossproduct(
1194 : * L, => topn( L )[ n ],
1195 : * R topn( R )[ n ]
1196 : * ) )
1197 : * )[ Cs ]* )[ Cs ]*
1198 : * )[ n ] )[ n ]
1199 : *
1200 : * (TODO: in case of n==1 we can omit the original top-level TopN)
1201 : *
1202 : * also push topn under (non reordering) projections.
1203 : */
1204 : static sql_rel *
1205 125174 : rel_push_topn_and_sample_down_(visitor *v, sql_rel *rel)
1206 : {
1207 125174 : sql_rel *rp = NULL, *r = rel->l, *rpp = NULL;
1208 :
1209 125174 : if (is_topn(rel->op) && !rel_is_ref(rel) &&
1210 50709 : r && r->op == op_table && r->flag != TRIGGER_WRAPPER && !rel_is_ref(r) && r->r) {
1211 3 : sql_exp *op = r->r;
1212 3 : sql_subfunc *f = op->f;
1213 :
1214 3 : if (is_func(op->type) && strcmp(f->func->base.name, "file_loader") == 0 && !sql_func_mod(f->func)[0] && !sql_func_imp(f->func)[0]) {
1215 : /* push limit, to arguments of file_loader */
1216 0 : list *args = op->l;
1217 0 : if (list_length(args) == 2) {
1218 0 : sql_exp *topN = rel->exps->h->data;
1219 0 : sql_exp *offset = rel->exps->h->next? rel->exps->h->next->data:NULL;
1220 0 : atom *topn = topN->l;
1221 0 : if (offset) {
1222 0 : atom *b1 = (atom *)offset->l, *c = atom_add(v->sql->sa, b1, topn);
1223 :
1224 0 : if (!c)
1225 : return rel;
1226 0 : if (atom_cmp(c, topn) < 0) /* overflow */
1227 : return rel;
1228 : topn = c;
1229 : }
1230 0 : append(args, exp_atom(v->sql->sa, topn));
1231 0 : v->changes++;
1232 : }
1233 : }
1234 : }
1235 :
1236 125174 : if ((is_topn(rel->op) || is_sample(rel->op)) && topn_sample_safe_exps(rel->exps, true)) {
1237 50783 : sql_rel *(*func) (allocator *, sql_rel *, list *) = is_topn(rel->op) ? rel_topn : rel_sample;
1238 :
1239 : /* nested topN relations */
1240 50783 : if (r && is_topn(rel->op) && is_topn(r->op) && !rel_is_ref(r)) {
1241 0 : sql_exp *topN1 = rel->exps->h->data, *topN2 = r->exps->h->data;
1242 0 : sql_exp *offset1 = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
1243 0 : sql_exp *offset2 = list_length(r->exps) > 1 ? r->exps->h->next->data : NULL;
1244 :
1245 0 : if (topN1->l && topN2->l && (!offset1 || offset1->l) && (!offset2 || offset2->l)) { /* no parameters */
1246 0 : bool changed = false;
1247 :
1248 0 : if ((!offset1 || (offset1->type == e_atom && offset1->l)) && (!offset2 || (offset2->type == e_atom && offset2->l))) { /* only atoms */
1249 0 : if (!offset1 && offset2) {
1250 0 : list_append(rel->exps, exp_copy(v->sql, offset2));
1251 0 : changed = true;
1252 0 : } else if (offset1 && offset2) { /* sum offsets */
1253 0 : atom *b1 = (atom *)offset1->l, *b2 = (atom *)offset2->l, *c = atom_add(v->sql->sa, b1, b2);
1254 :
1255 0 : if (!c) /* error, don't apply optimization, WARNING because of this the offset optimization must come before the limit one */
1256 : return rel;
1257 0 : if (atom_cmp(c, b2) < 0) /* overflow */
1258 0 : c = atom_int(v->sql->sa, sql_bind_localtype("lng"), GDK_lng_max);
1259 0 : offset1->l = c;
1260 0 : changed = true;
1261 : }
1262 : }
1263 :
1264 0 : if (topN1->type == e_atom && topN1->l && topN2->type == e_atom && topN2->l) { /* only atoms */
1265 0 : atom *a1 = (atom *)topN1->l, *a2 = (atom *)topN2->l;
1266 :
1267 0 : if (!a2->isnull && (a1->isnull || atom_cmp(a1, a2) >= 0)) { /* topN1 is not set or is larger than topN2 */
1268 0 : rel->exps->h->data = exp_copy(v->sql, topN2);
1269 0 : changed = true;
1270 : }
1271 : }
1272 :
1273 0 : if (changed) {
1274 0 : rel->l = r->l;
1275 0 : r->l = NULL;
1276 0 : rel_destroy(r);
1277 0 : v->changes++;
1278 0 : return rel;
1279 : }
1280 : }
1281 : }
1282 :
1283 50783 : if (r && is_simple_project(r->op) && need_distinct(r))
1284 : return rel;
1285 :
1286 : /* push topn/sample under projections */
1287 50778 : if (!rel_is_ref(rel) && r && is_simple_project(r->op) && !need_distinct(r) && !rel_is_ref(r) && r->l && list_empty(r->r)) {
1288 : sql_rel *x = r, *px = x;
1289 :
1290 32902 : while (is_simple_project(x->op) && !need_distinct(x) && !rel_is_ref(x) && x->l && list_empty(x->r)) {
1291 16463 : px = x;
1292 16463 : x = x->l;
1293 : }
1294 : /* only push topn once */
1295 16440 : if (x && x->op == rel->op)
1296 : return rel;
1297 :
1298 16433 : rel->l = x;
1299 16433 : px->l = rel;
1300 16433 : rel = r;
1301 16433 : v->changes++;
1302 16433 : return rel;
1303 : }
1304 :
1305 34338 : if (!topn_sample_safe_exps(rel->exps, false))
1306 : return rel;
1307 :
1308 : /* duplicate topn/sample direct under union or crossproduct */
1309 34284 : if (r && !rel_is_ref(r) && r->l && r->r && ((is_union(r->op) && r->exps) || (r->op == op_join && list_empty(r->exps)))) {
1310 152 : sql_rel *u = r, *x;
1311 152 : sql_rel *ul = u->l;
1312 152 : sql_rel *ur = u->r;
1313 152 : bool changed = false;
1314 :
1315 152 : x = ul;
1316 158 : while (is_simple_project(x->op) && !need_distinct(x) && !rel_is_ref(x) && x->l && list_empty(x->r))
1317 6 : x = x->l;
1318 152 : if (x && x->op != rel->op) { /* only push topn once */
1319 65 : ul = func(v->sql->sa, ul, sum_limit_offset(v->sql, rel));
1320 65 : set_processed(ul);
1321 65 : u->l = ul;
1322 65 : changed = true;
1323 : }
1324 :
1325 152 : x = ur;
1326 160 : while (is_simple_project(x->op) && !need_distinct(x) && !rel_is_ref(x) && x->l && list_empty(x->r))
1327 8 : x = x->l;
1328 152 : if (x && x->op != rel->op) { /* only push topn once */
1329 66 : ur = func(v->sql->sa, ur, sum_limit_offset(v->sql, rel));
1330 66 : set_processed(ur);
1331 66 : u->r = ur;
1332 66 : changed = true;
1333 : }
1334 :
1335 152 : if (changed)
1336 66 : v->changes++;
1337 152 : return rel;
1338 : }
1339 :
1340 : /* duplicate topn/sample + [ project-order ] under union */
1341 : if (r && !rp)
1342 34136 : rp = r->l;
1343 34136 : if (r && r->exps && is_simple_project(r->op) && !rel_is_ref(r) && !list_empty(r->r) && r->l && is_union(rp->op)) {
1344 0 : sql_rel *u = rp, *ou = u, *x, *ul = u->l, *ur = u->r;
1345 0 : list *rcopy = NULL;
1346 :
1347 : /* only push topn/sample once */
1348 0 : x = ul;
1349 0 : while (is_simple_project(x->op) && !need_distinct(x) && !rel_is_ref(x) && x->l && list_empty(x->r))
1350 0 : x = x->l;
1351 0 : if (x && x->op == rel->op)
1352 : return rel;
1353 : x = ur;
1354 0 : while (is_simple_project(x->op) && !need_distinct(x) && !rel_is_ref(x) && x->l && list_empty(x->r))
1355 0 : x = x->l;
1356 0 : if (x && x->op == rel->op)
1357 : return rel;
1358 :
1359 0 : rcopy = exps_copy(v->sql, r->r);
1360 0 : for (node *n = rcopy->h ; n ; n = n->next) {
1361 0 : sql_exp *e = n->data;
1362 0 : set_descending(e); /* remove ordering properties for projected columns */
1363 0 : set_nulls_first(e);
1364 : }
1365 0 : ul = rel_dup(ul);
1366 0 : ur = rel_dup(ur);
1367 0 : if (!is_project(ul->op))
1368 0 : ul = rel_project(v->sql->sa, ul,
1369 : rel_projections(v->sql, ul, NULL, 1, 1));
1370 0 : if (!is_project(ur->op))
1371 0 : ur = rel_project(v->sql->sa, ur,
1372 : rel_projections(v->sql, ur, NULL, 1, 1));
1373 0 : rel_rename_exps(v->sql, u->exps, ul->exps);
1374 0 : rel_rename_exps(v->sql, u->exps, ur->exps);
1375 :
1376 : /* introduce projects under the set */
1377 0 : ul = rel_project(v->sql->sa, ul, NULL);
1378 0 : ul->exps = exps_copy(v->sql, r->exps);
1379 : /* possibly add order by column */
1380 0 : ul->exps = list_distinct(list_merge(ul->exps, exps_copy(v->sql, rcopy), NULL), (fcmp) exp_equal, (fdup) NULL);
1381 0 : ul->nrcols = list_length(ul->exps);
1382 0 : ul->r = exps_copy(v->sql, r->r);
1383 0 : set_processed(ul);
1384 0 : ul = func(v->sql->sa, ul, sum_limit_offset(v->sql, rel));
1385 0 : set_processed(ul);
1386 :
1387 0 : ur = rel_project(v->sql->sa, ur, NULL);
1388 0 : ur->exps = exps_copy(v->sql, r->exps);
1389 : /* possibly add order by column */
1390 0 : ur->exps = list_distinct(list_merge(ur->exps, exps_copy(v->sql, rcopy), NULL), (fcmp) exp_equal, (fdup) NULL);
1391 0 : ur->nrcols = list_length(ur->exps);
1392 0 : ur->r = exps_copy(v->sql, r->r);
1393 0 : set_processed(ur);
1394 0 : ur = func(v->sql->sa, ur, sum_limit_offset(v->sql, rel));
1395 0 : set_processed(ur);
1396 :
1397 0 : u = rel_setop(v->sql->sa, ul, ur, op_union);
1398 0 : u->exps = exps_alias(v->sql, r->exps);
1399 0 : u->nrcols = list_length(u->exps);
1400 0 : set_processed(u);
1401 : /* possibly add order by column */
1402 0 : u->exps = list_distinct(list_merge(u->exps, rcopy, NULL), (fcmp) exp_equal, (fdup) NULL);
1403 0 : if (need_distinct(r)) {
1404 0 : set_distinct(ul);
1405 0 : set_distinct(ur);
1406 : }
1407 :
1408 : /* zap names */
1409 0 : rel_no_rename_exps(u->exps);
1410 0 : rel_destroy(ou);
1411 :
1412 0 : ur = rel_project(v->sql->sa, u, exps_alias(v->sql, r->exps));
1413 0 : ur->r = r->r;
1414 0 : r->l = NULL;
1415 :
1416 0 : if (need_distinct(r))
1417 0 : set_distinct(ur);
1418 :
1419 0 : rel_destroy(r);
1420 0 : rel->l = ur;
1421 0 : v->changes++;
1422 0 : return rel;
1423 : }
1424 : /* a left outer join b order by a.* limit L, can be copied into a */
1425 : /* topn ( project (orderby)( optional project ( left ())
1426 : * rel r rp */
1427 34132 : if (r && !rp)
1428 66 : rp = r->l;
1429 34132 : if (r && rp && is_simple_project(rp->op) && !rp->r && rp->l)
1430 34132 : rpp = rp->l;
1431 34132 : if (r && r->exps && is_simple_project(r->op) && !rel_is_ref(r) && r->r && r->l && ((!rpp && is_left(rp->op)) ||
1432 312 : (rpp && is_left(rpp->op)))) {
1433 24 : sql_rel *lj = rpp?rpp:rp;
1434 24 : sql_rel *l = lj->l;
1435 24 : list *obes = r->r, *nobes = sa_list(v->sql->sa);
1436 24 : int fnd = 1;
1437 63 : for (node *n = obes->h; n && fnd; n = n->next) {
1438 41 : sql_exp *obe = n->data;
1439 41 : int part = is_partitioning(obe);
1440 41 : int asc = is_ascending(obe);
1441 41 : int nl = nulls_last(obe);
1442 : /* only simple rename expressions */
1443 41 : sql_exp *pe = exps_find_exp(r->exps, obe);
1444 41 : if (pe && rpp)
1445 33 : pe = exps_find_exp(rp->exps, pe);
1446 41 : if (pe)
1447 41 : pe = rel_find_exp(l, pe);
1448 41 : if (pe) {
1449 41 : if (exp_is_atom(pe))
1450 : return rel;
1451 39 : pe = exp_ref(v->sql, pe);
1452 39 : if (part)
1453 0 : set_partitioning(pe);
1454 39 : if (asc)
1455 29 : set_ascending(pe);
1456 39 : if (nl)
1457 5 : set_nulls_last(pe);
1458 39 : append(nobes, pe);
1459 : }
1460 : else
1461 : fnd = 0;
1462 : }
1463 22 : if (fnd && ((is_topn(rel->op) && !is_topn(l->op)) || (is_sample(rel->op) && !is_sample(l->op)))) {
1464 : /* inject topn */
1465 : /* Todo add order by */
1466 8 : sql_rel *ob = lj->l = rel_project(v->sql->sa, lj->l, rel_projections(v->sql, lj->l, NULL, 1, 1));
1467 8 : ob->r = nobes;
1468 8 : lj->l = func(v->sql->sa, lj->l, sum_limit_offset(v->sql, rel));
1469 8 : v->changes++;
1470 8 : return rel;
1471 : }
1472 : }
1473 : }
1474 : return rel;
1475 : }
1476 :
1477 : static sql_rel *
1478 33886 : rel_push_topn_and_sample_down(visitor *v, global_props *gp, sql_rel *rel)
1479 : {
1480 33886 : (void) gp;
1481 33886 : return rel_visitor_topdown(v, rel, &rel_push_topn_and_sample_down_);
1482 : }
1483 :
1484 : run_optimizer
1485 643714 : bind_push_topn_and_sample_down(visitor *v, global_props *gp)
1486 : {
1487 643714 : int flag = v->sql->sql_optimizer;
1488 643587 : return gp->opt_level == 1 && (gp->cnt[op_topn] || gp->cnt[op_sample]) &&
1489 677454 : (flag & push_topn_and_sample_down) ? rel_push_topn_and_sample_down : NULL;
1490 : }
|