Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer_private.h"
15 : #include "rel_exp.h"
16 : #include "rel_select.h"
17 :
18 : static void
19 45 : rel_no_rename_exps( list *exps )
20 : {
21 230 : for (node *n = exps->h; n; n = n->next) {
22 185 : sql_exp *e = n->data;
23 :
24 185 : exp_setalias(e, e->l, e->r);
25 : }
26 45 : list_hash_clear(exps);
27 45 : }
28 :
29 : void
30 13182 : rel_rename_exps( mvc *sql, list *exps1, list *exps2)
31 : {
32 13182 : int pos = 0;
33 13182 : node *n, *m;
34 :
35 13182 : (void)sql;
36 : /* check if a column uses an alias earlier in the list */
37 102450 : for (n = exps1->h, m = exps2->h; n && m; n = n->next, m = m->next, pos++) {
38 89268 : sql_exp *e2 = m->data;
39 :
40 89268 : if (e2->type == e_column) {
41 87030 : sql_exp *ne = NULL;
42 :
43 87030 : if (e2->l)
44 72621 : ne = exps_bind_column2(exps2, e2->l, e2->r, NULL);
45 87030 : if (!ne && !e2->l)
46 14409 : ne = exps_bind_column(exps2, e2->r, NULL, NULL, 1);
47 70537 : if (ne) {
48 16741 : int p = list_position(exps2, ne);
49 :
50 16741 : if (p < pos) {
51 6 : ne = list_fetch(exps1, p);
52 6 : if (e2->l)
53 6 : e2->l = (void *) exp_relname(ne);
54 6 : e2->r = (void *) exp_name(ne);
55 : }
56 : }
57 : }
58 : }
59 :
60 13182 : assert(list_length(exps1) <= list_length(exps2));
61 102450 : for (n = exps1->h, m = exps2->h; n && m; n = n->next, m = m->next) {
62 89268 : sql_exp *e1 = n->data;
63 89268 : sql_exp *e2 = m->data;
64 89268 : const char *rname = exp_relname(e1);
65 :
66 89268 : if (!rname && e1->type == e_column && e1->l && exp_relname(e2) &&
67 0 : strcmp(e1->l, exp_relname(e2)) == 0)
68 0 : rname = exp_relname(e2);
69 89268 : exp_setalias(e2, rname, exp_name(e1));
70 : }
71 13182 : list_hash_clear(exps2);
72 13182 : }
73 :
74 : sql_rel *
75 170586 : rel_find_ref( sql_rel *r)
76 : {
77 520949 : while (!rel_is_ref(r) && r->l &&
78 388403 : (is_project(r->op) || is_select(r->op) /*|| is_join(r->op)*/))
79 : r = r->l;
80 170586 : if (rel_is_ref(r))
81 20193 : return r;
82 : return NULL;
83 : }
84 :
85 : /* merge projection */
86 :
87 : /* push an expression through a projection.
88 : * The result should again used in a projection.
89 : */
90 : static list *
91 50812 : exps_push_down_prj(mvc *sql, list *exps, sql_rel *f, sql_rel *t, bool keepalias)
92 : {
93 50812 : node *n;
94 50812 : list *nl = new_exp_list(sql->sa);
95 :
96 147938 : for(n = exps->h; n; n = n->next) {
97 104569 : sql_exp *arg = n->data, *narg = NULL;
98 :
99 104569 : narg = exp_push_down_prj(sql, arg, f, t);
100 104569 : if (!narg)
101 : return NULL;
102 97126 : narg = exp_propagate(sql->sa, narg, arg);
103 97126 : if (!keepalias && narg->type == e_column)
104 30072 : exp_setalias(narg, narg->l, narg->r);
105 97126 : append(nl, narg);
106 : }
107 : return nl;
108 : }
109 :
110 : sql_exp *
111 810665 : exp_push_down_prj(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t)
112 : {
113 810665 : sql_exp *ne = NULL, *l = NULL, *r = NULL, *r2 = NULL;
114 :
115 810665 : assert(is_project(f->op));
116 :
117 810665 : switch(e->type) {
118 602433 : case e_column:
119 602433 : if (e->l)
120 564853 : ne = exps_bind_column2(f->exps, e->l, e->r, NULL);
121 602434 : if (!ne && !e->l)
122 37581 : ne = exps_bind_column(f->exps, e->r, NULL, NULL, 1);
123 602434 : if (!ne || (ne->type != e_column && (ne->type != e_atom || ne->f)))
124 63992 : return NULL;
125 538455 : while (ne && (has_label(ne) || is_selfref(ne) /*inside this list */) && is_simple_project(f->op) && ne->type == e_column) {
126 10181 : sql_exp *oe = e, *one = ne;
127 :
128 10181 : e = ne;
129 10181 : ne = NULL;
130 10181 : if (e->l)
131 10180 : ne = exps_bind_column2(f->exps, e->l, e->r, NULL);
132 10181 : if (!ne && !e->l)
133 1 : ne = exps_bind_column(f->exps, e->r, NULL, NULL, 1);
134 10181 : if (ne && ne != one && list_position(f->exps, ne) >= list_position(f->exps, one))
135 10181 : ne = NULL;
136 10181 : if (!ne || ne == one) {
137 : ne = one;
138 : e = oe;
139 : break;
140 : }
141 357 : if (ne->type != e_column && (ne->type != e_atom || ne->f))
142 : return NULL;
143 : }
144 : /* possibly a groupby/project column is renamed */
145 538098 : if (is_groupby(f->op) && !list_empty(f->r) && ne->type == e_column) {
146 123 : sql_exp *gbe = NULL;
147 123 : if (ne->l)
148 122 : gbe = exps_bind_column2(f->r, ne->l, ne->r, NULL);
149 123 : if (!gbe && !e->l)
150 1 : gbe = exps_bind_column(f->r, ne->r, NULL, NULL, 1);
151 124 : ne = gbe;
152 123 : if (!ne || (ne->type != e_column && (ne->type != e_atom || ne->f)))
153 16 : return NULL;
154 : }
155 538082 : if (ne->type == e_atom)
156 18545 : e = exp_copy(sql, ne);
157 : else
158 519537 : e = exp_alias(sql->sa, exp_relname(e), exp_name(e), ne->l, ne->r, exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e));
159 538082 : return exp_propagate(sql->sa, e, ne);
160 51897 : case e_cmp:
161 51897 : if (e->flag == cmp_or || e->flag == cmp_filter) {
162 4211 : list *l = NULL, *r = NULL;
163 :
164 4211 : if (!(l = exps_push_down_prj(sql, e->l, f, t, true)) || !(r = exps_push_down_prj(sql, e->r, f, t, true)))
165 1161 : return NULL;
166 3050 : if (e->flag == cmp_filter) {
167 1048 : ne = exp_filter(sql->sa, l, r, e->f, is_anti(e));
168 : } else {
169 2002 : ne = exp_or(sql->sa, l, r, is_anti(e));
170 : }
171 47686 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
172 2791 : list *r = NULL;
173 :
174 2791 : if (!(l = exp_push_down_prj(sql, e->l, f, t)) || !(r = exps_push_down_prj(sql, e->r, f, t, true)))
175 2337 : return NULL;
176 454 : ne = exp_in(sql->sa, l, r, e->flag);
177 : } else {
178 44895 : if (!(l = exp_push_down_prj(sql, e->l, f, t)) || !(r = exp_push_down_prj(sql, e->r, f, t)) || (e->f && !(r2 = exp_push_down_prj(sql, e->f, f, t))))
179 18724 : return NULL;
180 26171 : if (e->f) {
181 679 : ne = exp_compare2(sql->sa, l, r, r2, e->flag, is_symmetric(e));
182 : } else {
183 25492 : ne = exp_compare(sql->sa, l, r, e->flag);
184 : }
185 : }
186 29675 : if (!ne)
187 : return NULL;
188 29675 : return exp_propagate(sql->sa, ne, e);
189 43764 : case e_convert:
190 43764 : if (!(l = exp_push_down_prj(sql, e->l, f, t)))
191 : return NULL;
192 34925 : ne = exp_convert(sql->sa, l, exp_fromtype(e), exp_totype(e));
193 34925 : return exp_propagate(sql->sa, ne, e);
194 42964 : case e_aggr:
195 : case e_func: {
196 42964 : list *l = e->l, *nl = NULL;
197 42964 : sql_exp *ne = NULL;
198 :
199 42964 : if (e->type == e_func && exp_unsafe(e,0))
200 : return NULL;
201 42944 : if (!list_empty(l)) {
202 42944 : nl = exps_push_down_prj(sql, l, f, t, false);
203 42944 : if (!nl)
204 : return NULL;
205 : }
206 36662 : if (e->type == e_func)
207 36662 : ne = exp_op(sql->sa, nl, e->f);
208 : else
209 0 : ne = exp_aggr(sql->sa, nl, e->f, need_distinct(e), need_no_nil(e), e->card, has_nil(e));
210 36662 : return exp_propagate(sql->sa, ne, e);
211 : }
212 69607 : case e_atom: {
213 69607 : list *l = e->f, *nl = NULL;
214 :
215 69607 : if (!list_empty(l)) {
216 0 : nl = exps_push_down_prj(sql, l, f, t, false);
217 0 : if (!nl)
218 : return NULL;
219 0 : ne = exp_values(sql->sa, nl);
220 : } else {
221 69607 : ne = exp_copy(sql, e);
222 : }
223 69607 : return exp_propagate(sql->sa, ne, e);
224 : }
225 : case e_psm:
226 : if (e->type == e_atom && e->f) /* value list */
227 : return NULL;
228 : return e;
229 : }
230 : return NULL;
231 : }
232 :
233 : atom *
234 596 : exp_flatten(mvc *sql, bool value_based_opt, sql_exp *e)
235 : {
236 596 : if (e->type == e_atom) {
237 332 : return value_based_opt ? exp_value(sql, e) : (atom *) e->l;
238 264 : } else if (e->type == e_convert) {
239 51 : atom *v = exp_flatten(sql, value_based_opt, e->l);
240 :
241 51 : if (v)
242 26 : return atom_cast(sql->sa, v, exp_subtype(e));
243 213 : } else if (e->type == e_func) {
244 213 : sql_subfunc *f = e->f;
245 213 : list *l = e->l;
246 213 : sql_arg *res = (f->func->res)?(f->func->res->h->data):NULL;
247 :
248 : /* TODO handle date + x months */
249 213 : if (!f->func->s && strcmp(f->func->base.name, "sql_add") == 0 && list_length(l) == 2 && res && EC_NUMBER(res->type.type->eclass)) {
250 26 : atom *l1 = exp_flatten(sql, value_based_opt, l->h->data);
251 26 : atom *l2 = exp_flatten(sql, value_based_opt, l->h->next->data);
252 26 : if (l1 && l2)
253 3 : return atom_add(sql->sa, l1, l2);
254 187 : } else if (!f->func->s && strcmp(f->func->base.name, "sql_sub") == 0 && list_length(l) == 2 && res && EC_NUMBER(res->type.type->eclass)) {
255 10 : atom *l1 = exp_flatten(sql, value_based_opt, l->h->data);
256 10 : atom *l2 = exp_flatten(sql, value_based_opt, l->h->next->data);
257 10 : if (l1 && l2)
258 9 : return atom_sub(sql->sa, l1, l2);
259 : }
260 : }
261 : return NULL;
262 : }
263 :
264 : int
265 98 : exp_range_overlap(atom *min, atom *max, atom *emin, atom *emax, bool min_exclusive, bool max_exclusive)
266 : {
267 98 : if (!min || !max || !emin || !emax || min->isnull || max->isnull || emin->isnull || emax->isnull)
268 : return 0;
269 :
270 98 : if ((!min_exclusive && VALcmp(&(emax->data), &(min->data)) < 0) || (min_exclusive && VALcmp(&(emax->data), &(min->data)) <= 0))
271 17 : return 0;
272 81 : if ((!max_exclusive && VALcmp(&(emin->data), &(max->data)) > 0) || (max_exclusive && VALcmp(&(emin->data), &(max->data)) >= 0))
273 27 : return 0;
274 : return 1;
275 : }
276 :
277 :
278 : /* if local_proj is >= -1, the current expression is from the same projection
279 : if local_proj is -1, then we don't care about self references (eg used to check for order by exps) */
280 : static int exp_mark_used(sql_rel *subrel, sql_exp *e, int local_proj);
281 :
282 : static int
283 716754 : exps_mark_used(sql_rel *subrel, list *l, int local_proj)
284 : {
285 716754 : int nr = 0;
286 716754 : if (list_empty(l))
287 : return nr;
288 :
289 2309581 : for (node *n = l->h; n != NULL; n = n->next)
290 1592886 : nr += exp_mark_used(subrel, n->data, local_proj);
291 : return nr;
292 : }
293 :
294 : static int
295 5923866 : exp_mark_used(sql_rel *subrel, sql_exp *e, int local_proj)
296 : {
297 6074316 : int nr = 0;
298 6074316 : sql_exp *ne = NULL;
299 :
300 6074316 : switch(e->type) {
301 3998528 : case e_column:
302 3998528 : ne = rel_find_exp(subrel, e);
303 : /* if looking in the same projection, make sure 'ne' is projected before the searched column */
304 3998582 : if (ne && local_proj > -1 && list_position(subrel->exps, ne) >= local_proj)
305 5053613 : ne = NULL;
306 : break;
307 150450 : case e_convert:
308 150450 : return exp_mark_used(subrel, e->l, local_proj);
309 660510 : case e_aggr:
310 : case e_func: {
311 660510 : if (e->l)
312 648159 : nr += exps_mark_used(subrel, e->l, local_proj);
313 660513 : assert(!e->r);
314 : break;
315 : }
316 394526 : case e_cmp:
317 394526 : if (e->flag == cmp_or || e->flag == cmp_filter) {
318 22577 : nr += exps_mark_used(subrel, e->l, local_proj);
319 22577 : nr += exps_mark_used(subrel, e->r, local_proj);
320 371949 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
321 19070 : nr += exp_mark_used(subrel, e->l, local_proj);
322 19070 : nr += exps_mark_used(subrel, e->r, local_proj);
323 : } else {
324 352879 : nr += exp_mark_used(subrel, e->l, local_proj);
325 352879 : nr += exp_mark_used(subrel, e->r, local_proj);
326 352879 : if (e->f)
327 6315 : nr += exp_mark_used(subrel, e->f, local_proj);
328 : }
329 : break;
330 870302 : case e_atom:
331 : /* atoms are used in e_cmp */
332 870302 : e->used = 1;
333 : /* return 0 as constants may require a full column ! */
334 870302 : if (e->f)
335 4373 : nr += exps_mark_used(subrel, e->f, local_proj);
336 : return nr;
337 0 : case e_psm:
338 0 : if (e->flag & PSM_SET || e->flag & PSM_RETURN || e->flag & PSM_EXCEPTION) {
339 0 : nr += exp_mark_used(subrel, e->l, local_proj);
340 0 : } else if (e->flag & PSM_WHILE || e->flag & PSM_IF) {
341 0 : nr += exp_mark_used(subrel, e->l, local_proj);
342 0 : nr += exps_mark_used(subrel, e->r, local_proj);
343 0 : if (e->flag == PSM_IF && e->f)
344 0 : nr += exps_mark_used(subrel, e->f, local_proj);
345 : }
346 0 : e->used = 1;
347 0 : break;
348 : }
349 5053613 : if (ne && e != ne) {
350 2250090 : if (local_proj == -2 || ne->type != e_column || (has_label(ne) || (ne->alias.rname && ne->alias.rname[0] == '%')) || (subrel->l && !is_munion(subrel->op) && !rel_find_exp(subrel->l, e)))
351 2155223 : ne->used = 1;
352 2250090 : return ne->used;
353 : }
354 : return nr;
355 : }
356 :
357 : static void
358 40618 : positional_exps_mark_used( sql_rel *rel, sql_rel *subrel )
359 : {
360 40618 : assert(rel->exps);
361 :
362 40618 : if ((is_topn(subrel->op) || is_sample(subrel->op)) && subrel->l)
363 40618 : subrel = subrel->l;
364 : /* everything is used within the set operation */
365 40618 : if (rel->exps && subrel->exps) {
366 40618 : node *m;
367 299168 : for (m=subrel->exps->h; m; m = m->next) {
368 258550 : sql_exp *se = m->data;
369 :
370 258550 : se->used = 1;
371 : }
372 : }
373 40618 : }
374 :
375 : static void
376 638817 : rel_exps_mark_used(allocator *sa, sql_rel *rel, sql_rel *subrel)
377 : {
378 638817 : int nr = 0;
379 :
380 638817 : if (rel->r && (is_simple_project(rel->op) || is_groupby(rel->op))) {
381 30312 : list *l = rel->r;
382 30312 : node *n;
383 :
384 104905 : for (n=l->h; n; n = n->next) {
385 74590 : sql_exp *e = n->data;
386 :
387 74590 : e->used = 1;
388 74590 : exp_mark_used(rel, e, -1);
389 : }
390 : }
391 638820 : if (rel->attr) {
392 23715 : for (node *n = rel->attr->h; n; n = n->next) {
393 11858 : sql_exp *e = n->data;
394 :
395 11858 : if (e->type != e_aggr) /* keep all group by's */
396 11858 : e->used = 1;
397 11858 : if (e->used)
398 11858 : nr += exp_mark_used(subrel, e, -2);
399 : }
400 : }
401 638819 : if (rel->exps) {
402 610305 : node *n;
403 610305 : int len = list_length(rel->exps), i;
404 610313 : sql_exp **exps = SA_NEW_ARRAY(sa, sql_exp*, len);
405 :
406 3001472 : for (n=rel->exps->h, i = 0; n; n = n->next, i++) {
407 2391158 : sql_exp *e = exps[i] = n->data;
408 :
409 2391158 : nr += e->used;
410 : }
411 :
412 610314 : if (!nr && is_project(rel->op) && len > 0) /* project at least one column if exists */
413 4696 : exps[0]->used = 1;
414 :
415 3001569 : for (i = len-1; i >= 0; i--) {
416 2391244 : sql_exp *e = exps[i];
417 :
418 2391244 : if (!is_project(rel->op) || e->used) {
419 1902619 : if (is_project(rel->op))
420 1536077 : nr += exp_mark_used(rel, e, i);
421 1902620 : nr += exp_mark_used(subrel, e, -2);
422 : }
423 : }
424 : }
425 : /* for count/rank we need atleast one column */
426 638839 : if (!nr && subrel && (is_project(subrel->op) || is_base(subrel->op)) && !list_empty(subrel->exps) &&
427 12646 : (is_simple_project(rel->op) && project_unsafe(rel, 0))) {
428 36 : sql_exp *e = subrel->exps->h->data;
429 36 : e->used = 1;
430 : }
431 638839 : if (rel->r && (is_simple_project(rel->op) || is_groupby(rel->op))) {
432 30311 : list *l = rel->r;
433 30311 : node *n;
434 :
435 104904 : for (n=l->h; n; n = n->next) {
436 74593 : sql_exp *e = n->data;
437 :
438 74593 : e->used = 1;
439 : /* possibly project/groupby uses columns from the inner */
440 74593 : exp_mark_used(subrel, e, -2);
441 : }
442 : }
443 638839 : }
444 :
445 : static void exps_used(list *l);
446 :
447 : static void
448 3180675 : exp_used(sql_exp *e)
449 : {
450 3198409 : if (e) {
451 3180674 : e->used = 1;
452 :
453 3180674 : switch (e->type) {
454 16542 : case e_convert:
455 16542 : exp_used(e->l);
456 16542 : break;
457 112749 : case e_func:
458 : case e_aggr:
459 112749 : exps_used(e->l);
460 112749 : break;
461 7628 : case e_cmp:
462 7628 : if (e->flag == cmp_or || e->flag == cmp_filter) {
463 1033 : exps_used(e->l);
464 1033 : exps_used(e->r);
465 6595 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
466 236 : exp_used(e->l);
467 236 : exps_used(e->r);
468 : } else {
469 6359 : exp_used(e->l);
470 6359 : exp_used(e->r);
471 6359 : if (e->f)
472 : exp_used(e->f);
473 : }
474 : break;
475 : default:
476 : break;
477 : }
478 : }
479 3180676 : }
480 :
481 : static void
482 855242 : exps_used(list *l)
483 : {
484 855242 : if (l) {
485 4020702 : for (node *n = l->h; n; n = n->next)
486 3166661 : exp_used(n->data);
487 : }
488 855497 : }
489 :
490 : static void
491 748175 : rel_used(sql_rel *rel)
492 : {
493 748175 : if (!rel)
494 : return;
495 705198 : if (is_join(rel->op) || is_set(rel->op) || is_semi(rel->op) || is_modify(rel->op)) {
496 60172 : rel_used(rel->l);
497 60389 : rel_used(rel->r);
498 : } else if (rel->op == op_munion) {
499 4947 : list *l = rel->l;
500 14841 : for(node *n = l->h; n; n = n->next)
501 9894 : rel_used(n->data);
502 : } else if (is_topn(rel->op) || is_select(rel->op) || is_sample(rel->op)) {
503 20303 : rel_used(rel->l);
504 20317 : rel = rel->l;
505 : } else if (is_ddl(rel->op)) {
506 408049 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
507 33342 : rel_used(rel->l);
508 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
509 273 : rel_used(rel->l);
510 273 : rel_used(rel->r);
511 : } else if (rel->flag == ddl_psm) {
512 21877 : exps_used(rel->exps);
513 : }
514 : } else if (rel->op == op_table) {
515 986 : if (IS_TABLE_PROD_FUNC(rel->flag) || rel->flag == TABLE_FROM_RELATION)
516 986 : rel_used(rel->l);
517 986 : exp_used(rel->r);
518 : }
519 822470 : if (rel && rel->exps) {
520 699814 : exps_used(rel->exps);
521 700095 : if (rel->r && (is_simple_project(rel->op) || is_groupby(rel->op)))
522 18500 : exps_used(rel->r);
523 : }
524 : }
525 :
526 : static void
527 1146871 : rel_mark_used(mvc *sql, sql_rel *rel, int proj)
528 : {
529 1661968 : if (proj && (need_distinct(rel)))
530 10081 : rel_used(rel);
531 :
532 1661962 : switch(rel->op) {
533 : case op_basetable:
534 : case op_truncate:
535 : case op_insert:
536 : break;
537 :
538 12001 : case op_table:
539 :
540 12001 : if (rel->l && rel->flag != TRIGGER_WRAPPER) {
541 116 : rel_used(rel);
542 116 : if (rel->r)
543 116 : exp_mark_used(rel->l, rel->r, -2);
544 116 : rel_mark_used(sql, rel->l, proj);
545 : }
546 : break;
547 :
548 16621 : case op_topn:
549 : case op_sample:
550 16621 : if (proj) {
551 16558 : rel = rel ->l;
552 16558 : rel_mark_used(sql, rel, proj);
553 16558 : break;
554 : }
555 : /* fall through */
556 : case op_project:
557 : case op_groupby:
558 359065 : if (proj && rel->l) {
559 231600 : rel_exps_mark_used(sql->sa, rel, rel->l);
560 231614 : rel_mark_used(sql, rel->l, 0);
561 7420 : } else if (proj) {
562 7357 : rel_exps_mark_used(sql->sa, rel, NULL);
563 : }
564 : break;
565 7186 : case op_update:
566 : case op_delete:
567 7186 : if (proj && rel->r) {
568 5895 : sql_rel *r = rel->r;
569 :
570 5895 : if (!list_empty(r->exps)) {
571 6982 : for (node *n = r->exps->h; n; n = n->next) {
572 5896 : sql_exp *e = n->data;
573 5896 : const char *nname = exp_name(e);
574 :
575 5896 : if (nname && nname[0] == '%' && strcmp(nname, TID) == 0) { /* TID is used */
576 4809 : e->used = 1;
577 4809 : break;
578 : }
579 : }
580 : }
581 5895 : rel_exps_mark_used(sql->sa, rel, rel->r);
582 5895 : rel_mark_used(sql, rel->r, 0);
583 : }
584 : break;
585 :
586 406690 : case op_ddl:
587 406690 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
588 31984 : if (rel->l)
589 : rel_mark_used(sql, rel->l, 0);
590 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
591 273 : if (rel->l)
592 273 : rel_mark_used(sql, rel->l, 0);
593 273 : if (rel->r)
594 : rel_mark_used(sql, rel->r, 0);
595 : }
596 : break;
597 :
598 97652 : case op_select:
599 97652 : if (rel->l) {
600 97652 : rel_exps_mark_used(sql->sa, rel, rel->l);
601 97652 : rel_mark_used(sql, rel->l, 0);
602 : }
603 : break;
604 :
605 7499 : case op_union:
606 : case op_inter:
607 : case op_except:
608 : /* For now we mark all union expression as used */
609 :
610 : /* Later we should (in case of union all) remove unused
611 : * columns from the projection.
612 : *
613 : * Project part of union is based on column position.
614 : */
615 7499 : if (proj && (need_distinct(rel) || !rel->exps)) {
616 2373 : rel_used(rel);
617 2373 : if (!rel->exps) {
618 0 : rel_used(rel->l);
619 0 : rel_used(rel->r);
620 : }
621 2373 : rel_mark_used(sql, rel->l, 0);
622 2373 : rel_mark_used(sql, rel->r, 0);
623 1169 : } else if (proj && !need_distinct(rel)) {
624 1169 : sql_rel *l = rel->l;
625 :
626 1169 : positional_exps_mark_used(rel, l);
627 1169 : rel_exps_mark_used(sql->sa, rel, l);
628 1169 : rel_mark_used(sql, rel->l, 0);
629 : /* based on child check set expression list */
630 1169 : if (is_project(l->op) && need_distinct(l))
631 0 : positional_exps_mark_used(l, rel);
632 1169 : positional_exps_mark_used(rel, rel->r);
633 1169 : rel_exps_mark_used(sql->sa, rel, rel->r);
634 1169 : rel_mark_used(sql, rel->r, 0);
635 : }
636 : break;
637 :
638 41513 : case op_munion:
639 41513 : assert(rel->l);
640 : // TODO: here we blindly follow the same logic as op_union. RE-evaluate
641 41513 : if (proj && (need_distinct(rel) || !rel->exps)) {
642 1906 : rel_used(rel);
643 1906 : if (!rel->exps) {
644 0 : for (node *n = ((list*)rel->l)->h; n; n = n->next) {
645 0 : rel_used(n->data);
646 0 : rel_mark_used(sql, n->data, 0);
647 : }
648 : }
649 19140 : } else if (proj && !need_distinct(rel)) {
650 19140 : bool first = true;
651 57420 : for (node *n = ((list*)rel->l)->h; n; n = n->next) {
652 38280 : sql_rel *l = n->data;
653 :
654 38280 : positional_exps_mark_used(rel, l);
655 38280 : rel_exps_mark_used(sql->sa, rel, l);
656 38280 : rel_mark_used(sql, rel->l, 0);
657 : /* based on child check set expression list */
658 38280 : if (first && is_project(l->op) && need_distinct(l))
659 0 : positional_exps_mark_used(l, rel);
660 38280 : first = false;
661 : }
662 : }
663 : break;
664 127848 : case op_join:
665 : case op_left:
666 : case op_right:
667 : case op_full:
668 : case op_semi:
669 : case op_anti:
670 : case op_merge:
671 127848 : rel_exps_mark_used(sql->sa, rel, rel->l);
672 127848 : rel_exps_mark_used(sql->sa, rel, rel->r);
673 127848 : rel_mark_used(sql, rel->l, 0);
674 127848 : rel_mark_used(sql, rel->r, 0);
675 127848 : break;
676 : }
677 1146880 : }
678 :
679 : static sql_rel * rel_dce_sub(mvc *sql, sql_rel *rel);
680 :
681 : static sql_rel *
682 1031503 : rel_remove_unused(mvc *sql, sql_rel *rel)
683 : {
684 1031503 : int needed = 0;
685 :
686 1031503 : if (!rel)
687 : return rel;
688 :
689 1025343 : switch(rel->op) {
690 280384 : case op_basetable: {
691 280384 : sql_table *t = rel->l;
692 :
693 280384 : if (t && isReplicaTable(t)) /* TODO fix rewriting in rel_distribute.c */
694 : return rel;
695 : }
696 : /* fall through */
697 : case op_table:
698 286507 : if (rel->exps && (rel->op != op_table || !IS_TABLE_PROD_FUNC(rel->flag))) {
699 1257416 : for(node *n=rel->exps->h; n && !needed; n = n->next) {
700 977069 : sql_exp *e = n->data;
701 :
702 977069 : if (!e->used)
703 205032 : needed = 1;
704 : }
705 :
706 280347 : if (!needed)
707 : return rel;
708 :
709 1255183 : for(node *n=rel->exps->h; n;) {
710 1050159 : node *next = n->next;
711 1050159 : sql_exp *e = n->data;
712 :
713 : /* atleast one (needed for crossproducts, count(*), rank() and single value projections) !, handled by rel_exps_mark_used */
714 1050159 : if (!e->used && list_length(rel->exps) > 1)
715 357186 : list_remove_node(rel->exps, NULL, n);
716 : n = next;
717 : }
718 : }
719 211184 : if (rel->op == op_table && (IS_TABLE_PROD_FUNC(rel->flag) || rel->flag == TABLE_FROM_RELATION))
720 6160 : rel->l = rel_remove_unused(sql, rel->l);
721 : return rel;
722 :
723 16557 : case op_topn:
724 : case op_sample:
725 :
726 16557 : if (rel->l)
727 16557 : rel->l = rel_remove_unused(sql, rel->l);
728 : return rel;
729 :
730 239007 : case op_project:
731 : case op_groupby:
732 :
733 239007 : if (/*rel->l &&*/ rel->exps) {
734 1533291 : for(node *n=rel->exps->h; n && !needed; n = n->next) {
735 1294284 : sql_exp *e = n->data;
736 :
737 1294284 : if (!e->used)
738 23268 : needed = 1;
739 : }
740 239007 : if (!needed)
741 : return rel;
742 :
743 553767 : for(node *n=rel->exps->h; n;) {
744 530499 : node *next = n->next;
745 530499 : sql_exp *e = n->data;
746 :
747 : /* atleast one (needed for crossproducts, count(*), rank() and single value projections) */
748 530499 : if (!e->used && list_length(rel->exps) > 1)
749 411084 : list_remove_node(rel->exps, NULL, n);
750 : n = next;
751 : }
752 : }
753 : return rel;
754 :
755 2840 : case op_join:
756 : case op_left:
757 : case op_right:
758 : case op_full:
759 2840 : if (list_length(rel->attr) > 1) {
760 0 : for(node *n=rel->attr->h; n && !needed; n = n->next) {
761 0 : sql_exp *e = n->data;
762 :
763 0 : if (!e->used)
764 0 : needed = 1;
765 : }
766 0 : if (!needed)
767 : return rel;
768 :
769 0 : for(node *n=rel->attr->h; n;) {
770 0 : node *next = n->next;
771 0 : sql_exp *e = n->data;
772 :
773 0 : if (!e->used)
774 0 : list_remove_node(rel->attr, NULL, n);
775 : n = next;
776 : }
777 : }
778 : return rel;
779 :
780 : case op_union:
781 : case op_inter:
782 : case op_except:
783 : case op_munion:
784 :
785 : case op_insert:
786 : case op_update:
787 : case op_delete:
788 : case op_truncate:
789 : case op_merge:
790 :
791 : case op_select:
792 :
793 : case op_semi:
794 : case op_anti:
795 : return rel;
796 406440 : case op_ddl:
797 406440 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
798 31775 : if (rel->l)
799 31421 : rel->l = rel_remove_unused(sql, rel->l);
800 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
801 232 : if (rel->l)
802 232 : rel->l = rel_remove_unused(sql, rel->l);
803 232 : if (rel->r)
804 232 : rel->r = rel_remove_unused(sql, rel->r);
805 : }
806 : return rel;
807 : }
808 : return rel;
809 : }
810 :
811 : static void
812 1206277 : rel_dce_refs(mvc *sql, sql_rel *rel, list *refs)
813 : {
814 1206277 : if (!rel || (rel_is_ref(rel) && list_find(refs, rel, NULL)))
815 8137 : return ;
816 :
817 1198140 : switch(rel->op) {
818 345241 : case op_table:
819 : case op_topn:
820 : case op_sample:
821 : case op_project:
822 : case op_groupby:
823 : case op_select:
824 :
825 345241 : if (rel->l && (rel->op != op_table || rel->flag != TRIGGER_WRAPPER))
826 333054 : rel_dce_refs(sql, rel->l, refs);
827 : break;
828 :
829 : case op_basetable:
830 : case op_insert:
831 : case op_truncate:
832 : break;
833 :
834 6011 : case op_update:
835 : case op_delete:
836 :
837 6011 : if (rel->r)
838 5895 : rel_dce_refs(sql, rel->r, refs);
839 : break;
840 :
841 128577 : case op_union:
842 : case op_inter:
843 : case op_except:
844 : case op_join:
845 : case op_left:
846 : case op_right:
847 : case op_full:
848 : case op_semi:
849 : case op_anti:
850 : case op_merge:
851 :
852 128577 : if (rel->l)
853 128577 : rel_dce_refs(sql, rel->l, refs);
854 128577 : if (rel->r)
855 128577 : rel_dce_refs(sql, rel->r, refs);
856 : break;
857 19231 : case op_munion:
858 19231 : assert(rel->l);
859 57693 : for (node *n = ((list*)rel->l)->h; n; n = n->next)
860 38462 : rel_dce_refs(sql, n->data, refs);
861 : break;
862 407776 : case op_ddl:
863 :
864 407776 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
865 33070 : if (rel->l)
866 32716 : rel_dce_refs(sql, rel->l, refs);
867 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
868 273 : if (rel->l)
869 273 : rel_dce_refs(sql, rel->l, refs);
870 273 : if (rel->r)
871 242 : rel_dce_refs(sql, rel->r, refs);
872 : } break;
873 : }
874 :
875 1198156 : if (rel_is_ref(rel) && !list_find(refs, rel, NULL))
876 7328 : list_prepend(refs, rel);
877 : }
878 :
879 : static sql_rel *
880 1651815 : rel_dce_down(mvc *sql, sql_rel *rel, int skip_proj)
881 : {
882 1651815 : if (!rel)
883 : return rel;
884 :
885 1651815 : if (!skip_proj && rel_is_ref(rel))
886 : return rel;
887 :
888 1639636 : switch(rel->op) {
889 510079 : case op_basetable:
890 : case op_table:
891 :
892 510079 : if (skip_proj && rel->l && rel->op == op_table && rel->flag != TRIGGER_WRAPPER)
893 58 : rel->l = rel_dce_down(sql, rel->l, 0);
894 254814 : if (!skip_proj)
895 254814 : rel_dce_sub(sql, rel);
896 : /* fall through */
897 :
898 : case op_truncate:
899 : return rel;
900 :
901 2153 : case op_insert:
902 2153 : rel_used(rel->r);
903 2153 : rel_dce_sub(sql, rel->r);
904 2153 : return rel;
905 :
906 7186 : case op_update:
907 : case op_delete:
908 :
909 7186 : if (skip_proj && rel->r)
910 5895 : rel->r = rel_dce_down(sql, rel->r, 0);
911 1175 : if (!skip_proj)
912 1175 : rel_dce_sub(sql, rel);
913 : return rel;
914 :
915 410056 : case op_topn:
916 : case op_sample:
917 : case op_project:
918 : case op_groupby:
919 :
920 410056 : if (skip_proj && rel->l)
921 248130 : rel->l = rel_dce_down(sql, rel->l, is_topn(rel->op) || is_sample(rel->op));
922 154569 : if (!skip_proj)
923 154569 : rel_dce_sub(sql, rel);
924 : return rel;
925 :
926 6513 : case op_union:
927 : case op_inter:
928 : case op_except:
929 6513 : if (skip_proj) {
930 3542 : if (rel->l)
931 3542 : rel->l = rel_dce_down(sql, rel->l, 0);
932 3542 : if (rel->r)
933 3542 : rel->r = rel_dce_down(sql, rel->r, 0);
934 : }
935 2971 : if (!skip_proj)
936 2971 : rel_dce_sub(sql, rel);
937 : return rel;
938 :
939 40987 : case op_munion:
940 40987 : if (skip_proj) {
941 63132 : for (node *n = ((list*)rel->l)->h; n; n = n->next)
942 42088 : n->data = rel_dce_down(sql, n->data, 0);
943 : }
944 19943 : if (!skip_proj)
945 19943 : rel_dce_sub(sql, rel);
946 : return rel;
947 90535 : case op_select:
948 90535 : if (rel->l)
949 90535 : rel->l = rel_dce_down(sql, rel->l, 0);
950 : return rel;
951 :
952 124491 : case op_join:
953 : case op_left:
954 : case op_right:
955 : case op_full:
956 : case op_semi:
957 : case op_anti:
958 : case op_merge:
959 124491 : if (rel->l)
960 124491 : rel->l = rel_dce_down(sql, rel->l, 0);
961 124491 : if (rel->r)
962 124491 : rel->r = rel_dce_down(sql, rel->r, 0);
963 124491 : if (!skip_proj && !list_empty(rel->attr))
964 2840 : rel_dce_sub(sql, rel);
965 : return rel;
966 :
967 406502 : case op_ddl:
968 406502 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
969 31796 : if (rel->l)
970 31630 : rel->l = rel_dce_down(sql, rel->l, 0);
971 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
972 273 : if (rel->l)
973 273 : rel->l = rel_dce_down(sql, rel->l, 0);
974 273 : if (rel->r)
975 242 : rel->r = rel_dce_down(sql, rel->r, 0);
976 : }
977 : return rel;
978 : }
979 : return rel;
980 : }
981 :
982 : /* DCE
983 : *
984 : * Based on top relation expressions mark sub expressions as used.
985 : * Then recurse down until the projections. Clean them up and repeat.
986 : */
987 :
988 : static sql_rel *
989 976961 : rel_dce_sub(mvc *sql, sql_rel *rel)
990 : {
991 976961 : if (!rel)
992 : return rel;
993 :
994 : /*
995 : * Mark used up until the next project
996 : * For setops we need to first mark, then remove
997 : * because of positional dependency
998 : */
999 976961 : rel_mark_used(sql, rel, 1);
1000 976964 : rel = rel_remove_unused(sql, rel);
1001 976962 : rel_dce_down(sql, rel, 1);
1002 976962 : return rel;
1003 : }
1004 :
1005 : /* add projects under set ops */
1006 : static sql_rel *
1007 2006997 : rel_add_projects(mvc *sql, sql_rel *rel)
1008 : {
1009 2006997 : if (!rel)
1010 : return rel;
1011 :
1012 2006997 : switch(rel->op) {
1013 : case op_basetable:
1014 : case op_truncate:
1015 : return rel;
1016 8164 : case op_insert:
1017 : case op_update:
1018 : case op_delete:
1019 8164 : if (rel->r)
1020 8048 : rel->r = rel_add_projects(sql, rel->r);
1021 : return rel;
1022 13581 : case op_union:
1023 : case op_inter:
1024 : case op_except:
1025 : /* We can only reduce the list of expressions of an set op
1026 : * if the projection under it can also be reduced.
1027 : */
1028 13581 : if (rel->l) {
1029 13581 : sql_rel *l = rel->l;
1030 :
1031 13581 : if (!is_project(l->op) && !need_distinct(rel))
1032 2 : l = rel_project(sql->sa, l, rel_projections(sql, l, NULL, 1, 1));
1033 13581 : rel->l = rel_add_projects(sql, l);
1034 : }
1035 13581 : if (rel->r) {
1036 13581 : sql_rel *r = rel->r;
1037 :
1038 13581 : if (!is_project(r->op) && !need_distinct(rel))
1039 1 : r = rel_project(sql->sa, r, rel_projections(sql, r, NULL, 1, 1));
1040 13581 : rel->r = rel_add_projects(sql, r);
1041 : }
1042 : return rel;
1043 68613 : case op_munion:
1044 68613 : assert(rel->l);
1045 205839 : for (node *n = ((list*)rel->l)->h; n; n = n->next) {
1046 137226 : sql_rel* r = n->data;
1047 137226 : if (!is_project(r->op) && !need_distinct(rel))
1048 200 : r = rel_project(sql->sa, r, rel_projections(sql, r, NULL, 1, 1));
1049 137226 : r = rel_add_projects(sql, r);
1050 137226 : n->data = r;
1051 : }
1052 : return rel;
1053 745060 : case op_topn:
1054 : case op_sample:
1055 : case op_project:
1056 : case op_groupby:
1057 : case op_select:
1058 : case op_table:
1059 745060 : if (rel->l && (rel->op != op_table || rel->flag != TRIGGER_WRAPPER))
1060 720252 : rel->l = rel_add_projects(sql, rel->l);
1061 : return rel;
1062 271178 : case op_join:
1063 : case op_left:
1064 : case op_right:
1065 : case op_full:
1066 : case op_semi:
1067 : case op_anti:
1068 : case op_merge:
1069 271178 : if (rel->l)
1070 271178 : rel->l = rel_add_projects(sql, rel->l);
1071 271178 : if (rel->r)
1072 271178 : rel->r = rel_add_projects(sql, rel->r);
1073 : return rel;
1074 408044 : case op_ddl:
1075 408044 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
1076 33338 : if (rel->l)
1077 32984 : rel->l = rel_add_projects(sql, rel->l);
1078 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
1079 273 : if (rel->l)
1080 273 : rel->l = rel_add_projects(sql, rel->l);
1081 273 : if (rel->r)
1082 242 : rel->r = rel_add_projects(sql, rel->r);
1083 : }
1084 : return rel;
1085 : }
1086 : return rel;
1087 : }
1088 :
1089 : static sql_rel *
1090 538489 : rel_dce_(mvc *sql, sql_rel *rel)
1091 : {
1092 538489 : list *refs = sa_list(sql->sa);
1093 :
1094 538574 : rel_dce_refs(sql, rel, refs);
1095 538503 : if (refs) {
1096 545865 : for(node *n = refs->h; n; n = n->next) {
1097 7328 : sql_rel *i = n->data;
1098 :
1099 7328 : while (!rel_is_ref(i) && i->l && !is_base(i->op))
1100 : i = i->l;
1101 7328 : if (i)
1102 7328 : rel_used(i);
1103 : }
1104 : }
1105 538537 : rel = rel_add_projects(sql, rel);
1106 538478 : rel_used(rel);
1107 538941 : rel_dce_sub(sql, rel);
1108 538501 : return rel;
1109 : }
1110 :
1111 : /* Remove unused expressions */
1112 : static sql_rel *
1113 538516 : rel_dce(visitor *v, global_props *gp, sql_rel *rel)
1114 : {
1115 538516 : (void) gp;
1116 538516 : return rel_dce_(v->sql, rel);
1117 : }
1118 :
1119 : run_optimizer
1120 600932 : bind_dce(visitor *v, global_props *gp)
1121 : {
1122 600932 : int flag = v->sql->sql_optimizer;
1123 600932 : return gp->opt_cycle == 0 && gp->opt_level == 1 && (flag & dce) ? rel_dce : NULL;
1124 : }
1125 :
1126 :
1127 : static int
1128 84695 : topn_sample_safe_exps( list *exps, bool nil_limit )
1129 : {
1130 : /* Limit only expression lists are always save */
1131 84695 : if (list_length(exps) == 1)
1132 : return 1;
1133 1316 : for (node *n = exps->h; n; n = n->next ) {
1134 890 : sql_exp *e = n->data;
1135 :
1136 890 : if (!e || e->type != e_atom || (!nil_limit && exp_is_null(e)))
1137 59 : return 0;
1138 : }
1139 : return 1;
1140 : }
1141 :
1142 : static list *
1143 234 : sum_limit_offset(mvc *sql, sql_rel *rel)
1144 : {
1145 : /* for sample we always propagate, or if the expression list only consists of a limit expression, we copy it */
1146 234 : if (is_sample(rel->op) || list_length(rel->exps) == 1)
1147 230 : return exps_copy(sql, rel->exps);
1148 4 : assert(list_length(rel->exps) == 2);
1149 4 : sql_subtype *lng = sql_bind_localtype("lng");
1150 4 : sql_exp *add = rel_binop_(sql, NULL, exp_copy(sql, rel->exps->h->data), exp_copy(sql, rel->exps->h->next->data), "sys", "sql_add", card_value, true);
1151 : /* for remote plans, make sure the output type is a bigint */
1152 4 : if (subtype_cmp(lng, exp_subtype(add)) != 0)
1153 4 : add = exp_convert(sql->sa, add, exp_subtype(add), lng);
1154 4 : return list_append(sa_list(sql->sa), add);
1155 : }
1156 :
1157 : /*
1158 : * Push TopN (only LIMIT, no ORDER BY) down through projections underneath crossproduct, i.e.,
1159 : *
1160 : * topn( topn(
1161 : * project( project(
1162 : * crossproduct( crossproduct(
1163 : * L, => topn( L )[ n ],
1164 : * R topn( R )[ n ]
1165 : * ) )
1166 : * )[ Cs ]* )[ Cs ]*
1167 : * )[ n ] )[ n ]
1168 : *
1169 : * (TODO: in case of n==1 we can omit the original top-level TopN)
1170 : *
1171 : * also push topn under (non reordering) projections.
1172 : */
1173 : static sql_rel *
1174 129671 : rel_push_topn_and_sample_down_(visitor *v, sql_rel *rel)
1175 : {
1176 129671 : sql_rel *rp = NULL, *r = rel->l, *rpp = NULL;
1177 :
1178 129671 : if (is_topn(rel->op) && !rel_is_ref(rel) &&
1179 50426 : r && r->op == op_table && r->flag != TRIGGER_WRAPPER && !rel_is_ref(r) && r->r) {
1180 3 : sql_exp *op = r->r;
1181 3 : sql_subfunc *f = op->f;
1182 :
1183 3 : if (is_func(op->type) && strcmp(f->func->base.name, "file_loader") == 0 && !sql_func_mod(f->func)[0] && !sql_func_imp(f->func)[0]) {
1184 : /* push limit, to arguments of file_loader */
1185 0 : list *args = op->l;
1186 0 : if (list_length(args) == 2) {
1187 0 : sql_exp *topN = rel->exps->h->data;
1188 0 : sql_exp *offset = rel->exps->h->next? rel->exps->h->next->data:NULL;
1189 0 : atom *topn = topN->l;
1190 0 : if (offset) {
1191 0 : atom *b1 = (atom *)offset->l, *c = atom_add(v->sql->sa, b1, topn);
1192 :
1193 0 : if (!c)
1194 : return rel;
1195 0 : if (atom_cmp(c, topn) < 0) /* overflow */
1196 : return rel;
1197 : topn = c;
1198 : }
1199 0 : append(args, exp_atom(v->sql->sa, topn));
1200 0 : v->changes++;
1201 : }
1202 : }
1203 : }
1204 :
1205 129671 : if ((is_topn(rel->op) || is_sample(rel->op)) && topn_sample_safe_exps(rel->exps, true)) {
1206 50519 : sql_rel *(*func) (allocator *, sql_rel *, list *) = is_topn(rel->op) ? rel_topn : rel_sample;
1207 :
1208 : /* nested topN relations */
1209 50519 : if (r && is_topn(rel->op) && is_topn(r->op) && !rel_is_ref(r)) {
1210 0 : sql_exp *topN1 = rel->exps->h->data, *topN2 = r->exps->h->data;
1211 0 : sql_exp *offset1 = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
1212 0 : sql_exp *offset2 = list_length(r->exps) > 1 ? r->exps->h->next->data : NULL;
1213 :
1214 0 : if (topN1->l && topN2->l && (!offset1 || offset1->l) && (!offset2 || offset2->l)) { /* no parameters */
1215 0 : bool changed = false;
1216 :
1217 0 : if ((!offset1 || (offset1->type == e_atom && offset1->l)) && (!offset2 || (offset2->type == e_atom && offset2->l))) { /* only atoms */
1218 0 : if (!offset1 && offset2) {
1219 0 : list_append(rel->exps, exp_copy(v->sql, offset2));
1220 0 : changed = true;
1221 0 : } else if (offset1 && offset2) { /* sum offsets */
1222 0 : atom *b1 = (atom *)offset1->l, *b2 = (atom *)offset2->l, *c = atom_add(v->sql->sa, b1, b2);
1223 :
1224 0 : if (!c) /* error, don't apply optimization, WARNING because of this the offset optimization must come before the limit one */
1225 : return rel;
1226 0 : if (atom_cmp(c, b2) < 0) /* overflow */
1227 0 : c = atom_int(v->sql->sa, sql_bind_localtype("lng"), GDK_lng_max);
1228 0 : offset1->l = c;
1229 0 : changed = true;
1230 : }
1231 : }
1232 :
1233 0 : if (topN1->type == e_atom && topN1->l && topN2->type == e_atom && topN2->l) { /* only atoms */
1234 0 : atom *a1 = (atom *)topN1->l, *a2 = (atom *)topN2->l;
1235 :
1236 0 : if (!a2->isnull && (a1->isnull || atom_cmp(a1, a2) >= 0)) { /* topN1 is not set or is larger than topN2 */
1237 0 : rel->exps->h->data = exp_copy(v->sql, topN2);
1238 0 : changed = true;
1239 : }
1240 : }
1241 :
1242 0 : if (changed) {
1243 0 : rel->l = r->l;
1244 0 : r->l = NULL;
1245 0 : rel_destroy(r);
1246 0 : v->changes++;
1247 0 : return rel;
1248 : }
1249 : }
1250 : }
1251 :
1252 50519 : if (r && is_simple_project(r->op) && need_distinct(r))
1253 : return rel;
1254 :
1255 : /* push topn/sample under projections */
1256 50515 : if (!rel_is_ref(rel) && r && is_simple_project(r->op) && !need_distinct(r) && !rel_is_ref(r) && r->l && list_empty(r->r)) {
1257 : sql_rel *x = r, *px = x;
1258 :
1259 32689 : while (is_simple_project(x->op) && !need_distinct(x) && !rel_is_ref(x) && x->l && list_empty(x->r)) {
1260 16361 : px = x;
1261 16361 : x = x->l;
1262 : }
1263 : /* only push topn once */
1264 16323 : if (x && x->op == rel->op)
1265 : return rel;
1266 :
1267 16312 : rel->l = x;
1268 16312 : px->l = rel;
1269 16312 : rel = r;
1270 16312 : v->changes++;
1271 16312 : return rel;
1272 : }
1273 :
1274 34188 : if (!topn_sample_safe_exps(rel->exps, false))
1275 : return rel;
1276 :
1277 : /* duplicate topn/sample direct under union or crossproduct */
1278 34117 : if (r && !rel_is_ref(r) && r->l && r->r && ((is_union(r->op) && r->exps) || (r->op == op_join && list_empty(r->exps)))) {
1279 178 : sql_rel *u = r, *x;
1280 178 : sql_rel *ul = u->l;
1281 178 : sql_rel *ur = u->r;
1282 178 : bool changed = false;
1283 :
1284 178 : x = ul;
1285 207 : while (is_simple_project(x->op) && !need_distinct(x) && !rel_is_ref(x) && x->l && list_empty(x->r))
1286 29 : x = x->l;
1287 178 : if (x && x->op != rel->op) { /* only push topn once */
1288 70 : ul = func(v->sql->sa, ul, sum_limit_offset(v->sql, rel));
1289 70 : set_processed(ul);
1290 70 : u->l = ul;
1291 70 : changed = true;
1292 : }
1293 :
1294 178 : x = ur;
1295 208 : while (is_simple_project(x->op) && !need_distinct(x) && !rel_is_ref(x) && x->l && list_empty(x->r))
1296 30 : x = x->l;
1297 178 : if (x && x->op != rel->op) { /* only push topn once */
1298 69 : ur = func(v->sql->sa, ur, sum_limit_offset(v->sql, rel));
1299 69 : set_processed(ur);
1300 69 : u->r = ur;
1301 69 : changed = true;
1302 : }
1303 :
1304 178 : if (changed)
1305 71 : v->changes++;
1306 178 : return rel;
1307 : }
1308 :
1309 : /* duplicate topn/sample + [ project-order ] under union */
1310 : if (r && !rp)
1311 33938 : rp = r->l;
1312 33938 : if (r && r->exps && is_simple_project(r->op) && !rel_is_ref(r) && !list_empty(r->r) && r->l && is_union(rp->op)) {
1313 100 : sql_rel *u = rp, *ou = u, *x, *ul = u->l, *ur = u->r;
1314 100 : list *rcopy = NULL;
1315 :
1316 : /* only push topn/sample once */
1317 100 : x = ul;
1318 110 : while (is_simple_project(x->op) && !need_distinct(x) && !rel_is_ref(x) && x->l && list_empty(x->r))
1319 10 : x = x->l;
1320 100 : if (x && x->op == rel->op)
1321 : return rel;
1322 : x = ur;
1323 90 : while (is_simple_project(x->op) && !need_distinct(x) && !rel_is_ref(x) && x->l && list_empty(x->r))
1324 45 : x = x->l;
1325 45 : if (x && x->op == rel->op)
1326 : return rel;
1327 :
1328 45 : rcopy = exps_copy(v->sql, r->r);
1329 138 : for (node *n = rcopy->h ; n ; n = n->next) {
1330 93 : sql_exp *e = n->data;
1331 93 : set_descending(e); /* remove ordering properties for projected columns */
1332 93 : set_nulls_first(e);
1333 : }
1334 45 : ul = rel_dup(ul);
1335 45 : ur = rel_dup(ur);
1336 45 : if (!is_project(ul->op))
1337 0 : ul = rel_project(v->sql->sa, ul,
1338 : rel_projections(v->sql, ul, NULL, 1, 1));
1339 45 : if (!is_project(ur->op))
1340 0 : ur = rel_project(v->sql->sa, ur,
1341 : rel_projections(v->sql, ur, NULL, 1, 1));
1342 45 : rel_rename_exps(v->sql, u->exps, ul->exps);
1343 45 : rel_rename_exps(v->sql, u->exps, ur->exps);
1344 :
1345 : /* introduce projects under the set */
1346 45 : ul = rel_project(v->sql->sa, ul, NULL);
1347 45 : ul->exps = exps_copy(v->sql, r->exps);
1348 : /* possibly add order by column */
1349 45 : ul->exps = list_distinct(list_merge(ul->exps, exps_copy(v->sql, rcopy), NULL), (fcmp) exp_equal, (fdup) NULL);
1350 45 : ul->nrcols = list_length(ul->exps);
1351 45 : ul->r = exps_copy(v->sql, r->r);
1352 45 : set_processed(ul);
1353 45 : ul = func(v->sql->sa, ul, sum_limit_offset(v->sql, rel));
1354 45 : set_processed(ul);
1355 :
1356 45 : ur = rel_project(v->sql->sa, ur, NULL);
1357 45 : ur->exps = exps_copy(v->sql, r->exps);
1358 : /* possibly add order by column */
1359 45 : ur->exps = list_distinct(list_merge(ur->exps, exps_copy(v->sql, rcopy), NULL), (fcmp) exp_equal, (fdup) NULL);
1360 45 : ur->nrcols = list_length(ur->exps);
1361 45 : ur->r = exps_copy(v->sql, r->r);
1362 45 : set_processed(ur);
1363 45 : ur = func(v->sql->sa, ur, sum_limit_offset(v->sql, rel));
1364 45 : set_processed(ur);
1365 :
1366 45 : u = rel_setop(v->sql->sa, ul, ur, op_union);
1367 45 : u->exps = exps_alias(v->sql, r->exps);
1368 45 : u->nrcols = list_length(u->exps);
1369 45 : set_processed(u);
1370 : /* possibly add order by column */
1371 45 : u->exps = list_distinct(list_merge(u->exps, rcopy, NULL), (fcmp) exp_equal, (fdup) NULL);
1372 45 : if (need_distinct(r)) {
1373 0 : set_distinct(ul);
1374 0 : set_distinct(ur);
1375 : }
1376 :
1377 : /* zap names */
1378 45 : rel_no_rename_exps(u->exps);
1379 45 : rel_destroy(ou);
1380 :
1381 45 : ur = rel_project(v->sql->sa, u, exps_alias(v->sql, r->exps));
1382 45 : ur->r = r->r;
1383 45 : r->l = NULL;
1384 :
1385 45 : if (need_distinct(r))
1386 0 : set_distinct(ur);
1387 :
1388 45 : rel_destroy(r);
1389 45 : rel->l = ur;
1390 45 : v->changes++;
1391 45 : return rel;
1392 : }
1393 : /* a left outer join b order by a.* limit L, can be copied into a */
1394 : /* topn ( project (orderby)( optional project ( left ())
1395 : * rel r rp */
1396 33839 : if (r && !rp)
1397 62 : rp = r->l;
1398 33839 : if (r && rp && is_simple_project(rp->op) && !rp->r && rp->l)
1399 33839 : rpp = rp->l;
1400 33839 : if (r && r->exps && is_simple_project(r->op) && !rel_is_ref(r) && r->r && r->l && ((!rpp && is_left(rp->op)) ||
1401 405 : (rpp && is_left(rpp->op)))) {
1402 13 : sql_rel *lj = rpp?rpp:rp;
1403 13 : sql_rel *l = lj->l;
1404 13 : list *obes = r->r, *nobes = sa_list(v->sql->sa);
1405 13 : int fnd = 1;
1406 29 : for (node *n = obes->h; n && fnd; n = n->next) {
1407 16 : sql_exp *obe = n->data;
1408 16 : int asc = is_ascending(obe);
1409 16 : int nl = nulls_last(obe);
1410 : /* only simple rename expressions */
1411 16 : sql_exp *pe = exps_find_exp(r->exps, obe);
1412 16 : if (pe && rpp)
1413 12 : pe = exps_find_exp(rp->exps, pe);
1414 16 : if (pe)
1415 16 : pe = rel_find_exp(l, pe);
1416 16 : if (pe) {
1417 16 : pe = exp_ref(v->sql, pe);
1418 16 : if (asc)
1419 8 : set_ascending(pe);
1420 16 : if (nl)
1421 3 : set_nulls_last(pe);
1422 16 : append(nobes, pe);
1423 : }
1424 : else
1425 : fnd = 0;
1426 : }
1427 13 : if (fnd && ((is_topn(rel->op) && !is_topn(l->op)) || (is_sample(rel->op) && !is_sample(l->op)))) {
1428 : /* inject topn */
1429 : /* Todo add order by */
1430 5 : sql_rel *ob = lj->l = rel_project(v->sql->sa, lj->l, rel_projections(v->sql, lj->l, NULL, 1, 1));
1431 5 : ob->r = nobes;
1432 5 : lj->l = func(v->sql->sa, lj->l, sum_limit_offset(v->sql, rel));
1433 5 : v->changes++;
1434 5 : return rel;
1435 : }
1436 : }
1437 : }
1438 : return rel;
1439 : }
1440 :
1441 : static sql_rel *
1442 33413 : rel_push_topn_and_sample_down(visitor *v, global_props *gp, sql_rel *rel)
1443 : {
1444 33413 : (void) gp;
1445 33413 : return rel_visitor_topdown(v, rel, &rel_push_topn_and_sample_down_);
1446 : }
1447 :
1448 : run_optimizer
1449 601124 : bind_push_topn_and_sample_down(visitor *v, global_props *gp)
1450 : {
1451 601124 : int flag = v->sql->sql_optimizer;
1452 601044 : return gp->opt_level == 1 && (gp->cnt[op_topn] || gp->cnt[op_sample]) &&
1453 634414 : (flag & push_topn_and_sample_down) ? rel_push_topn_and_sample_down : NULL;
1454 : }
|