Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer.h"
15 : #include "rel_optimizer_private.h"
16 : #include "rel_exp.h"
17 : #include "rel_select.h"
18 :
19 : static void
20 0 : rel_no_rename_exps( list *exps )
21 : {
22 0 : for (node *n = exps->h; n; n = n->next) {
23 0 : sql_exp *e = n->data;
24 :
25 0 : exp_setalias(e, e->alias.label, e->l, e->r);
26 : }
27 0 : list_hash_clear(exps);
28 0 : }
29 :
30 : /* positional renames ! */
31 : /* get the names (aka aliases) from the src_exps list and rename the dest_exps list */
32 : void
33 82361 : rel_rename_exps( mvc *sql, list *src_exps, list *dest_exps)
34 : {
35 82361 : node *n, *m;
36 :
37 82361 : (void)sql;
38 : /* check if a column uses an alias earlier in the list */
39 :
40 82361 : assert(list_length(src_exps) <= list_length(dest_exps));
41 690491 : for (n = src_exps->h, m = dest_exps->h; n && m; n = n->next, m = m->next) {
42 608130 : sql_exp *s = n->data;
43 608130 : sql_exp *d = m->data;
44 608130 : const char *rname = exp_relname(s);
45 :
46 608130 : exp_setalias(d, s->alias.label, rname, exp_name(s));
47 : }
48 82361 : list_hash_clear(dest_exps);
49 82361 : }
50 :
51 : sql_rel *
52 226576 : rel_find_ref( sql_rel *r)
53 : {
54 692327 : while (!rel_is_ref(r) && r->l &&
55 599555 : (is_project(r->op) || is_select(r->op) /*|| is_join(r->op)*/))
56 : r = r->l;
57 226576 : if (rel_is_ref(r))
58 82950 : return r;
59 : return NULL;
60 : }
61 :
62 : /* merge projection */
63 :
64 : /* push an expression through a projection.
65 : * The result should again be used in a projection.
66 : */
67 : static list *
68 60520 : exps_push_down_prj(mvc *sql, list *exps, sql_rel *f, sql_rel *t, bool keepalias)
69 : {
70 60520 : node *n;
71 60520 : list *nl = new_exp_list(sql->sa);
72 :
73 208566 : for(n = exps->h; n; n = n->next) {
74 150449 : sql_exp *arg = n->data, *narg = NULL;
75 :
76 150449 : narg = exp_push_down_prj(sql, arg, f, t);
77 150449 : if (!narg)
78 : return NULL;
79 148046 : narg = exp_propagate(sql->sa, narg, arg);
80 148046 : if (!keepalias && narg->type == e_column)
81 41186 : exp_setalias(narg, arg->alias.label, narg->l, narg->r);
82 148046 : append(nl, narg);
83 : }
84 : return nl;
85 : }
86 :
87 : sql_exp *
88 2790265 : exp_push_down_prj(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t)
89 : {
90 2790265 : sql_exp *ne = NULL, *l = NULL, *r = NULL, *r2 = NULL;
91 :
92 2790265 : assert(is_project(f->op));
93 :
94 2790265 : switch(e->type) {
95 2327486 : case e_column:
96 2327486 : assert(e->nid);
97 2327486 : ne = exps_bind_nid(f->exps, e->nid);
98 2327486 : if (!ne || (ne->type != e_column && (ne->type != e_atom || ne->f)))
99 : return NULL;
100 1988760 : while (ne && (has_label(ne) || is_selfref(ne) /*inside this list */) && is_simple_project(f->op) && ne->type == e_column) {
101 12318 : sql_exp *oe = e, *one = ne;
102 :
103 12318 : e = ne;
104 12318 : ne = NULL;
105 12318 : if (e->nid)
106 12318 : ne = exps_bind_nid(f->exps, e->nid);
107 12318 : if (ne && ne != one && list_position(f->exps, ne) >= list_position(f->exps, one))
108 12318 : ne = NULL;
109 12318 : if (!ne || ne == one) {
110 : ne = one;
111 1988361 : e = oe;
112 : break;
113 : }
114 399 : if (ne->type != e_column && (ne->type != e_atom || ne->f))
115 : return NULL;
116 : }
117 : /* possibly a groupby/project column is renamed */
118 1988361 : if (is_groupby(f->op) && !list_empty(f->r) && ne->type == e_column) {
119 139 : sql_exp *gbe = NULL;
120 139 : if (ne->nid)
121 139 : gbe = exps_bind_nid(f->exps, ne->nid);
122 139 : ne = gbe;
123 139 : if (!ne || (ne->type != e_column && (ne->type != e_atom || ne->f)))
124 0 : return NULL;
125 : }
126 1988361 : return exp_copy(sql, ne);
127 70583 : case e_cmp:
128 70583 : if (e->flag == cmp_or || e->flag == cmp_filter) {
129 4323 : list *l = NULL, *r = NULL;
130 :
131 4323 : if (!(l = exps_push_down_prj(sql, e->l, f, t, true)) || !(r = exps_push_down_prj(sql, e->r, f, t, true)))
132 1244 : return NULL;
133 3079 : if (e->flag == cmp_filter) {
134 1750 : ne = exp_filter(sql->sa, l, r, e->f, is_anti(e));
135 : } else {
136 1329 : ne = exp_or(sql->sa, l, r, is_anti(e));
137 : }
138 66260 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
139 6372 : list *r = NULL;
140 :
141 6372 : if (!(l = exp_push_down_prj(sql, e->l, f, t)) || !(r = exps_push_down_prj(sql, e->r, f, t, true)))
142 5213 : return NULL;
143 1159 : ne = exp_in(sql->sa, l, r, e->flag);
144 : } else {
145 59888 : if (!(l = exp_push_down_prj(sql, e->l, f, t)) || !(r = exp_push_down_prj(sql, e->r, f, t)) || (e->f && !(r2 = exp_push_down_prj(sql, e->f, f, t))))
146 24205 : return NULL;
147 35683 : if (e->f) {
148 759 : ne = exp_compare2(sql->sa, l, r, r2, e->flag, is_symmetric(e));
149 : } else {
150 34924 : ne = exp_compare(sql->sa, l, r, e->flag);
151 : }
152 : }
153 39921 : if (!ne)
154 : return NULL;
155 39921 : return exp_propagate(sql->sa, ne, e);
156 221265 : case e_convert:
157 221265 : if (!(l = exp_push_down_prj(sql, e->l, f, t)))
158 : return NULL;
159 150299 : ne = exp_convert(sql, l, exp_fromtype(e), exp_totype(e));
160 150299 : return exp_propagate(sql->sa, ne, e);
161 50291 : case e_aggr:
162 : case e_func: {
163 50291 : list *l = e->l, *nl = NULL;
164 50291 : sql_exp *ne = NULL;
165 :
166 50291 : if (e->type == e_func && exp_unsafe(e, false, false))
167 : return NULL;
168 50283 : if (!list_empty(l)) {
169 50283 : nl = exps_push_down_prj(sql, l, f, t, false);
170 50283 : if (!nl)
171 : return NULL;
172 : }
173 49134 : if (e->type == e_func)
174 49134 : ne = exp_op(sql->sa, nl, e->f);
175 : else
176 0 : ne = exp_aggr(sql->sa, nl, e->f, need_distinct(e), need_no_nil(e), e->card, has_nil(e));
177 49134 : return exp_propagate(sql->sa, ne, e);
178 : }
179 120640 : case e_atom: {
180 120640 : list *l = e->f, *nl = NULL;
181 :
182 120640 : if (!list_empty(l)) {
183 1522 : nl = exps_push_down_prj(sql, l, f, t, false);
184 1522 : if (!nl)
185 : return NULL;
186 1512 : ne = exp_values(sql->sa, nl);
187 : } else {
188 119118 : ne = exp_copy(sql, e);
189 : }
190 120630 : return exp_propagate(sql->sa, ne, e);
191 : }
192 : case e_psm:
193 : if (e->type == e_atom && e->f) /* value list */
194 : return NULL;
195 : return e;
196 : }
197 : return NULL;
198 : }
199 :
200 : atom *
201 628 : exp_flatten(mvc *sql, bool value_based_opt, sql_exp *e)
202 : {
203 628 : if (e->type == e_atom) {
204 379 : return value_based_opt ? exp_value(sql, e) : (atom *) e->l;
205 249 : } else if (e->type == e_convert) {
206 51 : atom *v = exp_flatten(sql, value_based_opt, e->l);
207 :
208 51 : if (v)
209 24 : return atom_cast(sql->sa, v, exp_subtype(e));
210 198 : } else if (e->type == e_func) {
211 198 : sql_subfunc *f = e->f;
212 198 : list *l = e->l;
213 198 : sql_arg *res = (f->func->res)?(f->func->res->h->data):NULL;
214 :
215 : /* TODO handle date + x months */
216 198 : if (!f->func->s && strcmp(f->func->base.name, "sql_add") == 0 && list_length(l) == 2 && res && EC_NUMBER(res->type.type->eclass)) {
217 24 : atom *l1 = exp_flatten(sql, value_based_opt, l->h->data);
218 24 : atom *l2 = exp_flatten(sql, value_based_opt, l->h->next->data);
219 24 : if (l1 && l2)
220 3 : return atom_add(sql->sa, l1, l2);
221 174 : } else if (!f->func->s && strcmp(f->func->base.name, "sql_sub") == 0 && list_length(l) == 2 && res && EC_NUMBER(res->type.type->eclass)) {
222 10 : atom *l1 = exp_flatten(sql, value_based_opt, l->h->data);
223 10 : atom *l2 = exp_flatten(sql, value_based_opt, l->h->next->data);
224 10 : if (l1 && l2)
225 9 : return atom_sub(sql->sa, l1, l2);
226 : }
227 : }
228 : return NULL;
229 : }
230 :
231 : int
232 150 : exp_range_overlap(atom *min, atom *max, atom *emin, atom *emax, bool min_exclusive, bool max_exclusive)
233 : {
234 150 : if (!min || !max || !emin || !emax || min->isnull || max->isnull || emin->isnull || emax->isnull)
235 : return 0;
236 :
237 150 : if ((!min_exclusive && VALcmp(&(emax->data), &(min->data)) < 0) || (min_exclusive && VALcmp(&(emax->data), &(min->data)) <= 0))
238 29 : return 0;
239 121 : if ((!max_exclusive && VALcmp(&(emin->data), &(max->data)) > 0) || (max_exclusive && VALcmp(&(emin->data), &(max->data)) >= 0))
240 34 : return 0;
241 : return 1;
242 : }
243 :
244 :
245 : /* if local_proj is >= -1, the current expression is from the same projection
246 : if local_proj is -1, then we don't care about self references (eg used to check for order by exps) */
247 : static int exp_mark_used(sql_rel *subrel, sql_exp *e, int local_proj);
248 :
249 : static int
250 811889 : exps_mark_used(sql_rel *subrel, list *l, int local_proj)
251 : {
252 811889 : int nr = 0;
253 811889 : if (list_empty(l))
254 : return nr;
255 :
256 2616826 : for (node *n = l->h; n != NULL; n = n->next)
257 1805000 : nr += exp_mark_used(subrel, n->data, local_proj);
258 : return nr;
259 : }
260 :
261 : static int
262 7204557 : exp_mark_used(sql_rel *subrel, sql_exp *e, int local_proj)
263 : {
264 7421480 : int nr = 0;
265 7421480 : sql_exp *ne = NULL;
266 :
267 7421480 : switch(e->type) {
268 4969053 : case e_column:
269 4969053 : ne = rel_find_exp(subrel, e);
270 : /* if looking in the same projection, make sure 'ne' is projected before the searched column */
271 4969094 : if (ne && local_proj > -1 && list_position(subrel->exps, ne) >= local_proj)
272 6191741 : ne = NULL;
273 : break;
274 216923 : case e_convert:
275 216923 : return exp_mark_used(subrel, e->l, local_proj);
276 772653 : case e_aggr:
277 : case e_func: {
278 772653 : if (e->l)
279 739440 : nr += exps_mark_used(subrel, e->l, local_proj);
280 772654 : if (e->r) {
281 1980 : list *r = e->r;
282 1980 : list *obes = r->h->data;
283 1980 : nr += exps_mark_used(subrel, obes, local_proj);
284 1980 : if (r->h->next) {
285 0 : list *exps = r->h->next->data;
286 0 : nr += exps_mark_used(subrel, exps, local_proj);
287 : }
288 : }
289 : break;
290 : }
291 450002 : case e_cmp:
292 450002 : if (e->flag == cmp_or || e->flag == cmp_filter) {
293 21697 : nr += exps_mark_used(subrel, e->l, local_proj);
294 21697 : nr += exps_mark_used(subrel, e->r, local_proj);
295 428305 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
296 21132 : nr += exp_mark_used(subrel, e->l, local_proj);
297 21132 : nr += exps_mark_used(subrel, e->r, local_proj);
298 : } else {
299 407173 : nr += exp_mark_used(subrel, e->l, local_proj);
300 407174 : nr += exp_mark_used(subrel, e->r, local_proj);
301 407173 : if (e->f)
302 6432 : nr += exp_mark_used(subrel, e->f, local_proj);
303 : }
304 : break;
305 1012849 : case e_atom:
306 : /* atoms are used in e_cmp */
307 1012849 : e->used = 1;
308 : /* return 0 as constants may require a full column ! */
309 1012849 : if (e->f)
310 5943 : nr += exps_mark_used(subrel, e->f, local_proj);
311 : return nr;
312 0 : case e_psm:
313 0 : if (e->flag & PSM_SET || e->flag & PSM_RETURN || e->flag & PSM_EXCEPTION) {
314 0 : nr += exp_mark_used(subrel, e->l, local_proj);
315 0 : } else if (e->flag & PSM_WHILE || e->flag & PSM_IF) {
316 0 : nr += exp_mark_used(subrel, e->l, local_proj);
317 0 : nr += exps_mark_used(subrel, e->r, local_proj);
318 0 : if (e->flag == PSM_IF && e->f)
319 0 : nr += exps_mark_used(subrel, e->f, local_proj);
320 : }
321 0 : e->used = 1;
322 0 : break;
323 : }
324 6191741 : if (ne && e != ne) {
325 2694504 : if (local_proj == -2 || ne->type != e_column || (has_label(ne) || (ne->alias.rname && ne->alias.rname[0] == '%')) || (subrel->l && !is_munion(subrel->op) && !rel_find_exp(subrel->l, e)))
326 2574397 : ne->used = 1;
327 2694504 : return ne->used;
328 : }
329 : return nr;
330 : }
331 :
332 : static void
333 45229 : positional_exps_mark_used( sql_rel *rel, sql_rel *subrel )
334 : {
335 45229 : assert(rel->exps);
336 :
337 45229 : if ((is_topn(subrel->op) || is_sample(subrel->op)) && subrel->l)
338 45229 : subrel = subrel->l;
339 : /* everything is used within the set operation */
340 45229 : if (rel->exps && subrel->exps) {
341 45229 : node *m;
342 381159 : for (m=subrel->exps->h; m; m = m->next) {
343 335930 : sql_exp *se = m->data;
344 :
345 335930 : se->used = 1;
346 : }
347 : }
348 45229 : }
349 :
350 : static void
351 791843 : rel_exps_mark_used(allocator *sa, sql_rel *rel, sql_rel *subrel)
352 : {
353 791843 : int nr = 0;
354 :
355 791843 : if (rel->r && (is_simple_project(rel->op) || is_groupby(rel->op))) {
356 32171 : list *l = rel->r;
357 32171 : node *n;
358 :
359 111638 : for (n=l->h; n; n = n->next) {
360 79465 : sql_exp *e = n->data;
361 :
362 79465 : e->used = 1;
363 79465 : exp_mark_used(rel, e, -1);
364 : }
365 : }
366 791845 : if (rel->attr) {
367 26456 : for (node *n = rel->attr->h; n; n = n->next) {
368 13228 : sql_exp *e = n->data;
369 :
370 13228 : if (e->type != e_aggr) /* keep all group by's */
371 13228 : e->used = 1;
372 13228 : if (e->used)
373 13228 : nr += exp_mark_used(subrel, e, -2);
374 : }
375 : }
376 791845 : if (rel->exps) {
377 751733 : node *n;
378 751733 : int len = list_length(rel->exps), i;
379 751734 : sql_exp **exps = SA_NEW_ARRAY(sa, sql_exp*, len);
380 :
381 3719603 : for (n=rel->exps->h, i = 0; n; n = n->next, i++) {
382 2967869 : sql_exp *e = exps[i] = n->data;
383 :
384 2967869 : nr += e->used;
385 : }
386 :
387 751734 : if (!nr && is_project(rel->op) && len > 0) /* project at least one column if exists */
388 6700 : exps[0]->used = 1;
389 :
390 3719690 : for (i = len-1; i >= 0; i--) {
391 2967940 : sql_exp *e = exps[i];
392 :
393 2967940 : if (!is_project(rel->op) || (e->used && !is_set(rel->op))) {
394 2406100 : if (is_project(rel->op))
395 1979247 : nr += exp_mark_used(rel, e, i);
396 2406106 : nr += exp_mark_used(subrel, e, -2);
397 : }
398 : }
399 : }
400 : /* for count/rank we need at least one column */
401 791862 : if (!nr && subrel && (is_project(subrel->op) || is_base(subrel->op)) && !list_empty(subrel->exps) &&
402 24904 : (is_simple_project(rel->op) && project_unsafe(rel, false))) {
403 34 : sql_exp *e = subrel->exps->h->data;
404 34 : e->used = 1;
405 : }
406 791863 : if (rel->r && (is_simple_project(rel->op) || is_groupby(rel->op))) {
407 32171 : list *l = rel->r;
408 32171 : node *n;
409 :
410 111638 : for (n=l->h; n; n = n->next) {
411 79467 : sql_exp *e = n->data;
412 :
413 79467 : e->used = 1;
414 : /* possibly project/groupby uses columns from the inner */
415 79467 : exp_mark_used(subrel, e, -2);
416 : }
417 : }
418 791863 : }
419 :
420 : static void exps_used(list *l);
421 :
422 : static void
423 3574280 : exp_used(sql_exp *e)
424 : {
425 3606833 : if (e) {
426 3588212 : e->used = 1;
427 :
428 3588212 : switch (e->type) {
429 31357 : case e_convert:
430 31357 : exp_used(e->l);
431 31357 : break;
432 98153 : case e_func:
433 : case e_aggr:
434 98153 : exps_used(e->l);
435 98153 : break;
436 7950 : case e_cmp:
437 7950 : if (e->flag == cmp_or || e->flag == cmp_filter) {
438 1042 : exps_used(e->l);
439 1042 : exps_used(e->r);
440 6908 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
441 249 : exp_used(e->l);
442 249 : exps_used(e->r);
443 : } else {
444 6659 : exp_used(e->l);
445 6659 : exp_used(e->r);
446 6659 : if (e->f)
447 : exp_used(e->f);
448 : }
449 : break;
450 : default:
451 : break;
452 : }
453 : }
454 3574280 : }
455 :
456 : static void
457 895520 : exps_used(list *l)
458 : {
459 895520 : if (l) {
460 4455048 : for (node *n = l->h; n; n = n->next)
461 3559588 : exp_used(n->data);
462 : }
463 895587 : }
464 :
465 : static void
466 831900 : rel_used(sql_rel *rel)
467 : {
468 831900 : if (!rel)
469 : return;
470 788341 : if (is_join(rel->op) || is_set(rel->op) || is_semi(rel->op) || is_modify(rel->op)) {
471 73351 : rel_used(rel->l);
472 73559 : rel_used(rel->r);
473 : } else if (rel->op == op_munion) {
474 12859 : list *l = rel->l;
475 38993 : for(node *n = l->h; n; n = n->next)
476 26134 : rel_used(n->data);
477 : } else if (is_topn(rel->op) || is_select(rel->op) || is_sample(rel->op)) {
478 20483 : rel_used(rel->l);
479 20501 : rel = rel->l;
480 : } else if (is_ddl(rel->op)) {
481 406141 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
482 37233 : rel_used(rel->l);
483 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
484 285 : rel_used(rel->l);
485 285 : rel_used(rel->r);
486 : } else if (rel->flag == ddl_psm) {
487 7 : exps_used(rel->exps);
488 : }
489 : } else if (rel->op == op_table) {
490 1090 : if (IS_TABLE_PROD_FUNC(rel->flag) || rel->flag == TABLE_FROM_RELATION)
491 1090 : rel_used(rel->l);
492 1090 : exp_used(rel->r);
493 : }
494 900858 : if (rel && rel->exps) {
495 775284 : exps_used(rel->exps);
496 775510 : if (rel->r && (is_simple_project(rel->op) || is_groupby(rel->op)))
497 19612 : exps_used(rel->r);
498 : }
499 : }
500 :
501 : static void
502 1292339 : rel_mark_used(mvc *sql, sql_rel *rel, int proj)
503 : {
504 1926412 : if (proj && (need_distinct(rel)))
505 10981 : rel_used(rel);
506 :
507 1926401 : switch(rel->op) {
508 : case op_basetable:
509 : case op_truncate:
510 : case op_insert:
511 : break;
512 :
513 13478 : case op_table:
514 :
515 13478 : if (rel->l && rel->flag != TRIGGER_WRAPPER) {
516 144 : rel_used(rel);
517 144 : if (rel->r)
518 144 : exp_mark_used(rel->l, rel->r, -2);
519 144 : rel_mark_used(sql, rel->l, proj);
520 : }
521 : break;
522 :
523 16456 : case op_topn:
524 : case op_sample:
525 16456 : if (proj) {
526 16367 : rel = rel ->l;
527 16367 : rel_mark_used(sql, rel, proj);
528 16367 : break;
529 : }
530 : /* fall through */
531 : case op_project:
532 : case op_groupby:
533 543142 : if (proj && rel->l) {
534 309539 : rel_exps_mark_used(sql->sa, rel, rel->l);
535 309558 : rel_mark_used(sql, rel->l, 0);
536 11045 : } else if (proj) {
537 10956 : rel_exps_mark_used(sql->sa, rel, NULL);
538 : }
539 : break;
540 7905 : case op_update:
541 : case op_delete:
542 7905 : if (proj && rel->r) {
543 6347 : rel_used(rel);
544 6347 : sql_rel *r = rel->r;
545 :
546 6347 : if (!list_empty(r->exps)) {
547 7449 : for (node *n = r->exps->h; n; n = n->next) {
548 6349 : sql_exp *e = n->data;
549 6349 : const char *nname = exp_name(e);
550 :
551 6349 : if (nname && nname[0] == '%' && strcmp(nname, TID) == 0) { /* TID is used */
552 5247 : e->used = 1;
553 5247 : break;
554 : }
555 : }
556 : }
557 6347 : rel_exps_mark_used(sql->sa, rel, rel->r);
558 6347 : rel_mark_used(sql, rel->r, 0);
559 : }
560 : break;
561 :
562 402326 : case op_ddl:
563 402326 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
564 33418 : if (rel->l)
565 : rel_mark_used(sql, rel->l, 0);
566 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
567 285 : if (rel->l)
568 285 : rel_mark_used(sql, rel->l, 0);
569 285 : if (rel->r)
570 : rel_mark_used(sql, rel->r, 0);
571 : }
572 : break;
573 :
574 111285 : case op_select:
575 111285 : if (rel->l) {
576 111285 : rel_exps_mark_used(sql->sa, rel, rel->l);
577 111286 : rel_mark_used(sql, rel->l, 0);
578 : }
579 : break;
580 :
581 5152 : case op_union:
582 : case op_inter:
583 : case op_except:
584 : /* For now we mark all union expression as used */
585 :
586 : /* Later we should (in case of union all) remove unused
587 : * columns from the projection.
588 : *
589 : * Project part of union is based on column position.
590 : */
591 5152 : if (proj && (need_distinct(rel) || !rel->exps)) {
592 2336 : rel_used(rel);
593 2336 : if (!rel->exps) {
594 0 : rel_used(rel->l);
595 0 : rel_used(rel->r);
596 : }
597 2336 : rel_mark_used(sql, rel->l, 0);
598 2336 : rel_mark_used(sql, rel->r, 0);
599 485 : } else if (proj && !need_distinct(rel)) {
600 485 : sql_rel *l = rel->l;
601 :
602 485 : positional_exps_mark_used(rel, l);
603 485 : rel_exps_mark_used(sql->sa, rel, l);
604 485 : rel_mark_used(sql, rel->l, 0);
605 : /* based on child check set expression list */
606 485 : if (is_project(l->op) && need_distinct(l))
607 0 : positional_exps_mark_used(l, rel);
608 485 : positional_exps_mark_used(rel, rel->r);
609 485 : rel_exps_mark_used(sql->sa, rel, rel->r);
610 485 : rel_mark_used(sql, rel->r, 0);
611 : }
612 : break;
613 :
614 49504 : case op_munion:
615 49504 : assert(rel->l);
616 : // TODO: here we blindly follow the same logic as op_union. RE-evaluate
617 49504 : if (proj && (need_distinct(rel) || !rel->exps)) {
618 2249 : rel_used(rel);
619 2249 : if (!rel->exps) {
620 0 : for (node *n = ((list*)rel->l)->h; n; n = n->next) {
621 0 : rel_used(n->data);
622 0 : rel_mark_used(sql, n->data, 0);
623 : }
624 : }
625 21966 : } else if (proj && !need_distinct(rel)) {
626 21966 : bool first = true;
627 66225 : for (node *n = ((list*)rel->l)->h; n; n = n->next) {
628 44259 : sql_rel *l = n->data;
629 :
630 44259 : positional_exps_mark_used(rel, l);
631 44259 : rel_exps_mark_used(sql->sa, rel, l);
632 44259 : rel_mark_used(sql, l, 0);
633 : /* based on child check set expression list */
634 44259 : if (first && is_project(l->op) && need_distinct(l))
635 0 : positional_exps_mark_used(l, rel);
636 44259 : first = false;
637 : }
638 : }
639 : break;
640 154244 : case op_join:
641 : case op_left:
642 : case op_right:
643 : case op_full:
644 : case op_semi:
645 : case op_anti:
646 : case op_merge:
647 154244 : rel_exps_mark_used(sql->sa, rel, rel->l);
648 154244 : rel_exps_mark_used(sql->sa, rel, rel->r);
649 154244 : rel_mark_used(sql, rel->l, 0);
650 154244 : rel_mark_used(sql, rel->r, 0);
651 154244 : break;
652 : }
653 1292348 : }
654 :
655 : static sql_rel * rel_dce_sub(mvc *sql, sql_rel *rel);
656 :
657 : static sql_rel *
658 1147224 : rel_remove_unused(mvc *sql, sql_rel *rel)
659 : {
660 1147224 : int needed = 0;
661 :
662 1147224 : if (!rel)
663 : return rel;
664 :
665 1140332 : switch(rel->op) {
666 308325 : case op_basetable: {
667 308325 : sql_table *t = rel->l;
668 :
669 308325 : if (t && isReplicaTable(t)) /* TODO fix rewriting in rel_distribute.c */
670 : return rel;
671 : }
672 : /* fall through */
673 : case op_table:
674 315191 : if (rel->exps && (rel->op != op_table || !IS_TABLE_PROD_FUNC(rel->flag))) {
675 1374501 : for(node *n=rel->exps->h; n && !needed; n = n->next) {
676 1066202 : sql_exp *e = n->data;
677 :
678 1066202 : if (!e->used)
679 232384 : needed = 1;
680 : }
681 :
682 308299 : if (!needed)
683 : return rel;
684 :
685 1374749 : for(node *n=rel->exps->h; n;) {
686 1142377 : node *next = n->next;
687 1142377 : sql_exp *e = n->data;
688 :
689 : /* at least one (needed for crossproducts, count(*), rank() and single value projections) !, handled by rel_exps_mark_used */
690 1142377 : if (!e->used && list_length(rel->exps) > 1)
691 388137 : list_remove_node(rel->exps, NULL, n);
692 : n = next;
693 : }
694 : }
695 239264 : if (rel->op == op_table && (IS_TABLE_PROD_FUNC(rel->flag) || rel->flag == TABLE_FROM_RELATION))
696 6892 : rel->l = rel_remove_unused(sql, rel->l);
697 : return rel;
698 :
699 16363 : case op_topn:
700 : case op_sample:
701 :
702 16363 : if (rel->l)
703 16363 : rel->l = rel_remove_unused(sql, rel->l);
704 : return rel;
705 :
706 320505 : case op_project:
707 : case op_groupby:
708 :
709 320505 : if (/*rel->l &&*/ rel->exps) {
710 2054518 : for(node *n=rel->exps->h; n && !needed; n = n->next) {
711 1734013 : sql_exp *e = n->data;
712 :
713 1734013 : if (!e->used)
714 22902 : needed = 1;
715 : }
716 320505 : if (!needed)
717 : return rel;
718 :
719 559063 : for(node *n=rel->exps->h; n;) {
720 536161 : node *next = n->next;
721 536161 : sql_exp *e = n->data;
722 :
723 : /* at least one (needed for crossproducts, count(*), rank() and single value projections) */
724 536161 : if (!e->used && list_length(rel->exps) > 1)
725 416094 : list_remove_node(rel->exps, NULL, n);
726 : n = next;
727 : }
728 : }
729 : return rel;
730 :
731 3180 : case op_join:
732 : case op_left:
733 : case op_right:
734 : case op_full:
735 3180 : if (list_length(rel->attr) > 1) {
736 0 : for(node *n=rel->attr->h; n && !needed; n = n->next) {
737 0 : sql_exp *e = n->data;
738 :
739 0 : if (!e->used)
740 0 : needed = 1;
741 : }
742 0 : if (!needed)
743 : return rel;
744 :
745 0 : for(node *n=rel->attr->h; n;) {
746 0 : node *next = n->next;
747 0 : sql_exp *e = n->data;
748 :
749 0 : if (!e->used)
750 0 : list_remove_node(rel->attr, NULL, n);
751 : n = next;
752 : }
753 : }
754 : return rel;
755 :
756 : case op_union:
757 : case op_inter:
758 : case op_except:
759 : case op_munion:
760 :
761 : case op_insert:
762 : case op_update:
763 : case op_delete:
764 : case op_truncate:
765 : case op_merge:
766 :
767 : case op_select:
768 :
769 : case op_semi:
770 : case op_anti:
771 : return rel;
772 402008 : case op_ddl:
773 402008 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
774 33144 : if (rel->l)
775 32781 : rel->l = rel_remove_unused(sql, rel->l);
776 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
777 241 : if (rel->l)
778 241 : rel->l = rel_remove_unused(sql, rel->l);
779 241 : if (rel->r)
780 241 : rel->r = rel_remove_unused(sql, rel->r);
781 : }
782 : return rel;
783 : }
784 : return rel;
785 : }
786 :
787 : static void
788 1329675 : rel_dce_refs(mvc *sql, sql_rel *rel, list *refs)
789 : {
790 1329675 : if (!rel || (rel_is_ref(rel) && list_find(refs, rel, NULL)))
791 14119 : return ;
792 :
793 1315556 : switch(rel->op) {
794 411069 : case op_table:
795 : case op_topn:
796 : case op_sample:
797 : case op_project:
798 : case op_groupby:
799 : case op_select:
800 :
801 411069 : if (rel->l && (rel->op != op_table || rel->flag != TRIGGER_WRAPPER))
802 394913 : rel_dce_refs(sql, rel->l, refs);
803 : break;
804 :
805 : case op_basetable:
806 : case op_insert:
807 : case op_truncate:
808 : break;
809 :
810 6700 : case op_update:
811 : case op_delete:
812 :
813 6700 : if (rel->r)
814 6347 : rel_dce_refs(sql, rel->r, refs);
815 : break;
816 :
817 146576 : case op_union:
818 : case op_inter:
819 : case op_except:
820 : case op_join:
821 : case op_left:
822 : case op_right:
823 : case op_full:
824 : case op_semi:
825 : case op_anti:
826 : case op_merge:
827 :
828 146576 : if (rel->l)
829 146576 : rel_dce_refs(sql, rel->l, refs);
830 146576 : if (rel->r)
831 146576 : rel_dce_refs(sql, rel->r, refs);
832 : break;
833 23515 : case op_munion:
834 23515 : assert(rel->l);
835 71166 : for (node *n = ((list*)rel->l)->h; n; n = n->next)
836 47651 : rel_dce_refs(sql, n->data, refs);
837 : break;
838 403426 : case op_ddl:
839 :
840 403426 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
841 34518 : if (rel->l)
842 34155 : rel_dce_refs(sql, rel->l, refs);
843 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
844 285 : if (rel->l)
845 285 : rel_dce_refs(sql, rel->l, refs);
846 285 : if (rel->r)
847 251 : rel_dce_refs(sql, rel->r, refs);
848 : } break;
849 : }
850 :
851 1315551 : if (rel_is_ref(rel) && !list_find(refs, rel, NULL))
852 16827 : list_prepend(refs, rel);
853 : }
854 :
855 : static sql_rel *
856 1916199 : rel_dce_down(mvc *sql, sql_rel *rel, int skip_proj)
857 : {
858 1916199 : if (!rel)
859 : return rel;
860 :
861 1916199 : if (!skip_proj && rel_is_ref(rel))
862 : return rel;
863 :
864 1885901 : switch(rel->op) {
865 564685 : case op_basetable:
866 : case op_table:
867 :
868 564685 : if (skip_proj && rel->l && rel->op == op_table && rel->flag != TRIGGER_WRAPPER)
869 83 : rel->l = rel_dce_down(sql, rel->l, 0);
870 282091 : if (!skip_proj)
871 282091 : rel_dce_sub(sql, rel);
872 : /* fall through */
873 :
874 : case op_truncate:
875 : return rel;
876 :
877 7809 : case op_insert:
878 7809 : rel_used(rel->r);
879 7809 : rel_dce_sub(sql, rel->r);
880 7809 : return rel;
881 :
882 7905 : case op_update:
883 : case op_delete:
884 :
885 7905 : if (skip_proj && rel->r)
886 6347 : rel->r = rel_dce_down(sql, rel->r, 0);
887 1205 : if (!skip_proj)
888 1205 : rel_dce_sub(sql, rel);
889 : return rel;
890 :
891 555903 : case op_topn:
892 : case op_sample:
893 : case op_project:
894 : case op_groupby:
895 :
896 555903 : if (skip_proj && rel->l)
897 325868 : rel->l = rel_dce_down(sql, rel->l, is_topn(rel->op) || is_sample(rel->op));
898 219101 : if (!skip_proj)
899 219101 : rel_dce_sub(sql, rel);
900 : return rel;
901 :
902 5145 : case op_union:
903 : case op_inter:
904 : case op_except:
905 5145 : if (skip_proj) {
906 2821 : if (rel->l)
907 2821 : rel->l = rel_dce_down(sql, rel->l, 0);
908 2821 : if (rel->r)
909 2821 : rel->r = rel_dce_down(sql, rel->r, 0);
910 : }
911 2324 : if (!skip_proj)
912 2324 : rel_dce_sub(sql, rel);
913 : return rel;
914 :
915 46475 : case op_munion:
916 46475 : if (skip_proj) {
917 72970 : for (node *n = ((list*)rel->l)->h; n; n = n->next)
918 48757 : n->data = rel_dce_down(sql, n->data, 0);
919 : }
920 22262 : if (!skip_proj)
921 22262 : rel_dce_sub(sql, rel);
922 : return rel;
923 103660 : case op_select:
924 103660 : if (rel->l)
925 103660 : rel->l = rel_dce_down(sql, rel->l, 0);
926 : return rel;
927 :
928 150773 : case op_join:
929 : case op_left:
930 : case op_right:
931 : case op_full:
932 : case op_semi:
933 : case op_anti:
934 : case op_merge:
935 150773 : if (rel->l)
936 150773 : rel->l = rel_dce_down(sql, rel->l, 0);
937 150773 : if (rel->r)
938 150773 : rel->r = rel_dce_down(sql, rel->r, 0);
939 150773 : if (!skip_proj && !list_empty(rel->attr))
940 3180 : rel_dce_sub(sql, rel);
941 : return rel;
942 :
943 402129 : case op_ddl:
944 402129 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
945 33221 : if (rel->l)
946 33055 : rel->l = rel_dce_down(sql, rel->l, 0);
947 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
948 285 : if (rel->l)
949 285 : rel->l = rel_dce_down(sql, rel->l, 0);
950 285 : if (rel->r)
951 251 : rel->r = rel_dce_down(sql, rel->r, 0);
952 : }
953 : return rel;
954 : }
955 : return rel;
956 : }
957 :
958 : /* DCE
959 : *
960 : * Based on top relation expressions mark sub expressions as used.
961 : * Then recurse down until the projections. Clean them up and repeat.
962 : */
963 :
964 : static sql_rel *
965 1090748 : rel_dce_sub(mvc *sql, sql_rel *rel)
966 : {
967 1090748 : if (!rel)
968 : return rel;
969 :
970 : /*
971 : * Mark used up until the next project
972 : * For setops we need to first mark, then remove
973 : * because of positional dependency
974 : */
975 1090748 : rel_mark_used(sql, rel, 1);
976 1090760 : rel = rel_remove_unused(sql, rel);
977 1090757 : rel_dce_down(sql, rel, 1);
978 1090757 : return rel;
979 : }
980 :
981 : /* add projects under set ops */
982 : static sql_rel *
983 2102717 : rel_add_projects(mvc *sql, sql_rel *rel)
984 : {
985 2102717 : if (!rel)
986 : return rel;
987 :
988 2102717 : switch(rel->op) {
989 : case op_basetable:
990 : case op_truncate:
991 : return rel;
992 14509 : case op_insert:
993 : case op_update:
994 : case op_delete:
995 14509 : if (rel->r)
996 14156 : rel->r = rel_add_projects(sql, rel->r);
997 : return rel;
998 2918 : case op_union:
999 : case op_inter:
1000 : case op_except:
1001 : /* We can only reduce the list of expressions of an set op
1002 : * if the projection under it can also be reduced.
1003 : */
1004 2918 : if (rel->l) {
1005 2918 : sql_rel *l = rel->l;
1006 :
1007 2918 : if (!is_project(l->op) && !need_distinct(rel))
1008 3 : l = rel_project(sql->sa, l, rel_projections(sql, l, NULL, 1, 1));
1009 2918 : rel->l = rel_add_projects(sql, l);
1010 : }
1011 2918 : if (rel->r) {
1012 2918 : sql_rel *r = rel->r;
1013 :
1014 2918 : if (!is_project(r->op) && !need_distinct(rel))
1015 4 : r = rel_project(sql->sa, r, rel_projections(sql, r, NULL, 1, 1));
1016 2918 : rel->r = rel_add_projects(sql, r);
1017 : }
1018 : return rel;
1019 34794 : case op_munion:
1020 34794 : assert(rel->l);
1021 154119 : for (node *n = ((list*)rel->l)->h; n; n = n->next) {
1022 119325 : sql_rel* r = n->data;
1023 119325 : if (!is_project(r->op) && !need_distinct(rel))
1024 0 : r = rel_project(sql->sa, r, rel_projections(sql, r, NULL, 1, 1));
1025 119325 : r = rel_add_projects(sql, r);
1026 119325 : n->data = r;
1027 : }
1028 : return rel;
1029 800957 : case op_topn:
1030 : case op_sample:
1031 : case op_project:
1032 : case op_groupby:
1033 : case op_select:
1034 : case op_table:
1035 800957 : if (rel->l && (rel->op != op_table || rel->flag != TRIGGER_WRAPPER))
1036 770209 : rel->l = rel_add_projects(sql, rel->l);
1037 : return rel;
1038 302734 : case op_join:
1039 : case op_left:
1040 : case op_right:
1041 : case op_full:
1042 : case op_semi:
1043 : case op_anti:
1044 : case op_merge:
1045 302734 : if (rel->l)
1046 302734 : rel->l = rel_add_projects(sql, rel->l);
1047 302734 : if (rel->r)
1048 302734 : rel->r = rel_add_projects(sql, rel->r);
1049 : return rel;
1050 403694 : case op_ddl:
1051 403694 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq || rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view) {
1052 34786 : if (rel->l)
1053 34423 : rel->l = rel_add_projects(sql, rel->l);
1054 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
1055 285 : if (rel->l)
1056 285 : rel->l = rel_add_projects(sql, rel->l);
1057 285 : if (rel->r)
1058 251 : rel->r = rel_add_projects(sql, rel->r);
1059 : }
1060 : return rel;
1061 : }
1062 : return rel;
1063 : }
1064 :
1065 : static sql_rel *
1066 552788 : rel_dce_(mvc *sql, sql_rel *rel)
1067 : {
1068 552788 : list *refs = sa_list(sql->sa);
1069 :
1070 552894 : rel_dce_refs(sql, rel, refs);
1071 552876 : if (refs) {
1072 569734 : for(node *n = refs->h; n; n = n->next) {
1073 16827 : sql_rel *i = n->data;
1074 :
1075 16827 : while (!rel_is_ref(i) && i->l && !is_base(i->op))
1076 : i = i->l;
1077 16827 : if (i)
1078 16827 : rel_used(i);
1079 : }
1080 : }
1081 552907 : rel = rel_add_projects(sql, rel);
1082 552819 : rel_used(rel);
1083 553211 : rel_dce_sub(sql, rel);
1084 552793 : return rel;
1085 : }
1086 :
1087 :
1088 : /* Remove unused expressions */
1089 : static sql_rel *
1090 552802 : rel_dce(visitor *v, global_props *gp, sql_rel *rel)
1091 : {
1092 552802 : (void) gp;
1093 552802 : return rel_dce_(v->sql, rel);
1094 : }
1095 :
1096 : /* keep export for other projects */
1097 : sql_rel *
1098 0 : rel_deadcode_elimination(mvc *sql, sql_rel *rel)
1099 : {
1100 0 : return rel_dce_(sql, rel);
1101 : }
1102 :
1103 : run_optimizer
1104 629445 : bind_dce(visitor *v, global_props *gp)
1105 : {
1106 629445 : int flag = v->sql->sql_optimizer;
1107 629445 : return gp->opt_cycle == 0 && gp->opt_level == 1 && (flag & dce) ? rel_dce : NULL;
1108 : }
1109 :
1110 :
1111 : static int
1112 83496 : topn_sample_safe_exps( list *exps, bool nil_limit )
1113 : {
1114 : /* Limit only expression lists are always save */
1115 83496 : if (list_length(exps) == 1)
1116 : return 1;
1117 1206 : for (node *n = exps->h; n; n = n->next ) {
1118 819 : sql_exp *e = n->data;
1119 :
1120 819 : if (!e || e->type != e_atom || (!nil_limit && exp_is_null(e)))
1121 51 : return 0;
1122 : }
1123 : return 1;
1124 : }
1125 :
1126 : static list *
1127 129 : sum_limit_offset(mvc *sql, sql_rel *rel)
1128 : {
1129 : /* for sample we always propagate, or if the expression list only consists of a limit expression, we copy it */
1130 129 : if (is_sample(rel->op) || list_length(rel->exps) == 1)
1131 126 : return exps_copy(sql, rel->exps);
1132 3 : assert(list_length(rel->exps) == 2);
1133 3 : sql_subtype *lng = sql_bind_localtype("lng");
1134 3 : sql_exp *add = rel_binop_(sql, NULL, exp_copy(sql, rel->exps->h->data), exp_copy(sql, rel->exps->h->next->data), "sys", "sql_add", card_value, true);
1135 : /* for remote plans, make sure the output type is a bigint */
1136 3 : if (subtype_cmp(lng, exp_subtype(add)) != 0)
1137 3 : add = exp_convert(sql, add, exp_subtype(add), lng);
1138 3 : return list_append(sa_list(sql->sa), add);
1139 : }
1140 :
1141 : /*
1142 : * Push TopN (only LIMIT, no ORDER BY) down through projections underneath crossproduct, i.e.,
1143 : *
1144 : * topn( topn(
1145 : * project( project(
1146 : * crossproduct( crossproduct(
1147 : * L, => topn( L )[ n ],
1148 : * R topn( R )[ n ]
1149 : * ) )
1150 : * )[ Cs ]* )[ Cs ]*
1151 : * )[ n ] )[ n ]
1152 : *
1153 : * (TODO: in case of n==1 we can omit the original top-level TopN)
1154 : *
1155 : * also push topn under (non reordering) projections.
1156 : */
1157 : static sql_rel *
1158 136574 : rel_push_topn_and_sample_down_(visitor *v, sql_rel *rel)
1159 : {
1160 136574 : sql_rel *rp = NULL, *r = rel->l, *rpp = NULL;
1161 :
1162 136574 : if (is_topn(rel->op) && !rel_is_ref(rel) &&
1163 49709 : r && r->op == op_table && r->flag != TRIGGER_WRAPPER && !rel_is_ref(r) && r->r) {
1164 3 : sql_exp *op = r->r;
1165 3 : sql_subfunc *f = op->f;
1166 :
1167 3 : if (is_func(op->type) && strcmp(f->func->base.name, "file_loader") == 0 && !sql_func_mod(f->func)[0] && !sql_func_imp(f->func)[0]) {
1168 : /* push limit, to arguments of file_loader */
1169 0 : list *args = op->l;
1170 0 : if (list_length(args) == 2) {
1171 0 : sql_exp *topN = rel->exps->h->data;
1172 0 : sql_exp *offset = rel->exps->h->next? rel->exps->h->next->data:NULL;
1173 0 : atom *topn = topN->l;
1174 0 : if (offset) {
1175 0 : atom *b1 = (atom *)offset->l, *c = atom_add(v->sql->sa, b1, topn);
1176 :
1177 0 : if (!c)
1178 : return rel;
1179 0 : if (atom_cmp(c, topn) < 0) /* overflow */
1180 : return rel;
1181 : topn = c;
1182 : }
1183 0 : append(args, exp_atom(v->sql->sa, topn));
1184 0 : v->changes++;
1185 : }
1186 : }
1187 : }
1188 :
1189 136574 : if ((is_topn(rel->op) || is_sample(rel->op)) && topn_sample_safe_exps(rel->exps, true)) {
1190 49803 : sql_rel *(*func) (allocator *, sql_rel *, list *) = is_topn(rel->op) ? rel_topn : rel_sample;
1191 :
1192 : /* nested topN relations */
1193 49803 : if (r && is_topn(rel->op) && is_topn(r->op) && !rel_is_ref(r)) {
1194 0 : sql_exp *topN1 = rel->exps->h->data, *topN2 = r->exps->h->data;
1195 0 : sql_exp *offset1 = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
1196 0 : sql_exp *offset2 = list_length(r->exps) > 1 ? r->exps->h->next->data : NULL;
1197 :
1198 0 : if (topN1->l && topN2->l && (!offset1 || offset1->l) && (!offset2 || offset2->l)) { /* no parameters */
1199 0 : bool changed = false;
1200 :
1201 0 : if ((!offset1 || (offset1->type == e_atom && offset1->l)) && (!offset2 || (offset2->type == e_atom && offset2->l))) { /* only atoms */
1202 0 : if (!offset1 && offset2) {
1203 0 : list_append(rel->exps, exp_copy(v->sql, offset2));
1204 0 : changed = true;
1205 0 : } else if (offset1 && offset2) { /* sum offsets */
1206 0 : atom *b1 = (atom *)offset1->l, *b2 = (atom *)offset2->l, *c = atom_add(v->sql->sa, b1, b2);
1207 :
1208 0 : if (!c) /* error, don't apply optimization, WARNING because of this the offset optimization must come before the limit one */
1209 : return rel;
1210 0 : if (atom_cmp(c, b2) < 0) /* overflow */
1211 0 : c = atom_int(v->sql->sa, sql_bind_localtype("lng"), GDK_lng_max);
1212 0 : offset1->l = c;
1213 0 : changed = true;
1214 : }
1215 : }
1216 :
1217 0 : if (topN1->type == e_atom && topN1->l && topN2->type == e_atom && topN2->l) { /* only atoms */
1218 0 : atom *a1 = (atom *)topN1->l, *a2 = (atom *)topN2->l;
1219 :
1220 0 : if (!a2->isnull && (a1->isnull || atom_cmp(a1, a2) >= 0)) { /* topN1 is not set or is larger than topN2 */
1221 0 : rel->exps->h->data = exp_copy(v->sql, topN2);
1222 0 : changed = true;
1223 : }
1224 : }
1225 :
1226 0 : if (changed) {
1227 0 : rel->l = r->l;
1228 0 : r->l = NULL;
1229 0 : rel_destroy(r);
1230 0 : v->changes++;
1231 0 : return rel;
1232 : }
1233 : }
1234 : }
1235 :
1236 49803 : if (r && is_simple_project(r->op) && need_distinct(r))
1237 : return rel;
1238 :
1239 : /* push topn/sample under projections */
1240 49798 : if (!rel_is_ref(rel) && r && is_simple_project(r->op) && !need_distinct(r) && !rel_is_ref(r) && r->l && list_empty(r->r)) {
1241 : sql_rel *x = r, *px = x;
1242 :
1243 32210 : while (is_simple_project(x->op) && !need_distinct(x) && !rel_is_ref(x) && x->l && list_empty(x->r)) {
1244 16115 : px = x;
1245 16115 : x = x->l;
1246 : }
1247 : /* only push topn once */
1248 16085 : if (x && x->op == rel->op)
1249 : return rel;
1250 :
1251 16075 : rel->l = x;
1252 16075 : px->l = rel;
1253 16075 : rel = r;
1254 16075 : v->changes++;
1255 16075 : return rel;
1256 : }
1257 :
1258 33699 : if (!topn_sample_safe_exps(rel->exps, false))
1259 : return rel;
1260 :
1261 : /* duplicate topn/sample direct under union or crossproduct */
1262 33646 : if (r && !rel_is_ref(r) && r->l && r->r && ((is_union(r->op) && r->exps) || (r->op == op_join && list_empty(r->exps)))) {
1263 150 : sql_rel *u = r, *x;
1264 150 : sql_rel *ul = u->l;
1265 150 : sql_rel *ur = u->r;
1266 150 : bool changed = false;
1267 :
1268 150 : x = ul;
1269 156 : while (is_simple_project(x->op) && !need_distinct(x) && !rel_is_ref(x) && x->l && list_empty(x->r))
1270 6 : x = x->l;
1271 150 : if (x && x->op != rel->op) { /* only push topn once */
1272 60 : ul = func(v->sql->sa, ul, sum_limit_offset(v->sql, rel));
1273 60 : set_processed(ul);
1274 60 : u->l = ul;
1275 60 : changed = true;
1276 : }
1277 :
1278 150 : x = ur;
1279 158 : while (is_simple_project(x->op) && !need_distinct(x) && !rel_is_ref(x) && x->l && list_empty(x->r))
1280 8 : x = x->l;
1281 150 : if (x && x->op != rel->op) { /* only push topn once */
1282 61 : ur = func(v->sql->sa, ur, sum_limit_offset(v->sql, rel));
1283 61 : set_processed(ur);
1284 61 : u->r = ur;
1285 61 : changed = true;
1286 : }
1287 :
1288 150 : if (changed)
1289 61 : v->changes++;
1290 150 : return rel;
1291 : }
1292 :
1293 : /* duplicate topn/sample + [ project-order ] under union */
1294 : if (r && !rp)
1295 33508 : rp = r->l;
1296 33508 : if (r && r->exps && is_simple_project(r->op) && !rel_is_ref(r) && !list_empty(r->r) && r->l && is_union(rp->op)) {
1297 0 : sql_rel *u = rp, *ou = u, *x, *ul = u->l, *ur = u->r;
1298 0 : list *rcopy = NULL;
1299 :
1300 : /* only push topn/sample once */
1301 0 : x = ul;
1302 0 : while (is_simple_project(x->op) && !need_distinct(x) && !rel_is_ref(x) && x->l && list_empty(x->r))
1303 0 : x = x->l;
1304 0 : if (x && x->op == rel->op)
1305 : return rel;
1306 : x = ur;
1307 0 : while (is_simple_project(x->op) && !need_distinct(x) && !rel_is_ref(x) && x->l && list_empty(x->r))
1308 0 : x = x->l;
1309 0 : if (x && x->op == rel->op)
1310 : return rel;
1311 :
1312 0 : rcopy = exps_copy(v->sql, r->r);
1313 0 : for (node *n = rcopy->h ; n ; n = n->next) {
1314 0 : sql_exp *e = n->data;
1315 0 : set_descending(e); /* remove ordering properties for projected columns */
1316 0 : set_nulls_first(e);
1317 : }
1318 0 : ul = rel_dup(ul);
1319 0 : ur = rel_dup(ur);
1320 0 : if (!is_project(ul->op))
1321 0 : ul = rel_project(v->sql->sa, ul,
1322 : rel_projections(v->sql, ul, NULL, 1, 1));
1323 0 : if (!is_project(ur->op))
1324 0 : ur = rel_project(v->sql->sa, ur,
1325 : rel_projections(v->sql, ur, NULL, 1, 1));
1326 0 : rel_rename_exps(v->sql, u->exps, ul->exps);
1327 0 : rel_rename_exps(v->sql, u->exps, ur->exps);
1328 :
1329 : /* introduce projects under the set */
1330 0 : ul = rel_project(v->sql->sa, ul, NULL);
1331 0 : ul->exps = exps_copy(v->sql, r->exps);
1332 : /* possibly add order by column */
1333 0 : ul->exps = list_distinct(list_merge(ul->exps, exps_copy(v->sql, rcopy), NULL), (fcmp) exp_equal, (fdup) NULL);
1334 0 : ul->nrcols = list_length(ul->exps);
1335 0 : ul->r = exps_copy(v->sql, r->r);
1336 0 : set_processed(ul);
1337 0 : ul = func(v->sql->sa, ul, sum_limit_offset(v->sql, rel));
1338 0 : set_processed(ul);
1339 :
1340 0 : ur = rel_project(v->sql->sa, ur, NULL);
1341 0 : ur->exps = exps_copy(v->sql, r->exps);
1342 : /* possibly add order by column */
1343 0 : ur->exps = list_distinct(list_merge(ur->exps, exps_copy(v->sql, rcopy), NULL), (fcmp) exp_equal, (fdup) NULL);
1344 0 : ur->nrcols = list_length(ur->exps);
1345 0 : ur->r = exps_copy(v->sql, r->r);
1346 0 : set_processed(ur);
1347 0 : ur = func(v->sql->sa, ur, sum_limit_offset(v->sql, rel));
1348 0 : set_processed(ur);
1349 :
1350 0 : u = rel_setop(v->sql->sa, ul, ur, op_union);
1351 0 : u->exps = exps_alias(v->sql, r->exps);
1352 0 : u->nrcols = list_length(u->exps);
1353 0 : set_processed(u);
1354 : /* possibly add order by column */
1355 0 : u->exps = list_distinct(list_merge(u->exps, rcopy, NULL), (fcmp) exp_equal, (fdup) NULL);
1356 0 : if (need_distinct(r)) {
1357 0 : set_distinct(ul);
1358 0 : set_distinct(ur);
1359 : }
1360 :
1361 : /* zap names */
1362 0 : rel_no_rename_exps(u->exps);
1363 0 : rel_destroy(ou);
1364 :
1365 0 : ur = rel_project(v->sql->sa, u, exps_alias(v->sql, r->exps));
1366 0 : ur->r = r->r;
1367 0 : r->l = NULL;
1368 :
1369 0 : if (need_distinct(r))
1370 0 : set_distinct(ur);
1371 :
1372 0 : rel_destroy(r);
1373 0 : rel->l = ur;
1374 0 : v->changes++;
1375 0 : return rel;
1376 : }
1377 : /* a left outer join b order by a.* limit L, can be copied into a */
1378 : /* topn ( project (orderby)( optional project ( left ())
1379 : * rel r rp */
1380 33496 : if (r && !rp)
1381 66 : rp = r->l;
1382 33496 : if (r && rp && is_simple_project(rp->op) && !rp->r && rp->l)
1383 33496 : rpp = rp->l;
1384 33496 : if (r && r->exps && is_simple_project(r->op) && !rel_is_ref(r) && r->r && r->l && ((!rpp && is_left(rp->op)) ||
1385 308 : (rpp && is_left(rpp->op)))) {
1386 24 : sql_rel *lj = rpp?rpp:rp;
1387 24 : sql_rel *l = lj->l;
1388 24 : list *obes = r->r, *nobes = sa_list(v->sql->sa);
1389 24 : int fnd = 1;
1390 63 : for (node *n = obes->h; n && fnd; n = n->next) {
1391 41 : sql_exp *obe = n->data;
1392 41 : int part = is_partitioning(obe);
1393 41 : int asc = is_ascending(obe);
1394 41 : int nl = nulls_last(obe);
1395 : /* only simple rename expressions */
1396 41 : sql_exp *pe = exps_find_exp(r->exps, obe);
1397 41 : if (pe && rpp)
1398 33 : pe = exps_find_exp(rp->exps, pe);
1399 41 : if (pe)
1400 41 : pe = rel_find_exp(l, pe);
1401 41 : if (pe) {
1402 41 : if (exp_is_atom(pe))
1403 : return rel;
1404 39 : pe = exp_ref(v->sql, pe);
1405 39 : if (part)
1406 0 : set_partitioning(pe);
1407 39 : if (asc)
1408 29 : set_ascending(pe);
1409 39 : if (nl)
1410 5 : set_nulls_last(pe);
1411 39 : append(nobes, pe);
1412 : }
1413 : else
1414 : fnd = 0;
1415 : }
1416 22 : if (fnd && ((is_topn(rel->op) && !is_topn(l->op)) || (is_sample(rel->op) && !is_sample(l->op)))) {
1417 : /* inject topn */
1418 : /* Todo add order by */
1419 8 : sql_rel *ob = lj->l = rel_project(v->sql->sa, lj->l, rel_projections(v->sql, lj->l, NULL, 1, 1));
1420 8 : ob->r = nobes;
1421 8 : lj->l = func(v->sql->sa, lj->l, sum_limit_offset(v->sql, rel));
1422 8 : v->changes++;
1423 8 : return rel;
1424 : }
1425 : }
1426 : }
1427 : return rel;
1428 : }
1429 :
1430 : static sql_rel *
1431 33181 : rel_push_topn_and_sample_down(visitor *v, global_props *gp, sql_rel *rel)
1432 : {
1433 33181 : (void) gp;
1434 33181 : return rel_visitor_topdown(v, rel, &rel_push_topn_and_sample_down_);
1435 : }
1436 :
1437 : run_optimizer
1438 629535 : bind_push_topn_and_sample_down(visitor *v, global_props *gp)
1439 : {
1440 629535 : int flag = v->sql->sql_optimizer;
1441 629366 : return gp->opt_level == 1 && (gp->cnt[op_topn] || gp->cnt[op_sample]) &&
1442 662519 : (flag & push_topn_and_sample_down) ? rel_push_topn_and_sample_down : NULL;
1443 : }
|