Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_rewriter.h"
15 : #include "rel_exp.h"
16 : #include "rel_dump.h"
17 : #include "rel_basetable.h"
18 :
19 : /* simplify expressions, such as not(not(x)) */
20 : /* exp visitor */
21 :
22 : #define is_not_anyequal(sf) (strcmp((sf)->func->base.name, "sql_not_anyequal") == 0)
23 :
24 : static list *
25 1883639 : exps_simplify_exp(visitor *v, list *exps)
26 : {
27 1883639 : if (list_empty(exps))
28 : return exps;
29 :
30 1883639 : int needed = 0;
31 4051982 : for (node *n=exps->h; n && !needed; n = n->next) {
32 2168343 : sql_exp *e = n->data;
33 :
34 2246160 : needed = (exp_is_true(e) || exp_is_false(e) || (is_compare(e->type) && e->flag == cmp_or));
35 : }
36 : /* if there's only one expression and it is false, we have to keep it */
37 1883639 : if (list_length(exps) == 1 && exp_is_false(exps->h->data))
38 : return exps;
39 1880610 : if (needed) {
40 74788 : list *nexps = sa_list(v->sql->sa);
41 165970 : for (node *n=exps->h; n; n = n->next) {
42 91329 : sql_exp *e = n->data;
43 :
44 : /* TRUE or X -> TRUE
45 : * FALSE or X -> X */
46 91329 : if (is_compare(e->type) && e->flag == cmp_or) {
47 74481 : list *l = e->l = exps_simplify_exp(v, e->l);
48 74481 : list *r = e->r = exps_simplify_exp(v, e->r);
49 :
50 74481 : if (list_length(l) == 1) {
51 69739 : sql_exp *ie = l->h->data;
52 :
53 69739 : if (exp_is_true(ie)) {
54 0 : v->changes++;
55 0 : continue;
56 69739 : } else if (exp_is_false(ie)) {
57 80 : v->changes++;
58 80 : nexps = list_merge(nexps, r, (fdup)NULL);
59 80 : continue;
60 : }
61 4742 : } else if (list_length(l) == 0) { /* left is true */
62 35 : v->changes++;
63 35 : continue;
64 : }
65 74366 : if (list_length(r) == 1) {
66 67235 : sql_exp *ie = r->h->data;
67 :
68 67235 : if (exp_is_true(ie)) {
69 0 : v->changes++;
70 0 : continue;
71 67235 : } else if (exp_is_false(ie)) {
72 52 : nexps = list_merge(nexps, l, (fdup)NULL);
73 52 : v->changes++;
74 52 : continue;
75 : }
76 7131 : } else if (list_length(r) == 0) { /* right is true */
77 33 : v->changes++;
78 33 : continue;
79 : }
80 : }
81 : /* TRUE and X -> X */
82 91129 : if (exp_is_true(e)) {
83 2156 : v->changes++;
84 2156 : continue;
85 : /* FALSE and X -> FALSE */
86 88973 : } else if (exp_is_false(e)) {
87 147 : v->changes++;
88 147 : return append(sa_list(v->sql->sa), e);
89 : } else {
90 88826 : append(nexps, e);
91 : }
92 : }
93 74641 : return nexps;
94 : }
95 : return exps;
96 : }
97 :
98 : static sql_exp *
99 1 : exp_exists(mvc *sql, sql_exp *le, int exists)
100 : {
101 1 : sql_subfunc *exists_func = NULL;
102 :
103 2 : if (!(exists_func = sql_bind_func(sql, "sys", exists ? "sql_exists" : "sql_not_exists", exp_subtype(le), NULL, F_FUNC, true)))
104 0 : return sql_error(sql, 02, SQLSTATE(42000) "exist operator on type %s missing", exp_subtype(le) ? exp_subtype(le)->type->base.name : "unknown");
105 1 : sql_exp *res = exp_unop(sql->sa, le, exists_func);
106 1 : set_has_no_nil(res);
107 1 : return res;
108 : }
109 :
110 : sql_exp *
111 9486676 : rewrite_simplify_exp(visitor *v, sql_rel *rel, sql_exp *e, int depth)
112 : {
113 9486676 : if (!e)
114 : return e;
115 :
116 9486676 : v->changes = 0;
117 9486676 : (void)rel; (void)depth;
118 :
119 9486676 : sql_subfunc *sf = e->f;
120 9486676 : if (is_func(e->type) && list_length(e->l) == 1 && is_not_func(sf)) {
121 5123 : list *args = e->l;
122 5123 : sql_exp *ie = args->h->data;
123 :
124 5123 : if (!ie)
125 : return e;
126 :
127 5123 : sql_subfunc *sf = ie->f;
128 5123 : if (is_func(ie->type) && list_length(ie->l) == 1 && is_not_func(sf)) {
129 0 : args = ie->l;
130 :
131 0 : ie = args->h->data;
132 0 : if (exp_name(e))
133 0 : exp_prop_alias(v->sql->sa, ie, e);
134 0 : v->changes++;
135 0 : return ie;
136 : }
137 5123 : if (is_func(ie->type) && list_length(ie->l) == 2 && is_not_anyequal(sf)) {
138 0 : args = ie->l;
139 :
140 0 : sql_exp *l = args->h->data;
141 0 : sql_exp *vals = args->h->next->data;
142 :
143 0 : if (!(ie = exp_in_func(v->sql, l, vals, 1, 0)))
144 : return NULL;
145 0 : if (exp_name(e))
146 0 : exp_prop_alias(v->sql->sa, ie, e);
147 0 : v->changes++;
148 0 : return ie;
149 : }
150 : /* TRUE or X -> TRUE
151 : * FALSE or X -> X */
152 5123 : if (is_compare(e->type) && e->flag == cmp_or) {
153 0 : list *l = e->l = exps_simplify_exp(v, e->l);
154 0 : list *r = e->r = exps_simplify_exp(v, e->r);
155 :
156 0 : if (list_length(l) == 1) {
157 0 : sql_exp *ie = l->h->data;
158 :
159 0 : if (exp_is_true(ie)) {
160 0 : v->changes++;
161 0 : return ie;
162 0 : } else if (exp_is_false(ie) && list_length(r) == 1) {
163 0 : v->changes++;
164 0 : return r->h->data;
165 : }
166 0 : } else if (list_length(l) == 0) { /* left is true */
167 0 : v->changes++;
168 0 : return exp_atom_bool(v->sql->sa, 1);
169 : }
170 0 : if (list_length(r) == 1) {
171 0 : sql_exp *ie = r->h->data;
172 :
173 0 : if (exp_is_true(ie)) {
174 0 : v->changes++;
175 0 : return ie;
176 0 : } else if (exp_is_false(ie) && list_length(l) == 1) {
177 0 : v->changes++;
178 0 : return l->h->data;
179 : }
180 0 : } else if (list_length(r) == 0) { /* right is true */
181 0 : v->changes++;
182 0 : return exp_atom_bool(v->sql->sa, 1);
183 : }
184 : }
185 : }
186 9486676 : if (is_compare(e->type) && e->flag == cmp_equal) { /* predicate_func = TRUE */
187 233887 : sql_exp *l = e->l, *r = e->r;
188 233887 : if (is_func(l->type) && exp_is_true(r) && (is_anyequal_func(((sql_subfunc*)l->f)) || is_exists_func(((sql_subfunc*)l->f))))
189 : return l;
190 233882 : if (is_func(l->type) && exp_is_false(r) && (is_anyequal_func(((sql_subfunc*)l->f)) || is_exists_func(((sql_subfunc*)l->f)))) {
191 2 : sql_subfunc *sf = l->f;
192 2 : if (is_anyequal_func(sf))
193 1 : return exp_in_func(v->sql, ((list*)l->l)->h->data, ((list*)l->l)->h->next->data, !is_anyequal(sf), 0);
194 1 : if (is_exists_func(sf))
195 1 : return exp_exists(v->sql, ((list*)l->l)->h->data, !is_exists(sf));
196 : return l;
197 : }
198 : }
199 : return e;
200 : }
201 :
202 : sql_rel *
203 5749362 : rewrite_simplify(visitor *v, uint8_t cycle, bool value_based_opt, sql_rel *rel)
204 : {
205 5749362 : if (!rel)
206 : return rel;
207 :
208 5749362 : if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && !list_empty(rel->exps)) {
209 1734677 : int changes = v->changes;
210 1734677 : rel->exps = exps_simplify_exp(v, rel->exps);
211 : /* At a select or inner join relation if the single expression is false, eliminate the inner relations with a dummy projection */
212 1734677 : if (value_based_opt && (v->changes > changes || cycle == 0) && (is_select(rel->op) || is_innerjoin(rel->op)) &&
213 179841 : !is_single(rel) && list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
214 1473 : list *nexps = sa_list(v->sql->sa), *toconvert = rel_projections(v->sql, rel->l, NULL, 1, 1);
215 1473 : if (is_innerjoin(rel->op))
216 92 : toconvert = list_merge(toconvert, rel_projections(v->sql, rel->r, NULL, 1, 1), NULL);
217 :
218 8752 : for (node *n = toconvert->h ; n ; n = n->next) {
219 7279 : sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL));
220 7279 : exp_prop_alias(v->sql->sa, a, e);
221 7279 : list_append(nexps, a);
222 : }
223 1473 : rel_destroy(rel->l);
224 1473 : if (is_innerjoin(rel->op)) {
225 92 : rel_destroy(rel->r);
226 92 : rel->r = NULL;
227 92 : rel->op = op_select;
228 : }
229 : /* make sure the single expression is false, so the generate NULL values won't match */
230 1473 : rel->exps->h->data = exp_atom_bool(v->sql->sa, 0);
231 1473 : rel->l = rel_project(v->sql->sa, NULL, nexps);
232 1473 : set_count_prop(v->sql->sa, rel->l, 1);
233 1473 : set_count_prop(v->sql->sa, rel, 0);
234 1473 : rel->card = CARD_ATOM;
235 1473 : v->changes++;
236 : }
237 : }
238 5749362 : if (is_join(rel->op) && list_empty(rel->exps))
239 68682 : rel->exps = NULL; /* crossproduct */
240 5749362 : return try_remove_empty_select(v, rel);
241 : }
242 :
243 : int
244 0 : find_member_pos(list *l, sql_table *t)
245 : {
246 0 : int i = 0;
247 0 : if (l) {
248 0 : for (node *n = l->h; n ; n = n->next, i++) {
249 0 : sql_part *pt = n->data;
250 0 : if (pt->member == t->base.id)
251 0 : return i;
252 : }
253 : }
254 : return -1;
255 : }
256 :
257 : /* The important task of the relational optimizer is to optimize the
258 : join order.
259 :
260 : The current implementation chooses the join order based on
261 : select counts, ie if one of the join sides has been reduced using
262 : a select this join is choosen over one without such selections.
263 : */
264 :
265 : /* currently we only find simple column expressions */
266 : sql_column *
267 1528597 : name_find_column( sql_rel *rel, const char *rname, const char *name, int pnr, sql_rel **bt )
268 : {
269 2002812 : sql_exp *alias = NULL;
270 2002812 : sql_column *c = NULL;
271 :
272 2002812 : switch (rel->op) {
273 1126565 : case op_basetable: {
274 1126565 : sql_table *t = rel->l;
275 :
276 1126565 : if (rel->exps) {
277 1126565 : sql_exp *e;
278 :
279 1126565 : if (rname)
280 1125707 : e = exps_bind_column2(rel->exps, rname, name, NULL);
281 : else
282 858 : e = exps_bind_column(rel->exps, name, NULL, NULL, 0);
283 1126575 : if (!e || e->type != e_column)
284 : return NULL;
285 1054775 : if (e->l)
286 1054775 : rname = e->l;
287 1054775 : name = e->r;
288 : }
289 1054775 : if (rname && strcmp(t->base.name, rname) != 0)
290 : return NULL;
291 1053526 : sql_table *mt = rel_base_get_mergetable(rel);
292 1053531 : if (ol_length(t->columns)) {
293 7440346 : for (node *cn = ol_first_node(t->columns); cn; cn = cn->next) {
294 7328471 : sql_column *c = cn->data;
295 7328471 : if (strcmp(c->base.name, name) == 0) {
296 941652 : if (bt)
297 78014 : *bt = rel;
298 941650 : if (pnr < 0 || (mt &&
299 0 : find_member_pos(mt->members, c->t) == pnr))
300 941657 : return c;
301 : }
302 : }
303 : }
304 111875 : if (name[0] == '%' && ol_length(t->idxs)) {
305 64878 : for (node *cn = ol_first_node(t->idxs); cn; cn = cn->next) {
306 53997 : sql_idx *i = cn->data;
307 53997 : if (strcmp(i->base.name, name+1 /* skip % */) == 0) {
308 6222 : if (bt)
309 741 : *bt = rel;
310 6222 : if (pnr < 0 || (mt &&
311 0 : find_member_pos(mt->members, i->t) == pnr)) {
312 6222 : sql_kc *c = i->columns->h->data;
313 6222 : return c->c;
314 : }
315 : }
316 : }
317 : }
318 : break;
319 : }
320 : case op_table:
321 : /* table func */
322 : return NULL;
323 : case op_ddl:
324 0 : if (is_updateble(rel))
325 0 : return name_find_column( rel->l, rname, name, pnr, bt);
326 : return NULL;
327 235992 : case op_join:
328 : case op_left:
329 : case op_right:
330 : case op_full:
331 : /* first right (possible subquery) */
332 235992 : c = name_find_column( rel->r, rname, name, pnr, bt);
333 : /* fall through */
334 : case op_semi:
335 : case op_anti:
336 235992 : if (!c)
337 180384 : c = name_find_column( rel->l, rname, name, pnr, bt);
338 180384 : if (!c && !list_empty(rel->attr)) {
339 19348 : if (rname)
340 19331 : alias = exps_bind_column2(rel->attr, rname, name, NULL);
341 : else
342 17 : alias = exps_bind_column(rel->attr, name, NULL, NULL, 1);
343 : }
344 : return c;
345 87610 : case op_select:
346 : case op_topn:
347 : case op_sample:
348 87610 : return name_find_column( rel->l, rname, name, pnr, bt);
349 29307 : case op_union:
350 : case op_inter:
351 : case op_except:
352 :
353 29307 : if (pnr >= 0 || pnr == -2) {
354 : /* first right (possible subquery) */
355 29307 : c = name_find_column( rel->r, rname, name, pnr, bt);
356 29307 : if (!c)
357 25971 : c = name_find_column( rel->l, rname, name, pnr, bt);
358 3336 : return c;
359 : }
360 : return NULL;
361 :
362 515663 : case op_project:
363 : case op_groupby:
364 515663 : if (!rel->exps)
365 : break;
366 515663 : if (rname)
367 499471 : alias = exps_bind_column2(rel->exps, rname, name, NULL);
368 : else
369 16192 : alias = exps_bind_column(rel->exps, name, NULL, NULL, 1);
370 515663 : if (is_groupby(rel->op) && alias && alias->type == e_column && !list_empty(rel->r)) {
371 162855 : if (alias->l)
372 155840 : alias = exps_bind_column2(rel->r, alias->l, alias->r, NULL);
373 : else
374 7015 : alias = exps_bind_column(rel->r, alias->r, NULL, NULL, 1);
375 : }
376 515663 : if (is_groupby(rel->op) && !alias && rel->l) {
377 : /* Group by column not found as alias in projection
378 : * list, fall back to check plain input columns */
379 : return name_find_column( rel->l, rname, name, pnr, bt);
380 : }
381 : break;
382 : case op_insert:
383 : case op_update:
384 : case op_delete:
385 : case op_truncate:
386 : case op_merge:
387 : break;
388 : }
389 520346 : if (alias && !is_join(rel->op)) { /* we found an expression with the correct name, but
390 : we need sql_columns */
391 382705 : if (rel->l && alias->type == e_column) /* real alias */
392 354436 : return name_find_column(rel->l, alias->l, alias->r, pnr, bt);
393 : }
394 : return NULL;
395 : }
396 :
397 : sql_column *
398 119950 : exp_find_column( sql_rel *rel, sql_exp *exp, int pnr)
399 : {
400 119950 : if (exp->type == e_column)
401 113642 : return name_find_column(rel, exp->l, exp->r, pnr, NULL);
402 : return NULL;
403 : }
404 :
405 : int
406 1834174 : exp_joins_rels(sql_exp *e, list *rels)
407 : {
408 1834174 : sql_rel *l = NULL, *r = NULL;
409 :
410 1834174 : assert (e->type == e_cmp);
411 :
412 1834174 : if (e->flag == cmp_or) {
413 : l = NULL;
414 1834174 : } else if (e->flag == cmp_filter) {
415 4 : list *ll = e->l;
416 4 : list *lr = e->r;
417 :
418 4 : l = find_rel(rels, ll->h->data);
419 4 : r = find_rel(rels, lr->h->data);
420 1834170 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
421 0 : list *lr = e->r;
422 :
423 0 : l = find_rel(rels, e->l);
424 0 : if (lr && lr->h)
425 0 : r = find_rel(rels, lr->h->data);
426 : } else {
427 1834170 : l = find_rel(rels, e->l);
428 1834170 : r = find_rel(rels, e->r);
429 : }
430 :
431 1834174 : if (l && r)
432 402628 : return 0;
433 : return -1;
434 : }
435 :
436 : static int
437 99 : rel_is_unique(sql_rel *rel)
438 : {
439 99 : switch(rel->op) {
440 0 : case op_semi:
441 : case op_anti:
442 : case op_inter:
443 : case op_except:
444 : case op_topn:
445 : case op_sample:
446 0 : return rel_is_unique(rel->l);
447 : case op_table:
448 : case op_basetable:
449 : return 1;
450 99 : default:
451 99 : return 0;
452 : }
453 : }
454 :
455 : int
456 15674 : kc_column_cmp(sql_kc *kc, sql_column *c)
457 : {
458 : /* return on equality */
459 15674 : return !(c == kc->c);
460 : }
461 :
462 : /* WARNING exps_unique doesn't check for duplicate NULL values */
463 : int
464 28563 : exps_unique(mvc *sql, sql_rel *rel, list *exps)
465 : {
466 28563 : int nr = 0, need_check = 0;
467 28563 : sql_ukey *k = NULL;
468 :
469 28563 : if (list_empty(exps))
470 : return 0;
471 85179 : for(node *n = exps->h; n ; n = n->next) {
472 56616 : sql_exp *e = n->data;
473 56616 : prop *p;
474 :
475 56616 : if (!is_unique(e)) { /* ignore unique columns */
476 56586 : need_check++;
477 56586 : if (!k && (p = find_prop(e->p, PROP_HASHCOL))) /* at the moment, use only one k */
478 138 : k = p->value.pval;
479 : }
480 : }
481 28563 : if (!need_check) /* all have unique property return */
482 : return 1;
483 28547 : if (!k || list_length(k->k.columns) != need_check)
484 28446 : return 0;
485 101 : if (rel) {
486 101 : char *matched = SA_ZNEW_ARRAY(sql->sa, char, list_length(k->k.columns));
487 101 : fcmp cmp = (fcmp)&kc_column_cmp;
488 205 : for(node *n = exps->h; n; n = n->next) {
489 104 : sql_exp *e = n->data;
490 104 : sql_column *c;
491 104 : node *m;
492 :
493 104 : if (is_unique(e))
494 0 : continue;
495 104 : if ((c = exp_find_column(rel, e, -2)) != NULL && (m = list_find(k->k.columns, c, cmp)) != NULL) {
496 102 : int pos = list_position(k->k.columns, m->data);
497 102 : if (!matched[pos])
498 102 : nr++;
499 102 : matched[pos] = 1;
500 : }
501 : }
502 101 : if (nr == list_length(k->k.columns))
503 99 : return rel_is_unique(rel);
504 : }
505 : return 0;
506 : }
507 :
508 : BUN
509 675992 : get_rel_count(sql_rel *rel)
510 : {
511 675992 : prop *found = find_prop(rel->p, PROP_COUNT);
512 675992 : return found ? found->value.lval : BUN_NONE;
513 : }
514 :
515 : void
516 781105 : set_count_prop(sql_allocator *sa, sql_rel *rel, BUN val)
517 : {
518 781105 : if (val != BUN_NONE) {
519 749886 : prop *found = find_prop(rel->p, PROP_COUNT);
520 :
521 749886 : if (found) {
522 2275 : found->value.lval = val;
523 : } else {
524 747611 : prop *p = rel->p = prop_create(sa, PROP_COUNT, rel->p);
525 747609 : p->value.lval = val;
526 : }
527 : }
528 781103 : }
|