Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_rewriter.h"
15 : #include "rel_exp.h"
16 : #include "rel_dump.h"
17 : #include "rel_basetable.h"
18 :
19 : /* simplify expressions, such as not(not(x)) */
20 : /* exp visitor */
21 :
22 : #define is_not_anyequal(sf) (strcmp((sf)->func->base.name, "sql_not_anyequal") == 0)
23 :
24 : static list *
25 1729806 : exps_simplify_exp(visitor *v, list *exps)
26 : {
27 1729806 : if (list_empty(exps))
28 : return exps;
29 :
30 1729806 : int needed = 0;
31 3709125 : for (node *n=exps->h; n && !needed; n = n->next) {
32 1979319 : sql_exp *e = n->data;
33 :
34 2111942 : needed = (exp_is_true(e) || exp_is_false(e) || (is_compare(e->type) && e->flag == cmp_or));
35 : }
36 1729806 : if (needed) {
37 : /* if there's only one expression and it is false, we have to keep it */
38 132623 : if (list_length(exps) == 1 && exp_is_false(exps->h->data))
39 : return exps;
40 82815 : list *nexps = sa_list(v->sql->sa);
41 184908 : for (node *n=exps->h; n; n = n->next) {
42 102497 : sql_exp *e = n->data;
43 :
44 : /* TRUE or X -> TRUE
45 : * FALSE or X -> X */
46 102497 : if (is_compare(e->type) && e->flag == cmp_or) {
47 73823 : list *l = e->l = exps_simplify_exp(v, e->l);
48 73823 : list *r = e->r = exps_simplify_exp(v, e->r);
49 :
50 73823 : if (list_length(l) == 1) {
51 69524 : sql_exp *ie = l->h->data;
52 :
53 69524 : if (exp_is_true(ie)) {
54 0 : v->changes++;
55 0 : continue;
56 69524 : } else if (exp_is_false(ie)) {
57 190 : v->changes++;
58 190 : nexps = list_merge(nexps, r, (fdup)NULL);
59 190 : continue;
60 : }
61 4299 : } else if (list_length(l) == 0) { /* left is true */
62 32 : v->changes++;
63 32 : continue;
64 : }
65 73601 : if (list_length(r) == 1) {
66 66914 : sql_exp *ie = r->h->data;
67 :
68 66914 : if (exp_is_true(ie)) {
69 0 : v->changes++;
70 0 : continue;
71 66914 : } else if (exp_is_false(ie)) {
72 300 : nexps = list_merge(nexps, l, (fdup)NULL);
73 300 : v->changes++;
74 300 : continue;
75 : }
76 6687 : } else if (list_length(r) == 0) { /* right is true */
77 27 : v->changes++;
78 27 : continue;
79 : }
80 : }
81 : /* TRUE and X -> X */
82 101948 : if (exp_is_true(e)) {
83 10643 : v->changes++;
84 10643 : continue;
85 : /* FALSE and X -> FALSE */
86 91305 : } else if (exp_is_false(e)) {
87 404 : v->changes++;
88 404 : return append(sa_list(v->sql->sa), e);
89 : } else {
90 90901 : append(nexps, e);
91 : }
92 : }
93 82411 : return nexps;
94 : }
95 : return exps;
96 : }
97 :
98 : static sql_exp *
99 1 : exp_exists(mvc *sql, sql_exp *le, int exists)
100 : {
101 1 : sql_subfunc *exists_func = NULL;
102 :
103 2 : if (!(exists_func = sql_bind_func(sql, "sys", exists ? "sql_exists" : "sql_not_exists", exp_subtype(le), NULL, F_FUNC, true, true)))
104 0 : return sql_error(sql, 02, SQLSTATE(42000) "exist operator on type %s missing", exp_subtype(le) ? exp_subtype(le)->type->base.name : "unknown");
105 1 : sql_exp *res = exp_unop(sql->sa, le, exists_func);
106 1 : set_has_no_nil(res);
107 1 : return res;
108 : }
109 :
110 : sql_exp *
111 9623833 : rewrite_simplify_exp(visitor *v, sql_rel *rel, sql_exp *e, int depth)
112 : {
113 9623833 : if (!e)
114 : return e;
115 :
116 9623833 : v->changes = 0;
117 9623833 : (void)rel; (void)depth;
118 :
119 9623833 : sql_subfunc *sf = e->f;
120 9623833 : if (is_func(e->type) && list_length(e->l) == 1 && is_not_func(sf)) {
121 5564 : list *args = e->l;
122 5564 : sql_exp *ie = args->h->data;
123 :
124 5564 : if (!ie)
125 : return e;
126 :
127 5564 : sql_subfunc *sf = ie->f;
128 5564 : if (is_func(ie->type) && list_length(ie->l) == 1 && is_not_func(sf)) {
129 0 : args = ie->l;
130 :
131 0 : ie = args->h->data;
132 0 : if (exp_name(e))
133 0 : exp_prop_alias(v->sql->sa, ie, e);
134 0 : v->changes++;
135 0 : return ie;
136 : }
137 5564 : if (is_func(ie->type) && list_length(ie->l) == 2 && is_not_anyequal(sf)) {
138 0 : args = ie->l;
139 :
140 0 : sql_exp *l = args->h->data;
141 0 : sql_exp *vals = args->h->next->data;
142 :
143 0 : if (!(ie = exp_in_func(v->sql, l, vals, 1, 0)))
144 : return NULL;
145 0 : if (exp_name(e))
146 0 : exp_prop_alias(v->sql->sa, ie, e);
147 0 : v->changes++;
148 0 : return ie;
149 : }
150 : /* TRUE or X -> TRUE
151 : * FALSE or X -> X */
152 5564 : if (is_compare(e->type) && e->flag == cmp_or) {
153 0 : list *l = e->l = exps_simplify_exp(v, e->l);
154 0 : list *r = e->r = exps_simplify_exp(v, e->r);
155 :
156 0 : if (list_length(l) == 1) {
157 0 : sql_exp *ie = l->h->data;
158 :
159 0 : if (exp_is_true(ie)) {
160 0 : v->changes++;
161 0 : return ie;
162 0 : } else if (exp_is_false(ie) && list_length(r) == 1) {
163 0 : v->changes++;
164 0 : return r->h->data;
165 : }
166 0 : } else if (list_length(l) == 0) { /* left is true */
167 0 : v->changes++;
168 0 : return exp_atom_bool(v->sql->sa, 1);
169 : }
170 0 : if (list_length(r) == 1) {
171 0 : sql_exp *ie = r->h->data;
172 :
173 0 : if (exp_is_true(ie)) {
174 0 : v->changes++;
175 0 : return ie;
176 0 : } else if (exp_is_false(ie) && list_length(l) == 1) {
177 0 : v->changes++;
178 0 : return l->h->data;
179 : }
180 0 : } else if (list_length(r) == 0) { /* right is true */
181 0 : v->changes++;
182 0 : return exp_atom_bool(v->sql->sa, 1);
183 : }
184 : }
185 : }
186 9623833 : if (is_compare(e->type) && e->flag == cmp_equal && !is_semantics(e)) { /* predicate_func = TRUE */
187 238641 : sql_exp *l = e->l, *r = e->r;
188 238641 : if (is_func(l->type) && exp_is_true(r) && (is_anyequal_func(((sql_subfunc*)l->f)) || is_exists_func(((sql_subfunc*)l->f))))
189 : return l;
190 238637 : if (is_func(l->type) && exp_is_false(r) && exp_is_not_null(r) && (is_anyequal_func(((sql_subfunc*)l->f)) || is_exists_func(((sql_subfunc*)l->f)))) {
191 2 : sql_subfunc *sf = l->f;
192 2 : if (is_anyequal_func(sf))
193 1 : return exp_in_func(v->sql, ((list*)l->l)->h->data, ((list*)l->l)->h->next->data, !is_anyequal(sf), 0);
194 1 : if (is_exists_func(sf))
195 1 : return exp_exists(v->sql, ((list*)l->l)->h->data, !is_exists(sf));
196 : return l;
197 : }
198 : }
199 : return e;
200 : }
201 :
202 : sql_rel *
203 5730463 : rewrite_simplify(visitor *v, uint8_t cycle, bool value_based_opt, sql_rel *rel)
204 : {
205 5730463 : if (!rel)
206 : return rel;
207 :
208 5730463 : if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && !list_empty(rel->exps)) {
209 1582161 : int changes = v->changes;
210 1582161 : rel->exps = exps_simplify_exp(v, rel->exps);
211 : /* At a select or inner join relation if the single expression is false, eliminate the inner relations with a dummy projection */
212 1582160 : if (value_based_opt && (v->changes > changes || cycle == 0) && (is_select(rel->op) || is_innerjoin(rel->op)) &&
213 190153 : !is_single(rel) && list_length(rel->exps) == 1 && (exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
214 3941 : list *nexps = sa_list(v->sql->sa), *toconvert = rel_projections(v->sql, rel->l, NULL, 1, 1);
215 3941 : if (is_innerjoin(rel->op))
216 165 : toconvert = list_merge(toconvert, rel_projections(v->sql, rel->r, NULL, 1, 1), NULL);
217 :
218 34150 : for (node *n = toconvert->h ; n ; n = n->next) {
219 30209 : sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL, 0));
220 30209 : exp_prop_alias(v->sql->sa, a, e);
221 30209 : list_append(nexps, a);
222 : }
223 3941 : rel_destroy(rel->l);
224 3941 : if (is_innerjoin(rel->op)) {
225 165 : rel_destroy(rel->r);
226 165 : rel->r = NULL;
227 165 : rel->op = op_select;
228 : }
229 : /* make sure the single expression is false, so the generate NULL values won't match */
230 3941 : rel->exps->h->data = exp_atom_bool(v->sql->sa, 0);
231 3941 : rel->l = rel_project(v->sql->sa, NULL, nexps);
232 3941 : set_count_prop(v->sql->sa, rel->l, 1);
233 3941 : set_count_prop(v->sql->sa, rel, 0);
234 3941 : rel->card = CARD_ATOM;
235 3941 : v->changes++;
236 : }
237 : }
238 5730464 : if (is_join(rel->op) && list_empty(rel->exps))
239 68743 : rel->exps = NULL; /* crossproduct */
240 5730464 : return try_remove_empty_select(v, rel);
241 : }
242 :
243 : int
244 0 : find_member_pos(list *l, sql_table *t)
245 : {
246 0 : int i = 0;
247 0 : if (l) {
248 0 : for (node *n = l->h; n ; n = n->next, i++) {
249 0 : sql_part *pt = n->data;
250 0 : if (pt->member == t->base.id)
251 0 : return i;
252 : }
253 : }
254 : return -1;
255 : }
256 :
257 : /* The important task of the relational optimizer is to optimize the
258 : join order.
259 :
260 : The current implementation chooses the join order based on
261 : select counts, ie if one of the join sides has been reduced using
262 : a select this join is chosen over one without such selections.
263 : */
264 :
265 : /* currently we only find simple column expressions */
266 : sql_column *
267 732907 : name_find_column( sql_rel *rel, const char *rname, const char *name, int pnr, sql_rel **bt )
268 : {
269 1205481 : sql_exp *alias = NULL;
270 1205481 : sql_column *c = NULL;
271 :
272 1205481 : switch (rel->op) {
273 267034 : case op_basetable: {
274 267034 : sql_table *t = rel->l;
275 :
276 267034 : if (rel->exps) {
277 267034 : sql_exp *e;
278 :
279 267034 : if (rname)
280 266143 : e = exps_bind_column2(rel->exps, rname, name, NULL);
281 : else
282 891 : e = exps_bind_column(rel->exps, name, NULL, NULL, 0);
283 267034 : if (!e || e->type != e_column)
284 : return NULL;
285 173219 : if (e->l)
286 173219 : rname = e->l;
287 173219 : name = e->r;
288 : }
289 173219 : if (rname && strcmp(t->base.name, rname) != 0)
290 : return NULL;
291 173196 : sql_table *mt = rel_base_get_mergetable(rel);
292 173196 : if (ol_length(t->columns)) {
293 738144 : for (node *cn = ol_first_node(t->columns); cn; cn = cn->next) {
294 736097 : sql_column *c = cn->data;
295 736097 : if (strcmp(c->base.name, name) == 0) {
296 171149 : if (bt)
297 90264 : *bt = rel;
298 171149 : if (pnr < 0 || (mt &&
299 0 : find_member_pos(mt->members, c->t) == pnr))
300 171149 : return c;
301 : }
302 : }
303 : }
304 2047 : if (name[0] == '%' && ol_length(t->idxs)) {
305 15150 : for (node *cn = ol_first_node(t->idxs); cn; cn = cn->next) {
306 15020 : sql_idx *i = cn->data;
307 15020 : if (strcmp(i->base.name, name+1 /* skip % */) == 0) {
308 1917 : if (bt)
309 751 : *bt = rel;
310 1917 : if (pnr < 0 || (mt &&
311 0 : find_member_pos(mt->members, i->t) == pnr)) {
312 1917 : sql_kc *c = i->columns->h->data;
313 1917 : return c->c;
314 : }
315 : }
316 : }
317 : }
318 : break;
319 : }
320 : case op_table:
321 : /* table func */
322 : return NULL;
323 : case op_ddl:
324 0 : if (is_updateble(rel))
325 0 : return name_find_column( rel->l, rname, name, pnr, bt);
326 : return NULL;
327 264306 : case op_join:
328 : case op_left:
329 : case op_right:
330 : case op_full:
331 : /* first right (possible subquery) */
332 264306 : c = name_find_column( rel->r, rname, name, pnr, bt);
333 : /* fall through */
334 : case op_semi:
335 : case op_anti:
336 264306 : if (!c)
337 214874 : c = name_find_column( rel->l, rname, name, pnr, bt);
338 214874 : if (!c && !list_empty(rel->attr)) {
339 19423 : if (rname)
340 19406 : alias = exps_bind_column2(rel->attr, rname, name, NULL);
341 : else
342 17 : alias = exps_bind_column(rel->attr, name, NULL, NULL, 1);
343 : }
344 : return c;
345 86495 : case op_select:
346 : case op_topn:
347 : case op_sample:
348 86495 : return name_find_column( rel->l, rname, name, pnr, bt);
349 76 : case op_union:
350 : case op_inter:
351 : case op_except:
352 :
353 76 : if (pnr >= 0 || pnr == -2) {
354 : /* first right (possible subquery) */
355 76 : c = name_find_column( rel->r, rname, name, pnr, bt);
356 76 : if (!c)
357 49 : c = name_find_column( rel->l, rname, name, pnr, bt);
358 27 : return c;
359 : }
360 : return NULL;
361 :
362 25782 : case op_munion:
363 25782 : if (pnr >= 0 || pnr == -2) {
364 71258 : for (node *n = ((list*)rel->l)->h; n; n = n->next) {
365 49295 : if ((c = name_find_column(n->data, rname, name, pnr, bt)) != NULL)
366 3819 : return c;
367 : }
368 : }
369 : return NULL;
370 542829 : case op_project:
371 : case op_groupby:
372 542829 : if (!rel->exps)
373 : break;
374 542829 : if (rname)
375 510326 : alias = exps_bind_column2(rel->exps, rname, name, NULL);
376 : else
377 32503 : alias = exps_bind_column(rel->exps, name, NULL, NULL, 1);
378 542829 : if (is_groupby(rel->op) && alias && alias->type == e_column && !list_empty(rel->r)) {
379 179038 : if (alias->l)
380 164028 : alias = exps_bind_column2(rel->r, alias->l, alias->r, NULL);
381 : else
382 15010 : alias = exps_bind_column(rel->r, alias->r, NULL, NULL, 1);
383 : }
384 542829 : if (is_groupby(rel->op) && !alias && rel->l) {
385 : /* Group by column not found as alias in projection
386 : * list, fall back to check plain input columns */
387 : return name_find_column( rel->l, rname, name, pnr, bt);
388 : }
389 : break;
390 : case op_insert:
391 : case op_update:
392 : case op_delete:
393 : case op_truncate:
394 : case op_merge:
395 : break;
396 : }
397 539628 : if (alias && !is_join(rel->op)) { /* we found an expression with the correct name, but
398 : we need sql_columns */
399 411129 : if (rel->l && alias->type == e_column) /* real alias */
400 382699 : return name_find_column(rel->l, alias->l, alias->r, pnr, bt);
401 : }
402 : return NULL;
403 : }
404 :
405 : sql_column *
406 105233 : exp_find_column( sql_rel *rel, sql_exp *exp, int pnr)
407 : {
408 105233 : if (exp->type == e_column)
409 104165 : return name_find_column(rel, exp->l, exp->r, pnr, NULL);
410 : return NULL;
411 : }
412 :
413 : int
414 1779310 : exp_joins_rels(sql_exp *e, list *rels)
415 : {
416 1779310 : sql_rel *l = NULL, *r = NULL;
417 :
418 1779310 : assert (e->type == e_cmp);
419 :
420 1779310 : if (e->flag == cmp_or) {
421 : l = NULL;
422 1779310 : } else if (e->flag == cmp_filter) {
423 5 : list *ll = e->l;
424 5 : list *lr = e->r;
425 :
426 5 : l = find_rel(rels, ll->h->data);
427 5 : r = find_rel(rels, lr->h->data);
428 1779305 : } else if (e->flag == cmp_in || e->flag == cmp_notin) {
429 0 : list *lr = e->r;
430 :
431 0 : l = find_rel(rels, e->l);
432 0 : if (lr && lr->h)
433 0 : r = find_rel(rels, lr->h->data);
434 : } else {
435 1779305 : l = find_rel(rels, e->l);
436 1779305 : r = find_rel(rels, e->r);
437 : }
438 :
439 1779310 : if (l && r)
440 384338 : return 0;
441 : return -1;
442 : }
443 :
444 : static int
445 103 : rel_is_unique(sql_rel *rel)
446 : {
447 103 : switch(rel->op) {
448 0 : case op_semi:
449 : case op_anti:
450 : case op_inter:
451 : case op_except:
452 : case op_topn:
453 : case op_sample:
454 0 : return rel_is_unique(rel->l);
455 : case op_table:
456 : case op_basetable:
457 : return 1;
458 103 : default:
459 103 : return 0;
460 : }
461 : }
462 :
463 : int
464 9522 : kc_column_cmp(sql_kc *kc, sql_column *c)
465 : {
466 : /* return on equality */
467 9522 : return !(c == kc->c);
468 : }
469 :
470 : /* WARNING exps_unique doesn't check for duplicate NULL values */
471 : int
472 85762 : exps_unique(mvc *sql, sql_rel *rel, list *exps)
473 : {
474 85762 : int nr = 0, need_check = 0;
475 85762 : sql_ukey *k = NULL;
476 :
477 85762 : if (list_empty(exps))
478 : return 0;
479 551966 : for(node *n = exps->h; n ; n = n->next) {
480 466204 : sql_exp *e = n->data;
481 466204 : prop *p;
482 :
483 466204 : if (!is_unique(e)) { /* ignore unique columns */
484 452613 : need_check++;
485 452613 : if (!k && (p = find_prop(e->p, PROP_HASHCOL))) /* at the moment, use only one k */
486 1427 : k = p->value.pval;
487 : }
488 : }
489 85762 : if (!need_check) /* all have unique property return */
490 : return 1;
491 84480 : if (!k || list_length(k->k.columns) != need_check)
492 84375 : return 0;
493 105 : if (rel) {
494 105 : char *matched = SA_ZNEW_ARRAY(sql->sa, char, list_length(k->k.columns));
495 105 : fcmp cmp = (fcmp)&kc_column_cmp;
496 221 : for(node *n = exps->h; n; n = n->next) {
497 116 : sql_exp *e = n->data;
498 116 : sql_column *c;
499 116 : node *m;
500 :
501 116 : if (is_unique(e))
502 0 : continue;
503 116 : if ((c = exp_find_column(rel, e, -2)) != NULL && (m = list_find(k->k.columns, c, cmp)) != NULL) {
504 114 : int pos = list_position(k->k.columns, m->data);
505 114 : if (!matched[pos])
506 114 : nr++;
507 114 : matched[pos] = 1;
508 : }
509 : }
510 105 : if (nr == list_length(k->k.columns))
511 103 : return rel_is_unique(rel);
512 : }
513 : return 0;
514 : }
515 :
516 : BUN
517 727904 : get_rel_count(sql_rel *rel)
518 : {
519 727904 : prop *found = find_prop(rel->p, PROP_COUNT);
520 727908 : return found ? found->value.lval : BUN_NONE;
521 : }
522 :
523 : void
524 1396354 : set_count_prop(allocator *sa, sql_rel *rel, BUN val)
525 : {
526 1396354 : if (val != BUN_NONE) {
527 1357915 : prop *found = find_prop(rel->p, PROP_COUNT);
528 :
529 1357764 : if (found) {
530 269079 : found->value.lval = val;
531 : } else {
532 1088685 : prop *p = rel->p = prop_create(sa, PROP_COUNT, rel->p);
533 1088682 : p->value.lval = val;
534 : }
535 : }
536 1396200 : }
|