Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer_private.h"
15 : #include "rel_basetable.h"
16 : #include "rel_exp.h"
17 : #include "rel_remote.h"
18 : #include "sql_privileges.h"
19 :
20 : typedef struct rmt_prop_state {
21 : int depth;
22 : prop* rmt;
23 : sql_rel* orig;
24 : bool no_rmt_branch_rpl_leaf;
25 : } rps;
26 :
27 : static sql_rel*
28 305 : rel_unique_exps(mvc *sql, sql_rel *rel)
29 : {
30 305 : list *l;
31 :
32 305 : if (!is_project(rel->op))
33 : return rel;
34 153 : l = sa_list(sql->sa);
35 1500 : for (node *n = rel->exps->h; n; n = n->next) {
36 1347 : sql_exp *e = n->data;
37 1347 : if (e->type == e_column) {
38 1255 : const char *name = exp_name(e);
39 1255 : const char *rname = exp_relname(e);
40 :
41 : /* If there are two identical expression names, there will be ambiguity */
42 1255 : if (name && rname && exps_bind_column2(l, rname, name, NULL))
43 0 : exp_label(sql->sa, e, ++sql->label);
44 : }
45 1347 : append(l,e);
46 : }
47 153 : rel->exps = l;
48 153 : return rel;
49 : }
50 :
51 : static int
52 1774 : has_remote_or_replica( sql_rel *rel )
53 : {
54 3737 : if (!rel)
55 : return 0;
56 :
57 2293 : switch (rel->op) {
58 201 : case op_basetable: {
59 201 : sql_table *t = rel->l;
60 :
61 201 : return t && (isReplicaTable(t) || isRemote(t));
62 : }
63 21 : case op_table:
64 21 : if (IS_TABLE_PROD_FUNC(rel->flag) || rel->flag == TABLE_FROM_RELATION)
65 21 : return has_remote_or_replica( rel->l );
66 : break;
67 93 : case op_join:
68 : case op_left:
69 : case op_right:
70 : case op_full:
71 :
72 : case op_semi:
73 : case op_anti:
74 :
75 : case op_union:
76 : case op_inter:
77 : case op_except:
78 : case op_merge:
79 :
80 : case op_insert:
81 : case op_update:
82 : case op_delete:
83 93 : return has_remote_or_replica( rel->l ) || has_remote_or_replica( rel->r );
84 1939 : case op_project:
85 : case op_select:
86 : case op_groupby:
87 : case op_topn:
88 : case op_sample:
89 : case op_truncate:
90 1939 : return has_remote_or_replica( rel->l );
91 39 : case op_ddl:
92 39 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq /*|| rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view*/)
93 3 : return has_remote_or_replica( rel->l );
94 36 : if (rel->flag == ddl_list || rel->flag == ddl_exception)
95 0 : return has_remote_or_replica( rel->l ) || has_remote_or_replica( rel->r );
96 : break;
97 : }
98 : return 0;
99 : }
100 :
101 : static sql_rel *
102 50 : do_replica_rewrite(mvc *sql, list *exps, sql_table *t, sql_table *p, int remote_prop)
103 : {
104 50 : node *n, *m;
105 50 : sql_rel *r = rel_basetable(sql, p, t->base.name);
106 :
107 185 : for (n = exps->h; n; n = n->next) {
108 135 : sql_exp *e = n->data;
109 135 : const char *nname = exp_name(e);
110 :
111 135 : node *nn = ol_find_name(t->columns, nname);
112 135 : if (nn) {
113 85 : sql_column *c = nn->data;
114 85 : rel_base_use(sql, r, c->colnr);
115 50 : } else if (strcmp(nname, TID) == 0) {
116 50 : rel_base_use_tid(sql, r);
117 : } else {
118 0 : assert(0);
119 : }
120 : }
121 50 : r = rewrite_basetable(sql, r);
122 185 : for (n = exps->h, m = r->exps->h; n && m; n = n->next, m = m->next) {
123 135 : sql_exp *e = n->data;
124 135 : sql_exp *ne = m->data;
125 :
126 135 : exp_prop_alias(sql->sa, ne, e);
127 : }
128 50 : list_hash_clear(r->exps); /* the child table may have different column names, so clear the hash */
129 :
130 : /* set_remote() */
131 50 : if (remote_prop && p && isRemote(p)) {
132 6 : list *uris = sa_list(sql->sa);
133 6 : tid_uri *tu = SA_NEW(sql->sa, tid_uri);
134 6 : tu->id = p->base.id;
135 6 : tu->uri = p->query;
136 6 : append(uris, tu);
137 :
138 6 : prop *rmt_prop = r->p = prop_create(sql->sa, PROP_REMOTE, r->p);
139 6 : rmt_prop->id = p->base.id;
140 6 : rmt_prop->value.pval = uris;
141 : }
142 50 : return r;
143 : }
144 :
145 : static sql_rel *
146 51 : replica_rewrite(visitor *v, sql_table *t, list *exps)
147 : {
148 52 : sql_rel *res = NULL;
149 52 : rps *rpstate = v->data;
150 52 : prop *rp = rpstate->rmt;
151 52 : sqlid tid = rp->id;
152 52 : list *uris = rp->value.pval;
153 :
154 52 : if (mvc_highwater(v->sql))
155 0 : return sql_error(v->sql, 10, SQLSTATE(42000) "Query too complex: running out of stack space");
156 :
157 : /* if there was a REMOTE property in any higher node and there is not
158 : * a local tid then use the available uris to rewrite */
159 52 : if (uris && !tid) {
160 54 : for (node *n = t->members->h; n && !res; n = n->next) {
161 32 : sql_part *p = n->data;
162 32 : sql_table *pt = find_sql_table_id(v->sql->session->tr, t->s, p->member);
163 :
164 32 : if (!isRemote(pt))
165 8 : continue;
166 :
167 45 : for (node *m = uris->h; m && !res; m = m->next) {
168 24 : if (strcmp(((tid_uri*)m->data)->uri, pt->query) == 0) {
169 : /* we found a matching uri do the actual rewrite */
170 44 : res = do_replica_rewrite(v->sql, exps, t, pt,
171 22 : rpstate->no_rmt_branch_rpl_leaf ? true: false);
172 : /* set to the REMOTE a list with a single uri (the matching one)
173 : * this is for the case that our REMOTE subtree has only replicas
174 : * with multiple remotes*/
175 22 : if (list_length(rp->value.pval) > 1) {
176 3 : list *uri = sa_list(v->sql->sa);
177 3 : tid_uri *tu = SA_NEW(v->sql->sa, tid_uri);
178 : /* sql_gencode requires the proper tableid */
179 3 : tu->id = p->member;
180 3 : tu->uri = pt->query;
181 3 : append(uri, tu);
182 3 : rp->value.pval = uri;
183 3 : break;
184 : }
185 : }
186 : }
187 : }
188 : }
189 :
190 : /* no match, find one without remote or use first */
191 22 : if (!res) {
192 30 : sql_table *pt = NULL;
193 30 : int remote = 1;
194 :
195 34 : for (node *n = t->members->h; n; n = n->next) {
196 33 : sql_part *p = n->data;
197 33 : sql_table *next = find_sql_table_id(v->sql->session->tr, t->s, p->member);
198 :
199 : /* give preference to local tables and avoid empty merge or replica tables */
200 33 : if (!isRemote(next) && ((!isReplicaTable(next) && !isMergeTable(next)) || !list_empty(next->members))) {
201 29 : pt = next;
202 29 : remote = 0;
203 : /* if we resolved the replica to a local table we have to
204 : * go and remove the remote property from the subtree */
205 29 : sql_rel *r = ((rps*)v->data)->orig;
206 29 : r->p = prop_remove(r->p, rp);
207 29 : break;
208 : }
209 : }
210 30 : if (!pt) {
211 1 : sql_part *p = t->members->h->data;
212 1 : pt = find_sql_table_id(v->sql->session->tr, t->s, p->member);
213 : }
214 :
215 30 : if ((isMergeTable(pt) || isReplicaTable(pt)) && list_empty(pt->members))
216 1 : return sql_error(v->sql, 02, SQLSTATE(42000) "%s '%s'.'%s' should have at least one table associated",
217 1 : TABLE_TYPE_DESCRIPTION(pt->type, pt->properties), pt->s->base.name, pt->base.name);
218 29 : res = isReplicaTable(pt) ? replica_rewrite(v, pt, exps) : do_replica_rewrite(v->sql, exps, t, pt, remote);
219 : }
220 : return res;
221 : }
222 :
223 : static bool
224 18175 : eliminate_remote_or_replica_refs(visitor *v, sql_rel **rel)
225 : {
226 18175 : if (rel_is_ref(*rel) && !((*rel)->flag&MERGE_LEFT)) {
227 1598 : if (has_remote_or_replica(*rel)) {
228 16 : sql_rel *nrel = rel_copy(v->sql, *rel, 1);
229 16 : rel_destroy(*rel);
230 16 : *rel = nrel;
231 16 : return true;
232 : } else {
233 : // TODO why do we want to bail out if we have a non rmt/rpl ref?
234 : return false;
235 : }
236 : }
237 : return true;
238 : }
239 :
240 : static sql_rel *
241 12116 : rel_rewrite_replica_(visitor *v, sql_rel *rel)
242 : {
243 12116 : if (!eliminate_remote_or_replica_refs(v, &rel))
244 1052 : return rel;
245 :
246 : /* if we are higher in the tree clear the previous REMOTE prop in the visitor state */
247 11064 : if (v->data && v->depth <= ((rps*)v->data)->depth) {
248 120 : v->data = NULL;
249 : }
250 :
251 : /* no-leaf nodes: store the REMOTE property uris in the state of the visitor
252 : * leaf nodes: check if they are basetable replicas and proceed with the rewrite */
253 11064 : prop *p;
254 11064 : if (!is_basetable(rel->op)) {
255 : /* if there is a REMOTE prop set it to the visitor state */
256 7571 : if ((p = find_prop(rel->p, PROP_REMOTE)) != NULL) {
257 365 : rps *rp = SA_NEW(v->sql->sa, rps);
258 365 : rp->depth = v->depth;
259 365 : rp->rmt = p;
260 365 : rp->orig = rel;
261 365 : rp->no_rmt_branch_rpl_leaf = false;
262 365 : v->data = rp;
263 : }
264 : } else {
265 3493 : sql_table *t = rel->l;
266 :
267 3493 : if (t && isReplicaTable(t)) {
268 : /* we might have reached a replica table through a branch that has
269 : * no REMOTE property. In this case we have to set the v->data */
270 105 : if (!v->data && (p = find_prop(rel->p, PROP_REMOTE)) != NULL) {
271 15 : rps *rp = SA_NEW(v->sql->sa, rps);
272 15 : rp->depth = v->depth;
273 15 : rp->rmt = p;
274 15 : rp->orig = rel;
275 15 : rp->no_rmt_branch_rpl_leaf = true;
276 15 : v->data = rp;
277 : }
278 :
279 105 : if (list_empty(t->members)) /* in DDL statement cases skip if replica is empty */
280 54 : return rel;
281 :
282 51 : sql_rel *r = replica_rewrite(v, t, rel->exps);
283 51 : rel_destroy(rel);
284 51 : rel = r;
285 : }
286 : }
287 11010 : return rel;
288 : }
289 :
290 : static sql_rel *
291 1266 : rel_rewrite_replica(visitor *v, global_props *gp, sql_rel *rel)
292 : {
293 1266 : (void) gp;
294 1266 : return rel_visitor_topdown(v, rel, &rel_rewrite_replica_);
295 : }
296 :
297 : run_optimizer
298 686770 : bind_rewrite_replica(visitor *v, global_props *gp)
299 : {
300 686770 : (void) v;
301 686770 : return gp->needs_mergetable_rewrite || gp->needs_remote_replica_rewrite ? rel_rewrite_replica : NULL;
302 : }
303 :
304 : static list*
305 36 : rel_merge_remote_prop(visitor *v, prop *pl, prop *pr)
306 : {
307 36 : list* uris = sa_list(v->sql->sa);
308 : // TODO this double loop must go (maybe use the hashmap of the list?)
309 79 : for (node* n = ((list*)pl->value.pval)->h; n; n = n->next) {
310 104 : for (node* m = ((list*)pr->value.pval)->h; m; m = m->next) {
311 61 : tid_uri* ltu = n->data;
312 61 : tid_uri* rtu = m->data;
313 61 : if (strcmp(ltu->uri, rtu->uri) == 0) {
314 28 : append(uris, n->data);
315 : }
316 : }
317 : }
318 36 : return uris;
319 : }
320 :
321 : static sql_rel *
322 6059 : rel_rewrite_remote_(visitor *v, sql_rel *rel)
323 : {
324 6059 : prop *p, *pl, *pr;
325 :
326 6059 : if (!eliminate_remote_or_replica_refs(v, &rel))
327 530 : return rel;
328 :
329 5529 : sql_rel *l = rel->l, *r = rel->r; /* look on left and right relations after possibly doing rel_copy */
330 :
331 5529 : switch (rel->op) {
332 1745 : case op_basetable: {
333 1745 : sql_table *t = rel->l;
334 :
335 : /* when a basetable wraps a sql_table (->l) which is remote we want to store its remote
336 : * uri to the REMOTE property. As the property is pulled up the tree it can be used in
337 : * the case of binary rel operators (see later switch cases) in order to
338 : * 1. resolve properly (same uri) replica tables in the other subtree (that's why we
339 : * call the do_replica_rewrite)
340 : * 2. pull REMOTE over the binary op if the other subtree has a matching uri remote table
341 : */
342 1745 : if (t && isRemote(t) && (p = find_prop(rel->p, PROP_REMOTE)) == NULL) {
343 304 : if (t->query) {
344 304 : tid_uri *tu = SA_NEW(v->sql->sa, tid_uri);
345 304 : tu->id = t->base.id;
346 304 : tu->uri = mapiuri_uri(t->query, v->sql->sa);
347 304 : list *uris = sa_list(v->sql->sa);
348 304 : append(uris, tu);
349 :
350 304 : p = rel->p = prop_create(v->sql->sa, PROP_REMOTE, rel->p);
351 304 : p->id = 0;
352 304 : p->value.pval = uris;
353 : }
354 : }
355 1745 : if (t && isReplicaTable(t) && !list_empty(t->members)) {
356 : /* the parts of a replica are either
357 : * - remote tables for which we have to store tid and uri
358 : * - local table for which we only care if they exist (localpart var)
359 : * the relevant info are passed in the REMOTE property value.pval and id members
360 : */
361 56 : list *uris = sa_list(v->sql->sa);
362 56 : sqlid localpart = 0;
363 161 : for (node *n = t->members->h; n; n = n->next) {
364 105 : sql_part *part = n->data;
365 105 : sql_table *ptable = find_sql_table_id(v->sql->session->tr, t->s, part->member);
366 :
367 105 : if (isRemote(ptable)) {
368 58 : assert(ptable->query);
369 58 : tid_uri *tu = SA_NEW(v->sql->sa, tid_uri);
370 58 : tu->id = ptable->base.id;
371 58 : tu->uri = mapiuri_uri(ptable->query, v->sql->sa);
372 58 : append(uris, tu);
373 : } else {
374 47 : localpart = ptable->base.id;
375 : }
376 : }
377 : /* always introduce the remote prop even if there are no remote uri's
378 : * this is needed for the proper replica resolution */
379 56 : p = rel->p = prop_create(v->sql->sa, PROP_REMOTE, rel->p);
380 56 : p->id = localpart;
381 56 : p->value.pval = (void*)uris;
382 : }
383 : } break;
384 8 : case op_table:
385 8 : if (IS_TABLE_PROD_FUNC(rel->flag) || rel->flag == TABLE_FROM_RELATION) {
386 8 : if (l && (p = find_prop(l->p, PROP_REMOTE)) != NULL) {
387 5 : l->p = prop_remove(l->p, p);
388 5 : if (!find_prop(rel->p, PROP_REMOTE)) {
389 5 : p->p = rel->p;
390 5 : rel->p = p;
391 : }
392 : }
393 : } break;
394 738 : case op_join:
395 : case op_left:
396 : case op_right:
397 : case op_full:
398 :
399 : case op_semi:
400 : case op_anti:
401 :
402 : case op_union:
403 : case op_inter:
404 : case op_except:
405 :
406 : case op_insert:
407 : case op_update:
408 : case op_delete:
409 : case op_merge:
410 :
411 738 : if (rel->flag&MERGE_LEFT) /* search for any remote tables but don't propagate over to this relation */
412 : return rel;
413 :
414 : /* if both subtrees have REMOTE property with the common uri then pull it up */
415 736 : if (l && (pl = find_prop(l->p, PROP_REMOTE)) != NULL &&
416 77 : r && (pr = find_prop(r->p, PROP_REMOTE)) != NULL) {
417 :
418 36 : list *uris = rel_merge_remote_prop(v, pl, pr);
419 :
420 : /* if there are common uris pull the REMOTE prop with the common uris up */
421 36 : if (!list_empty(uris)) {
422 21 : l->p = prop_remove(l->p, pl);
423 21 : r->p = prop_remove(r->p, pr);
424 21 : if (!find_prop(rel->p, PROP_REMOTE)) {
425 : /* remove local tid ONLY if no subtree has local parts */
426 21 : if (pl->id == 0 || pr->id == 0)
427 16 : pl->id = 0;
428 : /* set the new (matching) uris */
429 21 : pl->value.pval = uris;
430 : /* push the pl REMOTE property to the list of properties */
431 21 : pl->p = rel->p;
432 21 : rel->p = pl;
433 : } else {
434 : // TODO what if we are here? can that even happen?
435 : }
436 : }
437 : }
438 : break;
439 2167 : case op_project:
440 : case op_select:
441 : case op_groupby:
442 : case op_topn:
443 : case op_sample:
444 : case op_truncate:
445 : /* if the subtree has the REMOTE property just pull it up */
446 2167 : if (l && (p = find_prop(l->p, PROP_REMOTE)) != NULL) {
447 330 : l->p = prop_remove(l->p, p);
448 330 : if (!find_prop(rel->p, PROP_REMOTE)) {
449 306 : p->p = rel->p;
450 306 : rel->p = p;
451 : }
452 : }
453 : break;
454 871 : case op_ddl:
455 871 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq /*|| rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view*/) {
456 0 : if (l && (p = find_prop(l->p, PROP_REMOTE)) != NULL) {
457 0 : l->p = prop_remove(l->p, p);
458 0 : if (!find_prop(rel->p, PROP_REMOTE)) {
459 0 : p->p = rel->p;
460 0 : rel->p = p;
461 : }
462 : }
463 871 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
464 418 : if (l && (pl = find_prop(l->p, PROP_REMOTE)) != NULL &&
465 0 : r && (pr = find_prop(r->p, PROP_REMOTE)) != NULL) {
466 :
467 0 : list *uris = rel_merge_remote_prop(v, pl, pr);
468 :
469 : /* if there are common uris pull the REMOTE prop with the common uris up */
470 0 : if (!list_empty(uris)) {
471 0 : l->p = prop_remove(l->p, pl);
472 0 : r->p = prop_remove(r->p, pr);
473 0 : if (!find_prop(rel->p, PROP_REMOTE)) {
474 : /* remove local tid ONLY if no subtree has local parts */
475 0 : if (pl->id == 0 || pr->id == 0)
476 0 : pl->id = 0;
477 : /* set the new (matching) uris */
478 0 : pl->value.pval = uris;
479 : /* push the pl REMOTE property to the list of properties */
480 0 : pl->p = rel->p;
481 0 : rel->p = pl;
482 : } else {
483 : // TODO what if we are here? can that even happen?
484 : }
485 : }
486 : }
487 : }
488 : break;
489 : }
490 5527 : return rel;
491 : }
492 :
493 : static sql_rel *
494 1266 : rel_rewrite_remote(visitor *v, global_props *gp, sql_rel *rel)
495 : {
496 1266 : (void) gp;
497 1266 : rel = rel_visitor_bottomup(v, rel, &rel_rewrite_remote_);
498 1266 : v->data = NULL;
499 1266 : rel = rel_visitor_topdown(v, rel, &rel_rewrite_replica_);
500 1266 : v->data = NULL;
501 1266 : return rel;
502 : }
503 :
504 : run_optimizer
505 686773 : bind_rewrite_remote(visitor *v, global_props *gp)
506 : {
507 686773 : (void) v;
508 686773 : return gp->needs_mergetable_rewrite || gp->needs_remote_replica_rewrite ? rel_rewrite_remote : NULL;
509 : }
510 :
511 :
512 : static sql_rel *
513 6067 : rel_remote_func_(visitor *v, sql_rel *rel)
514 : {
515 6067 : (void) v;
516 :
517 : /* Don't modify the same relation twice */
518 6067 : if (is_rel_remote_func_used(rel->used))
519 : return rel;
520 5538 : rel->used |= rel_remote_func_used;
521 :
522 5538 : if (find_prop(rel->p, PROP_REMOTE) != NULL) {
523 305 : list *exps = rel_projections(v->sql, rel, NULL, 1, 1);
524 305 : rel = rel_unique_exps(v->sql, rel); /* remove any duplicate results (aliases) */
525 305 : rel = rel_relational_func(v->sql->sa, rel, exps);
526 : }
527 : return rel;
528 : }
529 :
530 : static sql_rel *
531 1266 : rel_remote_func(visitor *v, global_props *gp, sql_rel *rel)
532 : {
533 1266 : (void) gp;
534 1266 : return rel_visitor_bottomup(v, rel, &rel_remote_func_);
535 : }
536 :
537 : run_optimizer
538 686770 : bind_remote_func(visitor *v, global_props *gp)
539 : {
540 686770 : (void) v;
541 686770 : return gp->needs_mergetable_rewrite || gp->needs_remote_replica_rewrite ? rel_remote_func : NULL;
542 : }
|