Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer_private.h"
15 : #include "rel_basetable.h"
16 : #include "rel_exp.h"
17 : #include "rel_remote.h"
18 : #include "sql_privileges.h"
19 :
20 : typedef struct rmt_prop_state {
21 : int depth;
22 : prop* rmt;
23 : sql_rel* orig;
24 : bool no_rmt_branch_rpl_leaf;
25 : } rps;
26 :
27 : static sql_rel*
28 313 : rel_unique_exps(mvc *sql, sql_rel *rel)
29 : {
30 313 : list *l;
31 :
32 313 : if (!is_project(rel->op))
33 : return rel;
34 154 : l = sa_list(sql->sa);
35 1504 : for (node *n = rel->exps->h; n; n = n->next) {
36 1350 : sql_exp *e = n->data;
37 1350 : if (e->type == e_column) {
38 1250 : const char *name = exp_name(e);
39 1250 : const char *rname = exp_relname(e);
40 :
41 : /* If there are two identical expression names, there will be ambiguity */
42 1250 : if (name && rname && exps_bind_column2(l, rname, name, NULL))
43 0 : exp_label(sql->sa, e, ++sql->label);
44 : }
45 1350 : append(l,e);
46 : }
47 154 : rel->exps = l;
48 154 : return rel;
49 : }
50 :
51 : static int
52 1751 : has_remote_or_replica( sql_rel *rel )
53 : {
54 3531 : if (!rel)
55 : return 0;
56 :
57 2180 : switch (rel->op) {
58 255 : case op_basetable: {
59 255 : sql_table *t = rel->l;
60 :
61 255 : return t && (isReplicaTable(t) || isRemote(t));
62 : }
63 21 : case op_table:
64 21 : if (IS_TABLE_PROD_FUNC(rel->flag) || rel->flag == TABLE_FROM_RELATION)
65 21 : return has_remote_or_replica( rel->l );
66 : break;
67 107 : case op_join:
68 : case op_left:
69 : case op_right:
70 : case op_full:
71 :
72 : case op_semi:
73 : case op_anti:
74 :
75 : case op_union:
76 : case op_inter:
77 : case op_except:
78 : case op_merge:
79 :
80 : case op_insert:
81 : case op_update:
82 : case op_delete:
83 107 : return has_remote_or_replica( rel->l ) || has_remote_or_replica( rel->r );
84 2 : case op_munion:
85 4 : for (node *n = ((list*)rel->l)->h; n; n = n->next) {
86 4 : if (has_remote_or_replica(n->data))
87 : return true;
88 : }
89 : break;
90 1756 : case op_project:
91 : case op_select:
92 : case op_groupby:
93 : case op_topn:
94 : case op_sample:
95 : case op_truncate:
96 1756 : return has_remote_or_replica( rel->l );
97 39 : case op_ddl:
98 39 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq /*|| rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view*/)
99 3 : return has_remote_or_replica( rel->l );
100 36 : if (rel->flag == ddl_list || rel->flag == ddl_exception)
101 0 : return has_remote_or_replica( rel->l ) || has_remote_or_replica( rel->r );
102 : break;
103 : }
104 : return 0;
105 : }
106 :
107 : static sql_rel *
108 51 : do_replica_rewrite(mvc *sql, list *exps, sql_table *t, sql_table *p, int remote_prop)
109 : {
110 51 : node *n, *m;
111 51 : sql_rel *r = rel_basetable(sql, p, t->base.name);
112 :
113 188 : for (n = exps->h; n; n = n->next) {
114 137 : sql_exp *e = n->data;
115 137 : const char *nname = exp_name(e);
116 :
117 137 : node *nn = ol_find_name(t->columns, nname);
118 137 : if (nn) {
119 86 : sql_column *c = nn->data;
120 86 : rel_base_use(sql, r, c->colnr);
121 51 : } else if (strcmp(nname, TID) == 0) {
122 51 : rel_base_use_tid(sql, r);
123 : } else {
124 0 : assert(0);
125 : }
126 : }
127 51 : r = rewrite_basetable(sql, r);
128 188 : for (n = exps->h, m = r->exps->h; n && m; n = n->next, m = m->next) {
129 137 : sql_exp *e = n->data;
130 137 : sql_exp *ne = m->data;
131 :
132 137 : exp_prop_alias(sql->sa, ne, e);
133 : }
134 51 : list_hash_clear(r->exps); /* the child table may have different column names, so clear the hash */
135 :
136 : /* set_remote() */
137 51 : if (remote_prop && p && isRemote(p)) {
138 6 : list *uris = sa_list(sql->sa);
139 6 : tid_uri *tu = SA_NEW(sql->sa, tid_uri);
140 6 : tu->id = p->base.id;
141 6 : tu->uri = p->query;
142 6 : append(uris, tu);
143 :
144 6 : prop *rmt_prop = r->p = prop_create(sql->sa, PROP_REMOTE, r->p);
145 6 : rmt_prop->id = p->base.id;
146 6 : rmt_prop->value.pval = uris;
147 : }
148 51 : return r;
149 : }
150 :
151 : static sql_rel *
152 52 : replica_rewrite(visitor *v, sql_table *t, list *exps)
153 : {
154 53 : sql_rel *res = NULL;
155 53 : rps *rpstate = v->data;
156 53 : prop *rp = rpstate->rmt;
157 53 : sqlid tid = rp->id;
158 53 : list *uris = rp->value.pval;
159 :
160 53 : if (mvc_highwater(v->sql))
161 0 : return sql_error(v->sql, 10, SQLSTATE(42000) "Query too complex: running out of stack space");
162 :
163 : /* if there was a REMOTE property in any higher node and there is not
164 : * a local tid then use the available uris to rewrite */
165 53 : if (uris && !tid) {
166 54 : for (node *n = t->members->h; n && !res; n = n->next) {
167 32 : sql_part *p = n->data;
168 32 : sql_table *pt = find_sql_table_id(v->sql->session->tr, t->s, p->member);
169 :
170 32 : if (!isRemote(pt))
171 8 : continue;
172 :
173 45 : for (node *m = uris->h; m && !res; m = m->next) {
174 24 : if (strcmp(((tid_uri*)m->data)->uri, pt->query) == 0) {
175 : /* we found a matching uri do the actual rewrite */
176 44 : res = do_replica_rewrite(v->sql, exps, t, pt,
177 22 : rpstate->no_rmt_branch_rpl_leaf ? true: false);
178 : /* set to the REMOTE a list with a single uri (the matching one)
179 : * this is for the case that our REMOTE subtree has only replicas
180 : * with multiple remotes*/
181 22 : if (list_length(rp->value.pval) > 1) {
182 3 : list *uri = sa_list(v->sql->sa);
183 3 : tid_uri *tu = SA_NEW(v->sql->sa, tid_uri);
184 : /* sql_gencode requires the proper tableid */
185 3 : tu->id = p->member;
186 3 : tu->uri = pt->query;
187 3 : append(uri, tu);
188 3 : rp->value.pval = uri;
189 3 : break;
190 : }
191 : }
192 : }
193 : }
194 : }
195 :
196 : /* no match, find one without remote or use first */
197 22 : if (!res) {
198 31 : sql_table *pt = NULL;
199 31 : int remote = 1;
200 :
201 35 : for (node *n = t->members->h; n; n = n->next) {
202 34 : sql_part *p = n->data;
203 34 : sql_table *next = find_sql_table_id(v->sql->session->tr, t->s, p->member);
204 :
205 : /* give preference to local tables and avoid empty merge or replica tables */
206 34 : if (!isRemote(next) && ((!isReplicaTable(next) && !isMergeTable(next)) || !list_empty(next->members))) {
207 30 : pt = next;
208 30 : remote = 0;
209 : /* if we resolved the replica to a local table we have to
210 : * go and remove the remote property from the subtree */
211 30 : sql_rel *r = ((rps*)v->data)->orig;
212 30 : r->p = prop_remove(r->p, rp);
213 30 : break;
214 : }
215 : }
216 31 : if (!pt) {
217 1 : sql_part *p = t->members->h->data;
218 1 : pt = find_sql_table_id(v->sql->session->tr, t->s, p->member);
219 : }
220 :
221 31 : if ((isMergeTable(pt) || isReplicaTable(pt)) && list_empty(pt->members))
222 1 : return sql_error(v->sql, 02, SQLSTATE(42000) "%s '%s'.'%s' should have at least one table associated",
223 1 : TABLE_TYPE_DESCRIPTION(pt->type, pt->properties), pt->s->base.name, pt->base.name);
224 30 : res = isReplicaTable(pt) ? replica_rewrite(v, pt, exps) : do_replica_rewrite(v->sql, exps, t, pt, remote);
225 : }
226 : return res;
227 : }
228 :
229 : static bool
230 18721 : eliminate_remote_or_replica_refs(visitor *v, sql_rel **rel)
231 : {
232 18721 : if (rel_is_ref(*rel) && !((*rel)->flag&MERGE_LEFT)) {
233 1543 : if (has_remote_or_replica(*rel)) {
234 19 : sql_rel *nrel = rel_copy(v->sql, *rel, 1);
235 19 : rel_destroy(*rel);
236 19 : *rel = nrel;
237 19 : return true;
238 : } else {
239 : // TODO why do we want to bail out if we have a non rmt/rpl ref?
240 : return false;
241 : }
242 : }
243 : return true;
244 : }
245 :
246 : static sql_rel *
247 12480 : rel_rewrite_replica_(visitor *v, sql_rel *rel)
248 : {
249 12480 : if (!eliminate_remote_or_replica_refs(v, &rel))
250 1014 : return rel;
251 :
252 : /* if we are higher in the tree clear the previous REMOTE prop in the visitor state */
253 11466 : if (v->data && v->depth <= ((rps*)v->data)->depth) {
254 95 : v->data = NULL;
255 : }
256 :
257 : /* no-leaf nodes: store the REMOTE property uris in the state of the visitor
258 : * leaf nodes: check if they are basetable replicas and proceed with the rewrite */
259 11466 : prop *p;
260 11466 : if (!is_basetable(rel->op)) {
261 : /* if there is a REMOTE prop set it to the visitor state */
262 7761 : if ((p = find_prop(rel->p, PROP_REMOTE)) != NULL) {
263 350 : rps *rp = SA_NEW(v->sql->sa, rps);
264 350 : rp->depth = v->depth;
265 350 : rp->rmt = p;
266 350 : rp->orig = rel;
267 350 : rp->no_rmt_branch_rpl_leaf = false;
268 350 : v->data = rp;
269 : }
270 : } else {
271 3705 : sql_table *t = rel->l;
272 :
273 3705 : if (t && isReplicaTable(t)) {
274 : /* we might have reached a replica table through a branch that has
275 : * no REMOTE property. In this case we have to set the v->data */
276 106 : if (!v->data && (p = find_prop(rel->p, PROP_REMOTE)) != NULL) {
277 15 : rps *rp = SA_NEW(v->sql->sa, rps);
278 15 : rp->depth = v->depth;
279 15 : rp->rmt = p;
280 15 : rp->orig = rel;
281 15 : rp->no_rmt_branch_rpl_leaf = true;
282 15 : v->data = rp;
283 : }
284 :
285 106 : if (list_empty(t->members)) /* in DDL statement cases skip if replica is empty */
286 54 : return rel;
287 :
288 52 : sql_rel *r = replica_rewrite(v, t, rel->exps);
289 52 : rel_destroy(rel);
290 52 : rel = r;
291 : }
292 : }
293 11412 : return rel;
294 : }
295 :
296 : static sql_rel *
297 1374 : rel_rewrite_replica(visitor *v, global_props *gp, sql_rel *rel)
298 : {
299 1374 : (void) gp;
300 1374 : return rel_visitor_topdown(v, rel, &rel_rewrite_replica_);
301 : }
302 :
303 : run_optimizer
304 705987 : bind_rewrite_replica(visitor *v, global_props *gp)
305 : {
306 705987 : (void) v;
307 705987 : return gp->needs_mergetable_rewrite || gp->needs_remote_replica_rewrite ? rel_rewrite_replica : NULL;
308 : }
309 :
310 : static list*
311 29 : rel_merge_remote_prop(visitor *v, prop *pl, prop *pr)
312 : {
313 29 : list* uris = sa_list(v->sql->sa);
314 : // TODO this double loop must go (maybe use the hashmap of the list?)
315 65 : for (node* n = ((list*)pl->value.pval)->h; n; n = n->next) {
316 90 : for (node* m = ((list*)pr->value.pval)->h; m; m = m->next) {
317 54 : tid_uri* ltu = n->data;
318 54 : tid_uri* rtu = m->data;
319 54 : if (strcmp(ltu->uri, rtu->uri) == 0) {
320 28 : append(uris, n->data);
321 : }
322 : }
323 : }
324 29 : return uris;
325 : }
326 :
327 : static sql_rel *
328 6241 : rel_rewrite_remote_(visitor *v, sql_rel *rel)
329 : {
330 6241 : prop *p, *pl, *pr;
331 :
332 6241 : if (!eliminate_remote_or_replica_refs(v, &rel))
333 510 : return rel;
334 :
335 5731 : sql_rel *l = rel->l, *r = rel->r; /* look on left and right relations after possibly doing rel_copy */
336 :
337 5731 : switch (rel->op) {
338 1851 : case op_basetable: {
339 1851 : sql_table *t = rel->l;
340 :
341 : /* when a basetable wraps a sql_table (->l) which is remote we want to store its remote
342 : * uri to the REMOTE property. As the property is pulled up the tree it can be used in
343 : * the case of binary rel operators (see later switch cases) in order to
344 : * 1. resolve properly (same uri) replica tables in the other subtree (that's why we
345 : * call the do_replica_rewrite)
346 : * 2. pull REMOTE over the binary op if the other subtree has a matching uri remote table
347 : */
348 1851 : if (t && isRemote(t) && (p = find_prop(rel->p, PROP_REMOTE)) == NULL) {
349 303 : if (t->query) {
350 303 : tid_uri *tu = SA_NEW(v->sql->sa, tid_uri);
351 303 : tu->id = t->base.id;
352 303 : tu->uri = mapiuri_uri(t->query, v->sql->sa);
353 303 : list *uris = sa_list(v->sql->sa);
354 303 : append(uris, tu);
355 :
356 303 : p = rel->p = prop_create(v->sql->sa, PROP_REMOTE, rel->p);
357 303 : p->id = 0;
358 303 : p->value.pval = uris;
359 : }
360 : }
361 1851 : if (t && isReplicaTable(t) && !list_empty(t->members)) {
362 : /* the parts of a replica are either
363 : * - remote tables for which we have to store tid and uri
364 : * - local table for which we only care if they exist (localpart var)
365 : * the relevant info are passed in the REMOTE property value.pval and id members
366 : */
367 52 : list *uris = sa_list(v->sql->sa);
368 52 : sqlid localpart = 0;
369 151 : for (node *n = t->members->h; n; n = n->next) {
370 99 : sql_part *part = n->data;
371 99 : sql_table *ptable = find_sql_table_id(v->sql->session->tr, t->s, part->member);
372 :
373 99 : if (isRemote(ptable)) {
374 58 : assert(ptable->query);
375 58 : tid_uri *tu = SA_NEW(v->sql->sa, tid_uri);
376 58 : tu->id = ptable->base.id;
377 58 : tu->uri = mapiuri_uri(ptable->query, v->sql->sa);
378 58 : append(uris, tu);
379 : } else {
380 41 : localpart = ptable->base.id;
381 : }
382 : }
383 : /* always introduce the remote prop even if there are no remote uri's
384 : * this is needed for the proper replica resolution */
385 52 : p = rel->p = prop_create(v->sql->sa, PROP_REMOTE, rel->p);
386 52 : p->id = localpart;
387 52 : p->value.pval = (void*)uris;
388 : }
389 : } break;
390 8 : case op_table:
391 8 : if (IS_TABLE_PROD_FUNC(rel->flag) || rel->flag == TABLE_FROM_RELATION) {
392 8 : if (l && (p = find_prop(l->p, PROP_REMOTE)) != NULL) {
393 5 : l->p = prop_remove(l->p, p);
394 5 : if (!find_prop(rel->p, PROP_REMOTE)) {
395 5 : p->p = rel->p;
396 5 : rel->p = p;
397 : }
398 : }
399 : } break;
400 530 : case op_join:
401 : case op_left:
402 : case op_right:
403 : case op_full:
404 :
405 : case op_semi:
406 : case op_anti:
407 :
408 : case op_union:
409 : case op_inter:
410 : case op_except:
411 :
412 : case op_insert:
413 : case op_update:
414 : case op_delete:
415 : case op_merge:
416 :
417 530 : if (rel->flag&MERGE_LEFT) /* search for any remote tables but don't propagate over to this relation */
418 : return rel;
419 :
420 : /* if both subtrees have REMOTE property with the common uri then pull it up */
421 528 : if (l && (pl = find_prop(l->p, PROP_REMOTE)) != NULL &&
422 69 : r && (pr = find_prop(r->p, PROP_REMOTE)) != NULL) {
423 :
424 29 : list *uris = rel_merge_remote_prop(v, pl, pr);
425 :
426 : /* if there are common uris pull the REMOTE prop with the common uris up */
427 29 : if (!list_empty(uris)) {
428 21 : l->p = prop_remove(l->p, pl);
429 21 : r->p = prop_remove(r->p, pr);
430 21 : if (!find_prop(rel->p, PROP_REMOTE)) {
431 : /* remove local tid ONLY if no subtree has local parts */
432 21 : if (pl->id == 0 || pr->id == 0)
433 16 : pl->id = 0;
434 : /* set the new (matching) uris */
435 21 : pl->value.pval = uris;
436 : /* push the pl REMOTE property to the list of properties */
437 21 : pl->p = rel->p;
438 21 : rel->p = pl;
439 : } else {
440 : // TODO what if we are here? can that even happen?
441 : }
442 : }
443 : }
444 : break;
445 : case op_munion:
446 : /*assert(0);*/
447 : break;
448 2277 : case op_project:
449 : case op_select:
450 : case op_groupby:
451 : case op_topn:
452 : case op_sample:
453 : case op_truncate:
454 : /* if the subtree has the REMOTE property just pull it up */
455 2277 : if (l && (p = find_prop(l->p, PROP_REMOTE)) != NULL) {
456 302 : l->p = prop_remove(l->p, p);
457 302 : if (!find_prop(rel->p, PROP_REMOTE)) {
458 291 : p->p = rel->p;
459 291 : rel->p = p;
460 : }
461 : }
462 : break;
463 903 : case op_ddl:
464 903 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq /*|| rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view*/) {
465 0 : if (l && (p = find_prop(l->p, PROP_REMOTE)) != NULL) {
466 0 : l->p = prop_remove(l->p, p);
467 0 : if (!find_prop(rel->p, PROP_REMOTE)) {
468 0 : p->p = rel->p;
469 0 : rel->p = p;
470 : }
471 : }
472 903 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
473 442 : if (l && (pl = find_prop(l->p, PROP_REMOTE)) != NULL &&
474 0 : r && (pr = find_prop(r->p, PROP_REMOTE)) != NULL) {
475 :
476 0 : list *uris = rel_merge_remote_prop(v, pl, pr);
477 :
478 : /* if there are common uris pull the REMOTE prop with the common uris up */
479 0 : if (!list_empty(uris)) {
480 0 : l->p = prop_remove(l->p, pl);
481 0 : r->p = prop_remove(r->p, pr);
482 0 : if (!find_prop(rel->p, PROP_REMOTE)) {
483 : /* remove local tid ONLY if no subtree has local parts */
484 0 : if (pl->id == 0 || pr->id == 0)
485 0 : pl->id = 0;
486 : /* set the new (matching) uris */
487 0 : pl->value.pval = uris;
488 : /* push the pl REMOTE property to the list of properties */
489 0 : pl->p = rel->p;
490 0 : rel->p = pl;
491 : } else {
492 : // TODO what if we are here? can that even happen?
493 : }
494 : }
495 : }
496 : }
497 : break;
498 : }
499 5729 : return rel;
500 : }
501 :
502 : static sql_rel *
503 1374 : rel_rewrite_remote(visitor *v, global_props *gp, sql_rel *rel)
504 : {
505 1374 : (void) gp;
506 1374 : rel = rel_visitor_bottomup(v, rel, &rel_rewrite_remote_);
507 1374 : v->data = NULL;
508 1374 : rel = rel_visitor_topdown(v, rel, &rel_rewrite_replica_);
509 1374 : v->data = NULL;
510 1374 : return rel;
511 : }
512 :
513 : run_optimizer
514 705987 : bind_rewrite_remote(visitor *v, global_props *gp)
515 : {
516 705987 : (void) v;
517 705987 : return gp->needs_mergetable_rewrite || gp->needs_remote_replica_rewrite ? rel_rewrite_remote : NULL;
518 : }
519 :
520 :
521 : static sql_rel *
522 6240 : rel_remote_func_(visitor *v, sql_rel *rel)
523 : {
524 6240 : (void) v;
525 :
526 : /* Don't modify the same relation twice */
527 6240 : if (is_rel_remote_func_used(rel->used))
528 : return rel;
529 5819 : rel->used |= rel_remote_func_used;
530 :
531 5819 : if (find_prop(rel->p, PROP_REMOTE) != NULL) {
532 313 : list *exps = rel_projections(v->sql, rel, NULL, 1, 1);
533 313 : rel = rel_unique_exps(v->sql, rel); /* remove any duplicate results (aliases) */
534 313 : rel = rel_relational_func(v->sql->sa, rel, exps);
535 : }
536 : return rel;
537 : }
538 :
539 : static sql_rel *
540 1374 : rel_remote_func(visitor *v, global_props *gp, sql_rel *rel)
541 : {
542 1374 : (void) gp;
543 1374 : return rel_visitor_bottomup(v, rel, &rel_remote_func_);
544 : }
545 :
546 : run_optimizer
547 705978 : bind_remote_func(visitor *v, global_props *gp)
548 : {
549 705978 : (void) v;
550 705978 : return gp->needs_mergetable_rewrite || gp->needs_remote_replica_rewrite ? rel_remote_func : NULL;
551 : }
|