Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "rel_optimizer_private.h"
15 : #include "rel_basetable.h"
16 : #include "rel_exp.h"
17 : #include "rel_remote.h"
18 : #include "sql_privileges.h"
19 :
20 : typedef struct rmt_prop_state {
21 : int depth;
22 : prop* rmt;
23 : sql_rel* orig;
24 : bool no_rmt_branch_rpl_leaf;
25 : } rps;
26 :
27 : static sql_rel*
28 300 : rel_unique_exps(mvc *sql, sql_rel *rel)
29 : {
30 300 : list *l;
31 :
32 300 : if (!is_project(rel->op))
33 : return rel;
34 150 : l = sa_list(sql->sa);
35 1494 : for (node *n = rel->exps->h; n; n = n->next) {
36 1344 : sql_exp *e = n->data;
37 1344 : if (e->type == e_column) {
38 1254 : const char *name = exp_name(e);
39 1254 : const char *rname = exp_relname(e);
40 :
41 : /* If there are two identical expression names, there will be ambiguity */
42 1254 : if (name && rname && exps_bind_column2(l, rname, name, NULL))
43 0 : exp_label(sql->sa, e, ++sql->label);
44 : }
45 1344 : append(l,e);
46 : }
47 150 : rel->exps = l;
48 150 : return rel;
49 : }
50 :
51 : static int
52 1468 : has_remote_or_replica( sql_rel *rel )
53 : {
54 3118 : if (!rel)
55 : return 0;
56 :
57 1802 : switch (rel->op) {
58 89 : case op_basetable: {
59 89 : sql_table *t = rel->l;
60 :
61 89 : return t && (isReplicaTable(t) || isRemote(t));
62 : }
63 15 : case op_table:
64 15 : if (IS_TABLE_PROD_FUNC(rel->flag) || rel->flag == TABLE_FROM_RELATION)
65 15 : return has_remote_or_replica( rel->l );
66 : break;
67 27 : case op_join:
68 : case op_left:
69 : case op_right:
70 : case op_full:
71 :
72 : case op_semi:
73 : case op_anti:
74 :
75 : case op_union:
76 : case op_inter:
77 : case op_except:
78 : case op_merge:
79 :
80 : case op_insert:
81 : case op_update:
82 : case op_delete:
83 27 : return has_remote_or_replica( rel->l ) || has_remote_or_replica( rel->r );
84 0 : case op_munion:
85 0 : for (node *n = ((list*)rel->l)->h; n; n = n->next) {
86 0 : if (has_remote_or_replica(n->data))
87 : return true;
88 : }
89 : break;
90 1632 : case op_project:
91 : case op_select:
92 : case op_groupby:
93 : case op_topn:
94 : case op_sample:
95 : case op_truncate:
96 1632 : return has_remote_or_replica( rel->l );
97 39 : case op_ddl:
98 39 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq /*|| rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view*/)
99 3 : return has_remote_or_replica( rel->l );
100 36 : if (rel->flag == ddl_list || rel->flag == ddl_exception)
101 0 : return has_remote_or_replica( rel->l ) || has_remote_or_replica( rel->r );
102 : break;
103 : }
104 : return 0;
105 : }
106 :
107 : static sql_rel *
108 49 : do_replica_rewrite(mvc *sql, list *exps, sql_table *t, sql_table *p, int remote_prop)
109 : {
110 49 : node *n, *m;
111 49 : sql_rel *r = rel_basetable(sql, p, t->base.name);
112 :
113 182 : for (n = exps->h; n; n = n->next) {
114 133 : sql_exp *e = n->data;
115 133 : const char *nname = exp_name(e);
116 :
117 133 : node *nn = ol_find_name(t->columns, nname);
118 133 : if (nn) {
119 84 : sql_column *c = nn->data;
120 84 : rel_base_use(sql, r, c->colnr);
121 49 : } else if (strcmp(nname, TID) == 0) {
122 49 : rel_base_use_tid(sql, r);
123 : } else {
124 0 : assert(0);
125 : }
126 : }
127 49 : r = rewrite_basetable(sql, r);
128 182 : for (n = exps->h, m = r->exps->h; n && m; n = n->next, m = m->next) {
129 133 : sql_exp *e = n->data;
130 133 : sql_exp *ne = m->data;
131 :
132 133 : exp_prop_alias(sql->sa, ne, e);
133 : }
134 49 : list_hash_clear(r->exps); /* the child table may have different column names, so clear the hash */
135 :
136 : /* set_remote() */
137 49 : if (remote_prop && p && isRemote(p)) {
138 6 : list *uris = sa_list(sql->sa);
139 6 : tid_uri *tu = SA_NEW(sql->sa, tid_uri);
140 6 : tu->id = p->base.id;
141 6 : tu->uri = p->query;
142 6 : append(uris, tu);
143 :
144 6 : prop *rmt_prop = r->p = prop_create(sql->sa, PROP_REMOTE, r->p);
145 6 : rmt_prop->id = p->base.id;
146 6 : rmt_prop->value.pval = uris;
147 : }
148 49 : return r;
149 : }
150 :
151 : static sql_rel *
152 49 : replica_rewrite(visitor *v, sql_table *t, list *exps)
153 : {
154 49 : sql_rel *res = NULL;
155 49 : rps *rpstate = v->data;
156 49 : prop *rp = rpstate->rmt;
157 49 : sqlid tid = rp->id;
158 49 : list *uris = rp->value.pval;
159 :
160 49 : if (mvc_highwater(v->sql))
161 0 : return sql_error(v->sql, 10, SQLSTATE(42000) "Query too complex: running out of stack space");
162 :
163 : /* if there was a REMOTE property in any higher node and there is not
164 : * a local tid then use the available uris to rewrite */
165 49 : if (uris && !tid) {
166 54 : for (node *n = t->members->h; n && !res; n = n->next) {
167 32 : sql_part *p = n->data;
168 32 : sql_table *pt = find_sql_table_id(v->sql->session->tr, t->s, p->member);
169 :
170 32 : if (!isRemote(pt))
171 8 : continue;
172 :
173 45 : for (node *m = uris->h; m && !res; m = m->next) {
174 24 : if (strcmp(((tid_uri*)m->data)->uri, pt->query) == 0) {
175 : /* we found a matching uri do the actual rewrite */
176 44 : res = do_replica_rewrite(v->sql, exps, t, pt,
177 22 : rpstate->no_rmt_branch_rpl_leaf ? true: false);
178 : /* set to the REMOTE a list with a single uri (the matching one)
179 : * this is for the case that our REMOTE subtree has only replicas
180 : * with multiple remotes*/
181 22 : if (list_length(rp->value.pval) > 1) {
182 3 : list *uri = sa_list(v->sql->sa);
183 3 : tid_uri *tu = SA_NEW(v->sql->sa, tid_uri);
184 : /* sql_gencode requires the proper tableid */
185 3 : tu->id = p->member;
186 3 : tu->uri = pt->query;
187 3 : append(uri, tu);
188 3 : rp->value.pval = uri;
189 3 : break;
190 : }
191 : }
192 : }
193 : }
194 : }
195 :
196 : /* no match, find one without remote or use first */
197 22 : if (!res) {
198 27 : sql_table *pt = NULL;
199 27 : int remote = 1;
200 :
201 30 : for (node *n = t->members->h; n; n = n->next) {
202 30 : sql_part *p = n->data;
203 30 : sql_table *next = find_sql_table_id(v->sql->session->tr, t->s, p->member);
204 :
205 : /* give preference to local tables and avoid empty merge or replica tables */
206 30 : if (!isRemote(next) && ((!isReplicaTable(next) && !isMergeTable(next)) || !list_empty(next->members))) {
207 27 : pt = next;
208 27 : remote = 0;
209 : /* if we resolved the replica to a local table we have to
210 : * go and remove the remote property from the subtree */
211 27 : sql_rel *r = ((rps*)v->data)->orig;
212 27 : r->p = prop_remove(r->p, rp);
213 27 : break;
214 : }
215 : }
216 27 : if (!pt) {
217 0 : sql_part *p = t->members->h->data;
218 0 : pt = find_sql_table_id(v->sql->session->tr, t->s, p->member);
219 : }
220 :
221 27 : if ((isMergeTable(pt) || isReplicaTable(pt)) && list_empty(pt->members))
222 0 : return sql_error(v->sql, 02, SQLSTATE(42000) "%s '%s'.'%s' should have at least one table associated",
223 0 : TABLE_TYPE_DESCRIPTION(pt->type, pt->properties), pt->s->base.name, pt->base.name);
224 27 : res = isReplicaTable(pt) ? replica_rewrite(v, pt, exps) : do_replica_rewrite(v->sql, exps, t, pt, remote);
225 : }
226 : return res;
227 : }
228 :
229 : static bool
230 16734 : eliminate_remote_or_replica_refs(visitor *v, sql_rel **rel)
231 : {
232 16734 : if (rel_is_ref(*rel) && !((*rel)->flag&MERGE_LEFT)) {
233 1423 : if (has_remote_or_replica(*rel)) {
234 15 : sql_rel *nrel = rel_copy(v->sql, *rel, 1);
235 15 : rel_destroy(*rel);
236 15 : *rel = nrel;
237 15 : return true;
238 : } else {
239 : // TODO why do we want to bail out if we have a non rmt/rpl ref?
240 : return false;
241 : }
242 : }
243 : return true;
244 : }
245 :
246 : static sql_rel *
247 11156 : rel_rewrite_replica_(visitor *v, sql_rel *rel)
248 : {
249 11156 : if (!eliminate_remote_or_replica_refs(v, &rel))
250 936 : return rel;
251 :
252 : /* if we are higher in the tree clear the previous REMOTE prop in the visitor state */
253 10220 : if (v->data && v->depth <= ((rps*)v->data)->depth) {
254 109 : v->data = NULL;
255 : }
256 :
257 : /* no-leaf nodes: store the REMOTE property uris in the state of the visitor
258 : * leaf nodes: check if they are basetable replicas and proceed with the rewrite */
259 10220 : prop *p;
260 10220 : if (!is_basetable(rel->op)) {
261 : /* if there is a REMOTE prop set it to the visitor state */
262 6984 : if ((p = find_prop(rel->p, PROP_REMOTE)) != NULL) {
263 355 : rps *rp = SA_NEW(v->sql->sa, rps);
264 355 : rp->depth = v->depth;
265 355 : rp->rmt = p;
266 355 : rp->orig = rel;
267 355 : rp->no_rmt_branch_rpl_leaf = false;
268 355 : v->data = rp;
269 : }
270 : } else {
271 3236 : sql_table *t = rel->l;
272 :
273 3236 : if (t && isReplicaTable(t)) {
274 : /* we might have reached a replica table through a branch that has
275 : * no REMOTE property. In this case we have to set the v->data */
276 99 : if (!v->data && (p = find_prop(rel->p, PROP_REMOTE)) != NULL) {
277 15 : rps *rp = SA_NEW(v->sql->sa, rps);
278 15 : rp->depth = v->depth;
279 15 : rp->rmt = p;
280 15 : rp->orig = rel;
281 15 : rp->no_rmt_branch_rpl_leaf = true;
282 15 : v->data = rp;
283 : }
284 :
285 99 : if (list_empty(t->members)) /* in DDL statement cases skip if replica is empty */
286 50 : return rel;
287 :
288 49 : sql_rel *r = replica_rewrite(v, t, rel->exps);
289 49 : rel_destroy(rel);
290 49 : rel = r;
291 : }
292 : }
293 10170 : return rel;
294 : }
295 :
296 : static sql_rel *
297 1223 : rel_rewrite_replica(visitor *v, global_props *gp, sql_rel *rel)
298 : {
299 1223 : (void) gp;
300 1223 : return rel_visitor_topdown(v, rel, &rel_rewrite_replica_);
301 : }
302 :
303 : run_optimizer
304 698575 : bind_rewrite_replica(visitor *v, global_props *gp)
305 : {
306 698575 : (void) v;
307 698575 : return gp->needs_mergetable_rewrite || gp->needs_remote_replica_rewrite ? rel_rewrite_replica : NULL;
308 : }
309 :
310 : static list*
311 37 : rel_merge_remote_prop(visitor *v, prop *pl, prop *pr)
312 : {
313 37 : list* uris = sa_list(v->sql->sa);
314 : // TODO this double loop must go (maybe use the hashmap of the list?)
315 81 : for (node* n = ((list*)pl->value.pval)->h; n; n = n->next) {
316 106 : for (node* m = ((list*)pr->value.pval)->h; m; m = m->next) {
317 62 : tid_uri* ltu = n->data;
318 62 : tid_uri* rtu = m->data;
319 62 : if (strcmp(ltu->uri, rtu->uri) == 0) {
320 29 : append(uris, n->data);
321 : }
322 : }
323 : }
324 37 : return uris;
325 : }
326 :
327 : static sql_rel *
328 5578 : rel_rewrite_remote_(visitor *v, sql_rel *rel)
329 : {
330 5578 : prop *p, *pl, *pr;
331 :
332 5578 : if (!eliminate_remote_or_replica_refs(v, &rel))
333 472 : return rel;
334 :
335 5106 : sql_rel *l = rel->l, *r = rel->r; /* look on left and right relations after possibly doing rel_copy */
336 :
337 5106 : switch (rel->op) {
338 1616 : case op_basetable: {
339 1616 : sql_table *t = rel->l;
340 :
341 : /* when a basetable wraps a sql_table (->l) which is remote we want to store its remote
342 : * uri to the REMOTE property. As the property is pulled up the tree it can be used in
343 : * the case of binary rel operators (see later switch cases) in order to
344 : * 1. resolve properly (same uri) replica tables in the other subtree (that's why we
345 : * call the do_replica_rewrite)
346 : * 2. pull REMOTE over the binary op if the other subtree has a matching uri remote table
347 : */
348 1616 : if (t && isRemote(t) && (p = find_prop(rel->p, PROP_REMOTE)) == NULL) {
349 301 : if (t->query) {
350 301 : tid_uri *tu = SA_NEW(v->sql->sa, tid_uri);
351 301 : tu->id = t->base.id;
352 301 : tu->uri = mapiuri_uri(t->query, v->sql->sa);
353 301 : list *uris = sa_list(v->sql->sa);
354 301 : append(uris, tu);
355 :
356 301 : p = rel->p = prop_create(v->sql->sa, PROP_REMOTE, rel->p);
357 301 : p->id = 0;
358 301 : p->value.pval = uris;
359 : }
360 : }
361 1616 : if (t && isReplicaTable(t) && !list_empty(t->members)) {
362 : /* the parts of a replica are either
363 : * - remote tables for which we have to store tid and uri
364 : * - local table for which we only care if they exist (localpart var)
365 : * the relevant info are passed in the REMOTE property value.pval and id members
366 : */
367 49 : list *uris = sa_list(v->sql->sa);
368 49 : sqlid localpart = 0;
369 145 : for (node *n = t->members->h; n; n = n->next) {
370 96 : sql_part *part = n->data;
371 96 : sql_table *ptable = find_sql_table_id(v->sql->session->tr, t->s, part->member);
372 :
373 96 : if (isRemote(ptable)) {
374 58 : assert(ptable->query);
375 58 : tid_uri *tu = SA_NEW(v->sql->sa, tid_uri);
376 58 : tu->id = ptable->base.id;
377 58 : tu->uri = mapiuri_uri(ptable->query, v->sql->sa);
378 58 : append(uris, tu);
379 : } else {
380 38 : localpart = ptable->base.id;
381 : }
382 : }
383 : /* always introduce the remote prop even if there are no remote uri's
384 : * this is needed for the proper replica resolution */
385 49 : p = rel->p = prop_create(v->sql->sa, PROP_REMOTE, rel->p);
386 49 : p->id = localpart;
387 49 : p->value.pval = (void*)uris;
388 : }
389 : } break;
390 5 : case op_table:
391 5 : if (IS_TABLE_PROD_FUNC(rel->flag) || rel->flag == TABLE_FROM_RELATION) {
392 5 : if (l && (p = find_prop(l->p, PROP_REMOTE)) != NULL) {
393 5 : l->p = prop_remove(l->p, p);
394 5 : if (!find_prop(rel->p, PROP_REMOTE)) {
395 5 : p->p = rel->p;
396 5 : rel->p = p;
397 : }
398 : }
399 : } break;
400 615 : case op_join:
401 : case op_left:
402 : case op_right:
403 : case op_full:
404 :
405 : case op_semi:
406 : case op_anti:
407 :
408 : case op_union:
409 : case op_inter:
410 : case op_except:
411 :
412 : case op_insert:
413 : case op_update:
414 : case op_delete:
415 : case op_merge:
416 :
417 615 : if (rel->flag&MERGE_LEFT) /* search for any remote tables but don't propagate over to this relation */
418 : return rel;
419 :
420 : /* if both subtrees have REMOTE property with the common uri then pull it up */
421 613 : if (l && (pl = find_prop(l->p, PROP_REMOTE)) != NULL &&
422 75 : r && (pr = find_prop(r->p, PROP_REMOTE)) != NULL) {
423 :
424 37 : list *uris = rel_merge_remote_prop(v, pl, pr);
425 :
426 : /* if there are common uris pull the REMOTE prop with the common uris up */
427 37 : if (!list_empty(uris)) {
428 22 : l->p = prop_remove(l->p, pl);
429 22 : r->p = prop_remove(r->p, pr);
430 22 : if (!find_prop(rel->p, PROP_REMOTE)) {
431 : /* remove local tid ONLY if no subtree has local parts */
432 22 : if (pl->id == 0 || pr->id == 0)
433 17 : pl->id = 0;
434 : /* set the new (matching) uris */
435 22 : pl->value.pval = uris;
436 : /* push the pl REMOTE property to the list of properties */
437 22 : pl->p = rel->p;
438 22 : rel->p = pl;
439 : } else {
440 : // TODO what if we are here? can that even happen?
441 : }
442 : }
443 : }
444 : break;
445 : case op_munion:
446 : /*assert(0);*/
447 : break;
448 2010 : case op_project:
449 : case op_select:
450 : case op_groupby:
451 : case op_topn:
452 : case op_sample:
453 : case op_truncate:
454 : /* if the subtree has the REMOTE property just pull it up */
455 2010 : if (l && (p = find_prop(l->p, PROP_REMOTE)) != NULL) {
456 324 : l->p = prop_remove(l->p, p);
457 324 : if (!find_prop(rel->p, PROP_REMOTE)) {
458 306 : p->p = rel->p;
459 306 : rel->p = p;
460 : }
461 : }
462 : break;
463 854 : case op_ddl:
464 854 : if (rel->flag == ddl_output || rel->flag == ddl_create_seq || rel->flag == ddl_alter_seq /*|| rel->flag == ddl_alter_table || rel->flag == ddl_create_table || rel->flag == ddl_create_view*/) {
465 0 : if (l && (p = find_prop(l->p, PROP_REMOTE)) != NULL) {
466 0 : l->p = prop_remove(l->p, p);
467 0 : if (!find_prop(rel->p, PROP_REMOTE)) {
468 0 : p->p = rel->p;
469 0 : rel->p = p;
470 : }
471 : }
472 854 : } else if (rel->flag == ddl_list || rel->flag == ddl_exception) {
473 442 : if (l && (pl = find_prop(l->p, PROP_REMOTE)) != NULL &&
474 0 : r && (pr = find_prop(r->p, PROP_REMOTE)) != NULL) {
475 :
476 0 : list *uris = rel_merge_remote_prop(v, pl, pr);
477 :
478 : /* if there are common uris pull the REMOTE prop with the common uris up */
479 0 : if (!list_empty(uris)) {
480 0 : l->p = prop_remove(l->p, pl);
481 0 : r->p = prop_remove(r->p, pr);
482 0 : if (!find_prop(rel->p, PROP_REMOTE)) {
483 : /* remove local tid ONLY if no subtree has local parts */
484 0 : if (pl->id == 0 || pr->id == 0)
485 0 : pl->id = 0;
486 : /* set the new (matching) uris */
487 0 : pl->value.pval = uris;
488 : /* push the pl REMOTE property to the list of properties */
489 0 : pl->p = rel->p;
490 0 : rel->p = pl;
491 : } else {
492 : // TODO what if we are here? can that even happen?
493 : }
494 : }
495 : }
496 : }
497 : break;
498 : }
499 5104 : return rel;
500 : }
501 :
502 : static sql_rel *
503 1223 : rel_rewrite_remote(visitor *v, global_props *gp, sql_rel *rel)
504 : {
505 1223 : (void) gp;
506 1223 : rel = rel_visitor_bottomup(v, rel, &rel_rewrite_remote_);
507 1223 : v->data = NULL;
508 1223 : rel = rel_visitor_topdown(v, rel, &rel_rewrite_replica_);
509 1223 : v->data = NULL;
510 1223 : return rel;
511 : }
512 :
513 : run_optimizer
514 698470 : bind_rewrite_remote(visitor *v, global_props *gp)
515 : {
516 698470 : (void) v;
517 698470 : return gp->needs_mergetable_rewrite || gp->needs_remote_replica_rewrite ? rel_rewrite_remote : NULL;
518 : }
519 :
520 :
521 : static sql_rel *
522 5588 : rel_remote_func_(visitor *v, sql_rel *rel)
523 : {
524 5588 : (void) v;
525 :
526 : /* Don't modify the same relation twice */
527 5588 : if (is_rel_remote_func_used(rel->used))
528 : return rel;
529 5194 : rel->used |= rel_remote_func_used;
530 :
531 5194 : if (find_prop(rel->p, PROP_REMOTE) != NULL) {
532 300 : list *exps = rel_projections(v->sql, rel, NULL, 1, 1);
533 300 : rel = rel_unique_exps(v->sql, rel); /* remove any duplicate results (aliases) */
534 300 : rel = rel_relational_func(v->sql->sa, rel, exps);
535 : }
536 : return rel;
537 : }
538 :
539 : static sql_rel *
540 1223 : rel_remote_func(visitor *v, global_props *gp, sql_rel *rel)
541 : {
542 1223 : (void) gp;
543 1223 : return rel_visitor_bottomup(v, rel, &rel_remote_func_);
544 : }
545 :
546 : run_optimizer
547 699146 : bind_remote_func(visitor *v, global_props *gp)
548 : {
549 699146 : (void) v;
550 699146 : return gp->needs_mergetable_rewrite || gp->needs_remote_replica_rewrite ? rel_remote_func : NULL;
551 : }
|