Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * Martin Kersten
15 : * Multiple association tables
16 : * A MAT is a convenient way to represent horizontally fragmented
17 : * tables. It combines the definitions of several type-compatible
18 : * BATs under a single name.
19 : * It is produced by the mitosis optimizer and the operations
20 : * are the target of the mergetable optimizer.
21 : *
22 : * The MAT is materialized when the operations
23 : * can not deal with the components individually,
24 : * or the incremental operation is not supported.
25 : * Normally all mat.new() operations are removed by the
26 : * mergetable optimizer.
27 : * In case a mat.new() is retained in the code, then it will
28 : * behave as a mat.pack();
29 : *
30 : * The primitives below are chosen to accommodate the SQL
31 : * front-end to produce reasonably efficient code.
32 : */
33 : #include "monetdb_config.h"
34 : #include "mal_resolve.h"
35 : #include "mal_exception.h"
36 : #include "mal_interpreter.h"
37 :
38 : /*
39 : * The pack is an ordinary multi BAT insert. Oid synchronization
40 : * between pieces should be ensured by the code generators.
41 : * The pack operation could be quite expensive, because it
42 : * may create a really large BAT.
43 : * The slice over a mat helps to avoid constructing intermediates
44 : * that are subsequently reduced.
45 : * Contrary to most operations, NIL arguments are skipped and
46 : * do not produce RUNTIME_OBJECT_MISSING.
47 : */
48 : static str
49 97246 : MATpackInternal(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
50 : {
51 97246 : int i;
52 97246 : bat *ret = getArgReference_bat(stk, p, 0);
53 97246 : BAT *b, *bn = NULL;
54 97246 : BUN cap = 0;
55 97246 : int tt = TYPE_any;
56 97246 : int rt = getArgType(mb, p, 0), unmask = 0;
57 97246 : (void) cntxt;
58 :
59 485889 : for (i = 1; i < p->argc; i++) {
60 388669 : bat bid = stk->stk[getArg(p, i)].val.bval;
61 388669 : b = BBPquickdesc(bid);
62 388643 : if (b) {
63 388643 : if (tt == TYPE_any)
64 97238 : tt = b->ttype;
65 388643 : if ((tt != TYPE_void && b->ttype != TYPE_void
66 323589 : && b->ttype != TYPE_msk) && tt != b->ttype)
67 0 : throw(MAL, "mat.pack", "incompatible arguments");
68 388643 : cap += BATcount(b);
69 : }
70 : }
71 97220 : if (tt == TYPE_any) {
72 0 : *ret = bat_nil;
73 0 : return MAL_SUCCEED;
74 : }
75 :
76 97220 : if (tt == TYPE_msk && rt == newBatType(TYPE_oid)) {
77 0 : tt = TYPE_oid;
78 0 : unmask = 1;
79 : }
80 97220 : bn = COLnew(0, tt, cap, TRANSIENT);
81 97236 : if (bn == NULL)
82 0 : throw(MAL, "mat.pack", SQLSTATE(HY013) MAL_MALLOC_FAIL);
83 :
84 485890 : for (i = 1; i < p->argc; i++) {
85 388664 : if (!(b = BATdescriptor(stk->stk[getArg(p, i)].val.ival))) {
86 0 : BBPreclaim(bn);
87 0 : throw(MAL, "mat.pack", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
88 : }
89 388733 : if ((unmask && b->ttype == TYPE_msk) || mask_cand(b)) {
90 5332 : BAT *ob = b;
91 5332 : b = BATunmask(b);
92 5333 : BBPunfix(ob->batCacheid);
93 5333 : if (!b) {
94 0 : BBPreclaim(bn);
95 0 : throw(MAL, "mat.pack", GDK_EXCEPTION);
96 : }
97 : }
98 388734 : if (BATcount(bn) == 0) {
99 97956 : BAThseqbase(bn, b->hseqbase);
100 97962 : BATtseqbase(bn, b->tseqbase);
101 : }
102 388738 : if (BATappend(bn, b, NULL, false) != GDK_SUCCEED) {
103 0 : BBPreclaim(bn);
104 0 : BBPunfix(b->batCacheid);
105 0 : throw(MAL, "mat.pack", GDK_EXCEPTION);
106 : }
107 388665 : BBPunfix(b->batCacheid);
108 : }
109 97226 : if (bn->tnil && bn->tnonil) {
110 0 : BBPreclaim(bn);
111 0 : throw(MAL, "mat.pack",
112 : "INTERNAL ERROR" "bn->tnil or bn->tnonil fails ");
113 : }
114 97226 : *ret = bn->batCacheid;
115 97226 : BBPkeepref(bn);
116 97226 : return MAL_SUCCEED;
117 : }
118 :
119 : /*
120 : * Enable incremental packing. The SQL front-end requires
121 : * fixed oid sequences.
122 : */
123 : static str
124 642492 : MATpackIncrement(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
125 : {
126 642492 : bat *ret = getArgReference_bat(stk, p, 0);
127 642492 : int pieces;
128 642492 : BAT *b, *bb, *bn;
129 642492 : size_t newsize;
130 :
131 642492 : (void) cntxt;
132 642492 : b = BATdescriptor(stk->stk[getArg(p, 1)].val.ival);
133 642473 : if (b == NULL)
134 0 : throw(MAL, "mat.pack", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
135 :
136 642473 : if (getArgType(mb, p, 2) == TYPE_int) {
137 : /* first step, estimate with some slack */
138 165559 : pieces = stk->stk[getArg(p, 2)].val.ival;
139 165559 : int tt = ATOMtype(b->ttype);
140 165559 : if (b->ttype == TYPE_msk)
141 0 : tt = TYPE_oid;
142 165559 : bn = COLnew(b->hseqbase, tt, (BUN) (1.2 * BATcount(b) * pieces),
143 : TRANSIENT);
144 165546 : if (bn == NULL) {
145 0 : BBPunfix(b->batCacheid);
146 0 : throw(MAL, "mat.pack", SQLSTATE(HY013) MAL_MALLOC_FAIL);
147 : }
148 : /* allocate enough space for the vheap, but not for strings,
149 : * since BATappend does clever things for strings, and not for
150 : * vheap views since they may well get shared */
151 165546 : if (b->tvheap && b->tvheap->parentid == b->batCacheid && bn->tvheap
152 64018 : && ATOMstorage(b->ttype) != TYPE_str) {
153 46 : newsize = b->tvheap->size * pieces;
154 46 : if (HEAPextend(bn->tvheap, newsize, true) != GDK_SUCCEED) {
155 0 : BBPunfix(b->batCacheid);
156 0 : BBPreclaim(bn);
157 0 : throw(MAL, "mat.pack", GDK_EXCEPTION);
158 : }
159 : }
160 165546 : BATtseqbase(bn, b->tseqbase);
161 165559 : if (b->ttype == TYPE_msk || mask_cand(b)) {
162 0 : BAT *ob = b;
163 0 : b = BATunmask(b);
164 0 : BBPunfix(ob->batCacheid);
165 0 : if (!b) {
166 0 : BBPreclaim(bn);
167 0 : throw(MAL, "mat.pack", GDK_EXCEPTION);
168 : }
169 : }
170 165559 : if (BATappend(bn, b, NULL, false) != GDK_SUCCEED) {
171 0 : BBPreclaim(bn);
172 0 : BBPunfix(b->batCacheid);
173 0 : throw(MAL, "mat.pack", GDK_EXCEPTION);
174 : }
175 165553 : bn->tunique_est = b->tunique_est;
176 165553 : bn->unused = (pieces - 1); /* misuse "unused" field */
177 165553 : BBPunfix(b->batCacheid);
178 165519 : if (bn->tnil && bn->tnonil) {
179 0 : BBPreclaim(bn);
180 0 : throw(MAL, "mat.pack",
181 0 : "INTERNAL ERROR" " bn->tnil %d bn->tnonil %d", bn->tnil,
182 0 : bn->tnonil);
183 : }
184 165519 : *ret = bn->batCacheid;
185 165519 : BBPretain(bn->batCacheid);
186 165541 : BBPunfix(bn->batCacheid);
187 : } else {
188 : /* remaining steps */
189 476914 : if (!(bb = BATdescriptor(stk->stk[getArg(p, 2)].val.ival))) {
190 0 : BBPunfix(b->batCacheid);
191 0 : throw(MAL, "mat.pack", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
192 : }
193 476809 : if (bb->ttype == TYPE_msk || mask_cand(bb)) {
194 0 : BAT *obb = bb;
195 0 : bb = BATunmask(bb);
196 0 : BBPunfix(obb->batCacheid);
197 0 : if (!bb) {
198 0 : BBPunfix(b->batCacheid);
199 0 : throw(MAL, "mat.pack", GDK_EXCEPTION);
200 : }
201 : }
202 476809 : if (BATcount(b) == 0) {
203 291680 : BAThseqbase(b, bb->hseqbase);
204 291755 : BATtseqbase(b, bb->tseqbase);
205 : }
206 476846 : if (BATappend(b, bb, NULL, false) != GDK_SUCCEED) {
207 0 : BBPunfix(bb->batCacheid);
208 0 : BBPunfix(b->batCacheid);
209 0 : throw(MAL, "mat.pack", GDK_EXCEPTION);
210 : }
211 476906 : BBPunfix(bb->batCacheid);
212 476686 : b->tunique_est += bb->tunique_est;
213 476686 : b->unused--;
214 476686 : if (b->unused == 0 && (b = BATsetaccess(b, BAT_READ)) == NULL)
215 0 : throw(MAL, "mat.pack", GDK_EXCEPTION);
216 476721 : if (b->tnil && b->tnonil) {
217 0 : BBPunfix(b->batCacheid);
218 0 : throw(MAL, "mat.pack",
219 : "INTERNAL ERROR" " b->tnil or b->tnonil fails ");
220 : }
221 476721 : *ret = b->batCacheid;
222 476721 : BBPretain(b->batCacheid);
223 476854 : BBPunfix(b->batCacheid);
224 : }
225 : return MAL_SUCCEED;
226 : }
227 :
228 : static str
229 97245 : MATpack(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
230 : {
231 97245 : return MATpackInternal(cntxt, mb, stk, p);
232 : }
233 :
234 : static str
235 323929 : MATpackValues(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
236 : {
237 323929 : int i, type, first = 1;
238 323929 : bat *ret;
239 323929 : BAT *bn;
240 :
241 323929 : (void) cntxt;
242 323929 : type = getArgType(mb, p, first);
243 323929 : bn = COLnew(0, type, p->argc, TRANSIENT);
244 323806 : if (bn == NULL)
245 0 : throw(MAL, "mat.pack", SQLSTATE(HY013) MAL_MALLOC_FAIL);
246 :
247 323806 : if (ATOMextern(type)) {
248 1017738 : for (i = first; i < p->argc; i++)
249 824588 : if (BUNappend(bn, stk->stk[getArg(p, i)].val.pval, false) != GDK_SUCCEED)
250 0 : goto bailout;
251 : } else {
252 690990 : for (i = first; i < p->argc; i++)
253 560406 : if (BUNappend(bn, getArgReference(stk, p, i), false) != GDK_SUCCEED)
254 0 : goto bailout;
255 : }
256 323734 : ret = getArgReference_bat(stk, p, 0);
257 323734 : *ret = bn->batCacheid;
258 323734 : BBPkeepref(bn);
259 323734 : return MAL_SUCCEED;
260 0 : bailout:
261 0 : BBPreclaim(bn);
262 0 : throw(MAL, "mat.pack", GDK_EXCEPTION);
263 : }
264 :
265 : #include "mel.h"
266 : mel_func mat_init_funcs[] = {
267 : pattern("mat", "new", MATpack, false, "Define a Merge Association Table (MAT). Fall back to the pack operation\nwhen this is called ", args(1,2, batargany("",1),batvarargany("b",1))),
268 : pattern("bat", "pack", MATpackValues, false, "Materialize the values into a BAT. Avoiding a clash with mat.pack() in mergetable", args(1,2, batargany("",1),varargany("",1))),
269 : pattern("mat", "pack", MATpackValues, false, "Materialize the MAT (of values) into a BAT", args(1,2, batargany("",1),varargany("",1))),
270 : pattern("mat", "pack", MATpack, false, "Materialize the MAT into a BAT", args(1,2, batargany("",1),batvarargany("b",1))),
271 : pattern("mat", "packIncrement", MATpackIncrement, false, "Prepare incremental mat pack", args(1,3, batargany("",1),batargany("b",1),arg("pieces",int))),
272 : pattern("mat", "packIncrement", MATpackIncrement, false, "Prepare incremental mat pack", args(1,3, batargany("",1),batargany("b",1),batargany("c",1))),
273 : { .imp=NULL }
274 : };
275 : #include "mal_import.h"
276 : #ifdef _MSC_VER
277 : #undef read
278 : #pragma section(".CRT$XCU",read)
279 : #endif
280 324 : LIB_STARTUP_FUNC(init_mat_mal)
281 324 : { mal_module("mat", NULL, mat_init_funcs); }
|