Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * Martin Kersten
15 : * Multiple association tables
16 : * A MAT is a convenient way to represent horizontally fragmented
17 : * tables. It combines the definitions of several, type compatible
18 : * BATs under a single name.
19 : * It is produced by the mitosis optimizer and the operations
20 : * are the target of the mergetable optimizer.
21 : *
22 : * The MAT is materialized when the operations
23 : * can not deal with the components individually,
24 : * or the incremental operation is not supported.
25 : * Normally all mat.new() operations are removed by the
26 : * mergetable optimizer.
27 : * In case a mat.new() is retained in the code, then it will
28 : * behave as a mat.pack();
29 : *
30 : * The primitives below are chosen to accommodate the SQL
31 : * front-end to produce reasonably efficient code.
32 : */
33 : #include "monetdb_config.h"
34 : #include "mal_resolve.h"
35 : #include "mal_exception.h"
36 : #include "mal_interpreter.h"
37 :
38 : /*
39 : * The pack is an ordinary multi BAT insert. Oid synchronization
40 : * between pieces should be ensured by the code generators.
41 : * The pack operation could be quite expensive, because it
42 : * may create a really large BAT.
43 : * The slice over a mat helps to avoid constructing intermediates
44 : * that are subsequently reduced.
45 : * Contrary to most operations, NIL arguments are skipped and
46 : * do not produce RUNTIME_OBJECT_MISSING.
47 : */
48 : static str
49 82800 : MATpackInternal(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
50 : {
51 82800 : int i;
52 82800 : bat *ret = getArgReference_bat(stk, p, 0);
53 82800 : BAT *b, *bn = NULL;
54 82800 : BUN cap = 0;
55 82800 : int tt = TYPE_any;
56 82800 : int rt = getArgType(mb, p, 0), unmask = 0;
57 82800 : (void) cntxt;
58 :
59 413574 : for (i = 1; i < p->argc; i++) {
60 330811 : bat bid = stk->stk[getArg(p, i)].val.bval;
61 330811 : b = BBPquickdesc(bid);
62 330774 : if (b) {
63 330774 : if (tt == TYPE_any)
64 82794 : tt = b->ttype;
65 330774 : if ((tt != TYPE_void && b->ttype != TYPE_void
66 318406 : && b->ttype != TYPE_msk) && tt != b->ttype)
67 0 : throw(MAL, "mat.pack", "incompatible arguments");
68 330774 : cap += BATcount(b);
69 : }
70 : }
71 82763 : if (tt == TYPE_any) {
72 0 : *ret = bat_nil;
73 0 : return MAL_SUCCEED;
74 : }
75 :
76 82763 : if (tt == TYPE_msk && rt == newBatType(TYPE_oid)) {
77 0 : tt = TYPE_oid;
78 0 : unmask = 1;
79 : }
80 82763 : bn = COLnew(0, tt, cap, TRANSIENT);
81 82798 : if (bn == NULL)
82 0 : throw(MAL, "mat.pack", SQLSTATE(HY013) MAL_MALLOC_FAIL);
83 :
84 413615 : for (i = 1; i < p->argc; i++) {
85 330825 : if (!(b = BATdescriptor(stk->stk[getArg(p, i)].val.ival))) {
86 0 : BBPreclaim(bn);
87 0 : throw(MAL, "mat.pack", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
88 : }
89 330817 : if ((unmask && b->ttype == TYPE_msk) || mask_cand(b)) {
90 740 : BAT *ob = b;
91 740 : b = BATunmask(b);
92 740 : BBPunfix(ob->batCacheid);
93 740 : if (!b) {
94 0 : BBPreclaim(bn);
95 0 : throw(MAL, "mat.pack", GDK_EXCEPTION);
96 : }
97 : }
98 330817 : if (BATcount(bn) == 0) {
99 83490 : BAThseqbase(bn, b->hseqbase);
100 83499 : BATtseqbase(bn, b->tseqbase);
101 : }
102 330822 : if (BATappend(bn, b, NULL, false) != GDK_SUCCEED) {
103 0 : BBPreclaim(bn);
104 0 : BBPunfix(b->batCacheid);
105 0 : throw(MAL, "mat.pack", GDK_EXCEPTION);
106 : }
107 330989 : BBPunfix(b->batCacheid);
108 : }
109 82790 : if (bn->tnil && bn->tnonil) {
110 0 : BBPreclaim(bn);
111 0 : throw(MAL, "mat.pack",
112 : "INTERNAL ERROR" "bn->tnil or bn->tnonil fails ");
113 : }
114 82790 : *ret = bn->batCacheid;
115 82790 : BBPkeepref(bn);
116 82790 : return MAL_SUCCEED;
117 : }
118 :
119 : /*
120 : * Enable incremental packing. The SQL front-end requires
121 : * fixed oid sequences.
122 : */
123 : static str
124 659842 : MATpackIncrement(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
125 : {
126 659842 : bat *ret = getArgReference_bat(stk, p, 0);
127 659842 : int pieces;
128 659842 : BAT *b, *bb, *bn;
129 659842 : size_t newsize;
130 :
131 659842 : (void) cntxt;
132 659842 : b = BATdescriptor(stk->stk[getArg(p, 1)].val.ival);
133 659822 : if (b == NULL)
134 0 : throw(MAL, "mat.pack", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
135 :
136 659822 : if (getArgType(mb, p, 2) == TYPE_int) {
137 : /* first step, estimate with some slack */
138 167326 : pieces = stk->stk[getArg(p, 2)].val.ival;
139 167326 : int tt = ATOMtype(b->ttype);
140 167326 : if (b->ttype == TYPE_msk)
141 0 : tt = TYPE_oid;
142 167326 : bn = COLnew(b->hseqbase, tt, (BUN) (1.2 * BATcount(b) * pieces),
143 : TRANSIENT);
144 167328 : if (bn == NULL) {
145 0 : BBPunfix(b->batCacheid);
146 0 : throw(MAL, "mat.pack", SQLSTATE(HY013) MAL_MALLOC_FAIL);
147 : }
148 : /* allocate enough space for the vheap, but not for strings,
149 : * since BATappend does clever things for strings, and not for
150 : * vheap views since they may well get shared */
151 167328 : if (b->tvheap && b->tvheap->parentid == b->batCacheid && bn->tvheap
152 46692 : && ATOMstorage(b->ttype) != TYPE_str) {
153 33 : newsize = b->tvheap->size * pieces;
154 33 : if (HEAPextend(bn->tvheap, newsize, true) != GDK_SUCCEED) {
155 0 : BBPunfix(b->batCacheid);
156 0 : BBPreclaim(bn);
157 0 : throw(MAL, "mat.pack", GDK_EXCEPTION);
158 : }
159 : }
160 167328 : BATtseqbase(bn, b->tseqbase);
161 167327 : if (b->ttype == TYPE_msk || mask_cand(b)) {
162 0 : BAT *ob = b;
163 0 : b = BATunmask(b);
164 0 : BBPunfix(ob->batCacheid);
165 0 : if (!b) {
166 0 : BBPreclaim(bn);
167 0 : throw(MAL, "mat.pack", GDK_EXCEPTION);
168 : }
169 : }
170 167327 : if (BATappend(bn, b, NULL, false) != GDK_SUCCEED) {
171 0 : BBPreclaim(bn);
172 0 : BBPunfix(b->batCacheid);
173 0 : throw(MAL, "mat.pack", GDK_EXCEPTION);
174 : }
175 167325 : bn->unused = (pieces - 1); /* misuse "unused" field */
176 167325 : BBPunfix(b->batCacheid);
177 167313 : if (bn->tnil && bn->tnonil) {
178 0 : BBPreclaim(bn);
179 0 : throw(MAL, "mat.pack",
180 0 : "INTERNAL ERROR" " bn->tnil %d bn->tnonil %d", bn->tnil,
181 0 : bn->tnonil);
182 : }
183 167313 : *ret = bn->batCacheid;
184 167313 : BBPretain(bn->batCacheid);
185 167328 : BBPunfix(bn->batCacheid);
186 : } else {
187 : /* remaining steps */
188 492496 : if (!(bb = BATdescriptor(stk->stk[getArg(p, 2)].val.ival))) {
189 0 : BBPunfix(b->batCacheid);
190 0 : throw(MAL, "mat.pack", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
191 : }
192 492224 : if (bb->ttype == TYPE_msk || mask_cand(bb)) {
193 0 : BAT *obb = bb;
194 0 : bb = BATunmask(bb);
195 0 : BBPunfix(obb->batCacheid);
196 0 : if (!bb) {
197 0 : BBPunfix(b->batCacheid);
198 0 : throw(MAL, "mat.pack", GDK_EXCEPTION);
199 : }
200 : }
201 492224 : if (BATcount(b) == 0) {
202 272836 : BAThseqbase(b, bb->hseqbase);
203 272875 : BATtseqbase(b, bb->tseqbase);
204 : }
205 492197 : if (BATappend(b, bb, NULL, false) != GDK_SUCCEED) {
206 0 : BBPunfix(bb->batCacheid);
207 0 : BBPunfix(b->batCacheid);
208 0 : throw(MAL, "mat.pack", GDK_EXCEPTION);
209 : }
210 492474 : BBPunfix(bb->batCacheid);
211 492438 : b->unused--;
212 492438 : if (b->unused == 0 && (b = BATsetaccess(b, BAT_READ)) == NULL)
213 0 : throw(MAL, "mat.pack", GDK_EXCEPTION);
214 492449 : if (b->tnil && b->tnonil) {
215 0 : BBPunfix(b->batCacheid);
216 0 : throw(MAL, "mat.pack",
217 : "INTERNAL ERROR" " b->tnil or b->tnonil fails ");
218 : }
219 492449 : *ret = b->batCacheid;
220 492449 : BBPretain(b->batCacheid);
221 492498 : BBPunfix(b->batCacheid);
222 : }
223 : return MAL_SUCCEED;
224 : }
225 :
226 : static str
227 82800 : MATpack(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
228 : {
229 82800 : return MATpackInternal(cntxt, mb, stk, p);
230 : }
231 :
232 : static str
233 306564 : MATpackValues(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
234 : {
235 306564 : int i, type, first = 1;
236 306564 : bat *ret;
237 306564 : BAT *bn;
238 :
239 306564 : (void) cntxt;
240 306564 : type = getArgType(mb, p, first);
241 306564 : bn = COLnew(0, type, p->argc, TRANSIENT);
242 306544 : if (bn == NULL)
243 0 : throw(MAL, "mat.pack", SQLSTATE(HY013) MAL_MALLOC_FAIL);
244 :
245 306544 : if (ATOMextern(type)) {
246 937586 : for (i = first; i < p->argc; i++)
247 754983 : if (BUNappend(bn, stk->stk[getArg(p, i)].val.pval, false) != GDK_SUCCEED)
248 0 : goto bailout;
249 : } else {
250 639100 : for (i = first; i < p->argc; i++)
251 515572 : if (BUNappend(bn, getArgReference(stk, p, i), false) != GDK_SUCCEED)
252 0 : goto bailout;
253 : }
254 306131 : ret = getArgReference_bat(stk, p, 0);
255 306131 : *ret = bn->batCacheid;
256 306131 : BBPkeepref(bn);
257 306131 : return MAL_SUCCEED;
258 0 : bailout:
259 0 : BBPreclaim(bn);
260 0 : throw(MAL, "mat.pack", GDK_EXCEPTION);
261 : }
262 :
263 : #include "mel.h"
264 : mel_func mat_init_funcs[] = {
265 : pattern("mat", "new", MATpack, false, "Define a Merge Association Table (MAT). Fall back to the pack operation\nwhen this is called ", args(1,2, batargany("",2),batvarargany("b",2))),
266 : pattern("bat", "pack", MATpackValues, false, "Materialize the values into a BAT. Avoiding a clash with mat.pack() in mergetable", args(1,2, batargany("",2),varargany("",2))),
267 : pattern("mat", "pack", MATpackValues, false, "Materialize the MAT (of values) into a BAT", args(1,2, batargany("",2),varargany("",2))),
268 : pattern("mat", "pack", MATpack, false, "Materialize the MAT into a BAT", args(1,2, batargany("",2),batvarargany("b",2))),
269 : pattern("mat", "packIncrement", MATpackIncrement, false, "Prepare incremental mat pack", args(1,3, batargany("",2),batargany("b",2),arg("pieces",int))),
270 : pattern("mat", "packIncrement", MATpackIncrement, false, "Prepare incremental mat pack", args(1,3, batargany("",2),batargany("b",2),batargany("c",2))),
271 : { .imp=NULL }
272 : };
273 : #include "mal_import.h"
274 : #ifdef _MSC_VER
275 : #undef read
276 : #pragma section(".CRT$XCU",read)
277 : #endif
278 329 : LIB_STARTUP_FUNC(init_mat_mal)
279 329 : { mal_module("mat", NULL, mat_init_funcs); }
|