Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * Martin Kersten
15 : * Multiple association tables
16 : * A MAT is a convenient way to represent horizontally fragmented
17 : * tables. It combines the definitions of several, type compatible
18 : * BATs under a single name.
19 : * It is produced by the mitosis optimizer and the operations
20 : * are the target of the mergetable optimizer.
21 : *
22 : * The MAT is materialized when the operations
23 : * can not deal with the components individually,
24 : * or the incremental operation is not supported.
25 : * Normally all mat.new() operations are removed by the
26 : * mergetable optimizer.
27 : * In case a mat.new() is retained in the code, then it will
28 : * behave as a mat.pack();
29 : *
30 : * The primitives below are chosen to accommodate the SQL
31 : * front-end to produce reasonably efficient code.
32 : */
33 : #include "monetdb_config.h"
34 : #include "mal_resolve.h"
35 : #include "mal_exception.h"
36 : #include "mal_interpreter.h"
37 :
38 : /*
39 : * The pack is an ordinary multi BAT insert. Oid synchronistion
40 : * between pieces should be ensured by the code generators.
41 : * The pack operation could be quite expensive, because it
42 : * may create a really large BAT.
43 : * The slice over a mat helps to avoid constructing intermediates
44 : * that are subsequently reduced.
45 : * Contrary to most operations, NIL arguments are skipped and
46 : * do not produce RUNTIME_OBJECT_MISSING.
47 : */
48 : static str
49 96934 : MATpackInternal(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
50 : {
51 96934 : int i;
52 96934 : bat *ret = getArgReference_bat(stk, p, 0);
53 96934 : BAT *b, *bn = NULL;
54 96934 : BUN cap = 0;
55 96934 : int tt = TYPE_any;
56 96934 : int rt = getArgType(mb, p, 0), unmask = 0;
57 96934 : (void) cntxt;
58 :
59 484314 : for (i = 1; i < p->argc; i++) {
60 387402 : bat bid = stk->stk[getArg(p, i)].val.bval;
61 387402 : b = BBPquickdesc(bid);
62 387380 : if (b) {
63 387380 : if (tt == TYPE_any)
64 96929 : tt = b->ttype;
65 387380 : if ((tt != TYPE_void && b->ttype != TYPE_void
66 322548 : && b->ttype != TYPE_msk) && tt != b->ttype)
67 0 : throw(MAL, "mat.pack", "incompatible arguments");
68 387380 : cap += BATcount(b);
69 : }
70 : }
71 96912 : if (tt == TYPE_any) {
72 0 : *ret = bat_nil;
73 0 : return MAL_SUCCEED;
74 : }
75 :
76 96912 : if (tt == TYPE_msk && rt == newBatType(TYPE_oid)) {
77 0 : tt = TYPE_oid;
78 0 : unmask = 1;
79 : }
80 96912 : bn = COLnew(0, tt, cap, TRANSIENT);
81 96930 : if (bn == NULL)
82 0 : throw(MAL, "mat.pack", SQLSTATE(HY013) MAL_MALLOC_FAIL);
83 :
84 484507 : for (i = 1; i < p->argc; i++) {
85 387575 : if (!(b = BATdescriptor(stk->stk[getArg(p, i)].val.ival))) {
86 0 : BBPreclaim(bn);
87 0 : throw(MAL, "mat.pack", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
88 : }
89 387336 : if ((unmask && b->ttype == TYPE_msk) || mask_cand(b)) {
90 5647 : BAT *ob = b;
91 5647 : b = BATunmask(b);
92 5647 : BBPunfix(ob->batCacheid);
93 5647 : if (!b) {
94 0 : BBPreclaim(bn);
95 0 : throw(MAL, "mat.pack", GDK_EXCEPTION);
96 : }
97 : }
98 387336 : if (BATcount(bn) == 0) {
99 97640 : BAThseqbase(bn, b->hseqbase);
100 97648 : BATtseqbase(bn, b->tseqbase);
101 : }
102 387336 : if (BATappend(bn, b, NULL, false) != GDK_SUCCEED) {
103 0 : BBPreclaim(bn);
104 0 : BBPunfix(b->batCacheid);
105 0 : throw(MAL, "mat.pack", GDK_EXCEPTION);
106 : }
107 387411 : BBPunfix(b->batCacheid);
108 : }
109 96932 : if (bn->tnil && bn->tnonil) {
110 0 : BBPreclaim(bn);
111 0 : throw(MAL, "mat.pack",
112 : "INTERNAL ERROR" "bn->tnil or bn->tnonil fails ");
113 : }
114 96932 : *ret = bn->batCacheid;
115 96932 : BBPkeepref(bn);
116 96932 : return MAL_SUCCEED;
117 : }
118 :
119 : /*
120 : * Enable incremental packing. The SQL front-end requires
121 : * fixed oid sequences.
122 : */
123 : static str
124 643080 : MATpackIncrement(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
125 : {
126 643080 : bat *ret = getArgReference_bat(stk, p, 0);
127 643080 : int pieces;
128 643080 : BAT *b, *bb, *bn;
129 643080 : size_t newsize;
130 :
131 643080 : (void) cntxt;
132 643080 : b = BATdescriptor(stk->stk[getArg(p, 1)].val.ival);
133 643064 : if (b == NULL)
134 0 : throw(MAL, "mat.pack", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
135 :
136 643064 : if (getArgType(mb, p, 2) == TYPE_int) {
137 : /* first step, estimate with some slack */
138 165707 : pieces = stk->stk[getArg(p, 2)].val.ival;
139 165707 : int tt = ATOMtype(b->ttype);
140 165707 : if (b->ttype == TYPE_msk)
141 0 : tt = TYPE_oid;
142 165707 : bn = COLnew(b->hseqbase, tt, (BUN) (1.2 * BATcount(b) * pieces),
143 : TRANSIENT);
144 165700 : if (bn == NULL) {
145 0 : BBPunfix(b->batCacheid);
146 0 : throw(MAL, "mat.pack", SQLSTATE(HY013) MAL_MALLOC_FAIL);
147 : }
148 : /* allocate enough space for the vheap, but not for strings,
149 : * since BATappend does clever things for strings, and not for
150 : * vheap views since they may well get shared */
151 165700 : if (b->tvheap && b->tvheap->parentid == b->batCacheid && bn->tvheap
152 64087 : && ATOMstorage(b->ttype) != TYPE_str) {
153 46 : newsize = b->tvheap->size * pieces;
154 46 : if (HEAPextend(bn->tvheap, newsize, true) != GDK_SUCCEED) {
155 0 : BBPunfix(b->batCacheid);
156 0 : BBPreclaim(bn);
157 0 : throw(MAL, "mat.pack", GDK_EXCEPTION);
158 : }
159 : }
160 165700 : BATtseqbase(bn, b->tseqbase);
161 165705 : if (b->ttype == TYPE_msk || mask_cand(b)) {
162 0 : BAT *ob = b;
163 0 : b = BATunmask(b);
164 0 : BBPunfix(ob->batCacheid);
165 0 : if (!b) {
166 0 : BBPreclaim(bn);
167 0 : throw(MAL, "mat.pack", GDK_EXCEPTION);
168 : }
169 : }
170 165705 : if (BATappend(bn, b, NULL, false) != GDK_SUCCEED) {
171 0 : BBPreclaim(bn);
172 0 : BBPunfix(b->batCacheid);
173 0 : throw(MAL, "mat.pack", GDK_EXCEPTION);
174 : }
175 165700 : bn->unused = (pieces - 1); /* misuse "unused" field */
176 165700 : BBPunfix(b->batCacheid);
177 165700 : if (bn->tnil && bn->tnonil) {
178 0 : BBPreclaim(bn);
179 0 : throw(MAL, "mat.pack",
180 0 : "INTERNAL ERROR" " bn->tnil %d bn->tnonil %d", bn->tnil,
181 0 : bn->tnonil);
182 : }
183 165700 : *ret = bn->batCacheid;
184 165700 : BBPretain(bn->batCacheid);
185 165698 : BBPunfix(bn->batCacheid);
186 : } else {
187 : /* remaining steps */
188 477357 : if (!(bb = BATdescriptor(stk->stk[getArg(p, 2)].val.ival))) {
189 0 : BBPunfix(b->batCacheid);
190 0 : throw(MAL, "mat.pack", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
191 : }
192 477238 : if (bb->ttype == TYPE_msk || mask_cand(bb)) {
193 0 : BAT *obb = bb;
194 0 : bb = BATunmask(bb);
195 0 : BBPunfix(obb->batCacheid);
196 0 : if (!bb) {
197 0 : BBPunfix(b->batCacheid);
198 0 : throw(MAL, "mat.pack", GDK_EXCEPTION);
199 : }
200 : }
201 477238 : if (BATcount(b) == 0) {
202 292119 : BAThseqbase(b, bb->hseqbase);
203 292196 : BATtseqbase(b, bb->tseqbase);
204 : }
205 477294 : if (BATappend(b, bb, NULL, false) != GDK_SUCCEED) {
206 0 : BBPunfix(bb->batCacheid);
207 0 : BBPunfix(b->batCacheid);
208 0 : throw(MAL, "mat.pack", GDK_EXCEPTION);
209 : }
210 477357 : BBPunfix(bb->batCacheid);
211 477331 : b->unused--;
212 477331 : if (b->unused == 0 && (b = BATsetaccess(b, BAT_READ)) == NULL)
213 0 : throw(MAL, "mat.pack", GDK_EXCEPTION);
214 477332 : if (b->tnil && b->tnonil) {
215 0 : BBPunfix(b->batCacheid);
216 0 : throw(MAL, "mat.pack",
217 : "INTERNAL ERROR" " b->tnil or b->tnonil fails ");
218 : }
219 477332 : *ret = b->batCacheid;
220 477332 : BBPretain(b->batCacheid);
221 477321 : BBPunfix(b->batCacheid);
222 : }
223 : return MAL_SUCCEED;
224 : }
225 :
226 : static str
227 96934 : MATpack(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
228 : {
229 96934 : return MATpackInternal(cntxt, mb, stk, p);
230 : }
231 :
232 : static str
233 324725 : MATpackValues(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
234 : {
235 324725 : int i, type, first = 1;
236 324725 : bat *ret;
237 324725 : BAT *bn;
238 :
239 324725 : (void) cntxt;
240 324725 : type = getArgType(mb, p, first);
241 324725 : bn = COLnew(0, type, p->argc, TRANSIENT);
242 324642 : if (bn == NULL)
243 0 : throw(MAL, "mat.pack", SQLSTATE(HY013) MAL_MALLOC_FAIL);
244 :
245 324642 : if (ATOMextern(type)) {
246 1019821 : for (i = first; i < p->argc; i++)
247 826201 : if (BUNappend(bn, stk->stk[getArg(p, i)].val.pval, false) != GDK_SUCCEED)
248 0 : goto bailout;
249 : } else {
250 692493 : for (i = first; i < p->argc; i++)
251 561652 : if (BUNappend(bn, getArgReference(stk, p, i), false) != GDK_SUCCEED)
252 0 : goto bailout;
253 : }
254 324461 : ret = getArgReference_bat(stk, p, 0);
255 324461 : *ret = bn->batCacheid;
256 324461 : BBPkeepref(bn);
257 324461 : return MAL_SUCCEED;
258 0 : bailout:
259 0 : BBPreclaim(bn);
260 0 : throw(MAL, "mat.pack", GDK_EXCEPTION);
261 : }
262 :
263 : #include "mel.h"
264 : mel_func mat_init_funcs[] = {
265 : pattern("mat", "new", MATpack, false, "Define a Merge Association Table (MAT). Fall back to the pack operation\nwhen this is called ", args(1,2, batargany("",1),batvarargany("b",1))),
266 : pattern("bat", "pack", MATpackValues, false, "Materialize the values into a BAT. Avoiding a clash with mat.pack() in mergetable", args(1,2, batargany("",1),varargany("",1))),
267 : pattern("mat", "pack", MATpackValues, false, "Materialize the MAT (of values) into a BAT", args(1,2, batargany("",1),varargany("",1))),
268 : pattern("mat", "pack", MATpack, false, "Materialize the MAT into a BAT", args(1,2, batargany("",1),batvarargany("b",1))),
269 : pattern("mat", "packIncrement", MATpackIncrement, false, "Prepare incremental mat pack", args(1,3, batargany("",1),batargany("b",1),arg("pieces",int))),
270 : pattern("mat", "packIncrement", MATpackIncrement, false, "Prepare incremental mat pack", args(1,3, batargany("",1),batargany("b",1),batargany("c",1))),
271 : { .imp=NULL }
272 : };
273 : #include "mal_import.h"
274 : #ifdef _MSC_VER
275 : #undef read
276 : #pragma section(".CRT$XCU",read)
277 : #endif
278 325 : LIB_STARTUP_FUNC(init_mat_mal)
279 325 : { mal_module("mat", NULL, mat_init_funcs); }
|