Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * author M.L.Kersten
15 : * BAT Iterators
16 : * Many low level algorithms rely on an iterator to break a
17 : * collection into smaller pieces. Each piece is subsequently processed
18 : * by a block.
19 : *
20 : * For very large BATs it may make sense to break it into chunks
21 : * and process them separately to solve a query. An iterator pair is
22 : * provided to chop a BAT into fixed size elements.
23 : * Each chunk is made available as a BATview.
24 : * It provides read-only access to an underlying BAT. Adjusting the bounds
25 : * is cheap, once the BATview descriptor has been constructed.
26 : *
27 : * The smallest granularity is a single BUN, which can be used
28 : * to realize an iterator over the individual BAT elements.
29 : * For larger sized chunks, the operators return a BATview.
30 : *
31 : * All iterators require storage space to administer the
32 : * location of the next element. The BAT iterator module uses a simple
33 : * lng variable, which also acts as a cursor for barrier statements.
34 : *
35 : * The larger chunks produced are currently static, i.e.
36 : * their size is a parameter of the call. Dynamic chunk sizes
37 : * are interesting for time-series query processing. (See another module)
38 : *
39 : */
40 :
41 : #include "monetdb_config.h"
42 : #include "mal.h"
43 : #include "mal_interpreter.h"
44 : #include "mal_exception.h"
45 :
46 : /*
47 : * We start with the large chunk iterator.
48 : * The definition of the control statements require the same
49 : * control variables, which means that the BATview is accessible
50 : * to determine how far to advance when the next chunk is retrieved.
51 : * The number of elements in the chunk is limited by the granule
52 : * size.
53 : */
54 : static str
55 1 : ITRnewChunk(lng *res, bat *vid, const bat *bid, const lng *granule)
56 : {
57 1 : BAT *b, *view;
58 :
59 1 : if ((b = BATdescriptor(*bid)) == NULL) {
60 0 : throw(MAL, "chop.newChunk", INTERNAL_BAT_ACCESS);
61 : }
62 : /* printf("set bat chunk bound to " LLFMT " 0 - " BUNFMT "\n",
63 : *granule, MIN(BATcount(b),(BUN) *granule)); */
64 1 : view = VIEWcreate(b->hseqbase, b, 0, (BUN) *granule);
65 1 : if (view == NULL) {
66 0 : BBPunfix(b->batCacheid);
67 0 : throw(MAL, "chop.newChunk", GDK_EXCEPTION);
68 : }
69 :
70 1 : *vid = view->batCacheid;
71 1 : BBPkeepref(view);
72 1 : BBPunfix(b->batCacheid);
73 1 : *res = 0;
74 1 : return MAL_SUCCEED;
75 : }
76 :
77 : /*
78 : * The nextChunk version advances the reader,
79 : * which also means that the view descriptor is already available.
80 : * The granule size may differ in each call.
81 : */
82 : static str
83 3 : ITRnextChunk(lng *res, bat *vid, const bat *bid, const lng *granule)
84 : {
85 3 : BAT *b, *view;
86 3 : BUN i;
87 :
88 3 : if ((b = BATdescriptor(*bid)) == NULL) {
89 0 : throw(MAL, "iterator.nextChunk", INTERNAL_BAT_ACCESS);
90 : }
91 3 : if ((view = BATdescriptor(*vid)) == NULL) {
92 0 : BBPunfix(b->batCacheid);
93 0 : throw(MAL, "iterator.nextChunk", INTERNAL_BAT_ACCESS);
94 : }
95 3 : i = (BUN) (*res + BATcount(view));
96 3 : if (i >= BATcount(b)) {
97 1 : *res = lng_nil;
98 1 : *vid = 0;
99 1 : BBPunfix(view->batCacheid);
100 1 : BBPunfix(b->batCacheid);
101 1 : return MAL_SUCCEED;
102 : }
103 : /* printf("set bat chunk bound to " BUNFMT " - " BUNFMT " \n",
104 : i, i+(BUN) *granule-1); */
105 2 : VIEWbounds(b, view, i, i + (BUN) *granule);
106 2 : MT_lock_set(&b->theaplock);
107 2 : view->tkey = b->tkey | (*granule <= 1);
108 2 : MT_lock_unset(&b->theaplock);
109 2 : BAThseqbase(view, is_oid_nil(b->hseqbase) ? oid_nil : b->hseqbase + i);
110 2 : *vid = view->batCacheid;
111 2 : BBPkeepref(view);
112 2 : BBPunfix(b->batCacheid);
113 2 : *res = i;
114 2 : return MAL_SUCCEED;
115 : }
116 :
117 : /*
118 : * @-
119 : * The BUN- and BAT-stream manipulate a long handle, i.e.
120 : * the destination variable. It assumes it has been set to
121 : * zero as part of runtime stack initialization. Subsequently,
122 : * it fetches a bun and returns the increment to the control
123 : * variable. If it returns zero the control variable has been reset
124 : * to zero and end of stream has been reached.
125 : */
126 : static str
127 3980 : ITRbunIterator(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
128 : {
129 3980 : BATiter bi;
130 3980 : BAT *b;
131 3980 : oid *head;
132 3980 : bat *bid;
133 3980 : ValPtr tail;
134 :
135 3980 : (void) cntxt;
136 3980 : (void) mb;
137 3980 : head = getArgReference_oid(stk, pci, 0);
138 3980 : tail = &stk->stk[pci->argv[1]];
139 3980 : bid = getArgReference_bat(stk, pci, 2);
140 :
141 3980 : if ((b = BATdescriptor(*bid)) == NULL) {
142 0 : throw(MAL, "iterator.nextChunk", INTERNAL_BAT_ACCESS);
143 : }
144 :
145 3980 : if (BATcount(b) == 0) {
146 3742 : *head = oid_nil;
147 3742 : BBPunfix(b->batCacheid);
148 3742 : return MAL_SUCCEED;
149 : }
150 238 : *head = 0;
151 :
152 238 : bi = bat_iterator(b);
153 476 : if (VALinit(tail, ATOMtype(b->ttype), BUNtail(bi, *head)) == NULL) {
154 0 : bat_iterator_end(&bi);
155 0 : BBPunfix(b->batCacheid);
156 0 : throw(MAL, "iterator.nextChunk", SQLSTATE(HY013) MAL_MALLOC_FAIL);
157 : }
158 238 : bat_iterator_end(&bi);
159 238 : BBPunfix(b->batCacheid);
160 238 : return MAL_SUCCEED;
161 : }
162 :
163 : static str
164 44681 : ITRbunNext(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
165 : {
166 44681 : BATiter bi;
167 44681 : BAT *b;
168 44681 : oid *head;
169 44681 : bat *bid;
170 44681 : ValPtr tail;
171 :
172 44681 : (void) cntxt;
173 44681 : (void) mb;
174 44681 : head = getArgReference_oid(stk, pci, 0);
175 44681 : tail = &stk->stk[pci->argv[1]];
176 44681 : bid = getArgReference_bat(stk, pci, 2);
177 :
178 44681 : if ((b = BATdescriptor(*bid)) == NULL) {
179 0 : throw(MAL, "iterator.nextChunk", INTERNAL_BAT_ACCESS);
180 : }
181 :
182 44681 : *head = *head + 1;
183 44681 : if (*head >= BATcount(b)) {
184 232 : *head = oid_nil;
185 232 : BBPunfix(b->batCacheid);
186 232 : return MAL_SUCCEED;
187 : }
188 44449 : bi = bat_iterator(b);
189 88898 : if (VALinit(tail, ATOMtype(b->ttype), BUNtail(bi, *head)) == NULL) {
190 0 : bat_iterator_end(&bi);
191 0 : BBPunfix(b->batCacheid);
192 0 : throw(MAL, "iterator.nextChunk", SQLSTATE(HY013) MAL_MALLOC_FAIL);
193 : }
194 44449 : bat_iterator_end(&bi);
195 44449 : BBPunfix(b->batCacheid);
196 44449 : return MAL_SUCCEED;
197 : }
198 :
199 : static str
200 2 : ITRnext_oid(oid *i, const oid *step, const oid *last)
201 : {
202 2 : oid v = *i;
203 2 : v = v + *step;
204 2 : *i = v;
205 2 : if (*last <= v)
206 1 : *i = oid_nil;
207 2 : return MAL_SUCCEED;
208 : }
209 :
210 : static str
211 13210250 : ITRnext_lng(lng *i, const lng *step, const lng *last)
212 : {
213 13210250 : lng v = *i;
214 13210250 : v = v + *step;
215 13210250 : *i = v;
216 13210250 : if (*last <= v)
217 13 : *i = lng_nil;
218 13210250 : return MAL_SUCCEED;
219 : }
220 :
221 : #ifdef HAVE_HGE
222 : static str
223 0 : ITRnext_hge(hge *i, const hge *step, const hge *last)
224 : {
225 0 : hge v = *i;
226 0 : v = v + *step;
227 0 : *i = v;
228 0 : if (*last <= v)
229 0 : *i = hge_nil;
230 0 : return MAL_SUCCEED;
231 : }
232 : #endif
233 : static str
234 116 : ITRnext_int(int *i, const int *step, const int *last)
235 : {
236 116 : int v = *i;
237 116 : v = v + *step;
238 116 : *i = v;
239 116 : if (*last <= v)
240 5 : *i = int_nil;
241 116 : return MAL_SUCCEED;
242 : }
243 :
244 : static str
245 0 : ITRnext_sht(sht *i, const sht *step, const sht *last)
246 : {
247 0 : sht v = *i;
248 0 : v = v + *step;
249 0 : *i = v;
250 0 : if (*last <= v)
251 0 : *i = int_nil;
252 0 : return MAL_SUCCEED;
253 : }
254 :
255 : static str
256 2 : ITRnext_flt(flt *i, const flt *step, const flt *last)
257 : {
258 2 : flt v = *i;
259 2 : v = v + *step;
260 2 : *i = v;
261 2 : if (*last <= v)
262 1 : *i = flt_nil;
263 2 : return MAL_SUCCEED;
264 : }
265 :
266 : static str
267 0 : ITRnext_dbl(dbl *i, const dbl *step, const dbl *last)
268 : {
269 0 : dbl v = *i;
270 0 : v = v + *step;
271 0 : *i = v;
272 0 : if (*last <= v)
273 0 : *i = dbl_nil;
274 0 : return MAL_SUCCEED;
275 : }
276 :
277 : #include "mel.h"
278 : mel_func iterator_init_funcs[] = {
279 : command("iterator", "new", ITRnewChunk, false, "Create an iterator with fixed granule size.\nThe result is a view.", args(2,4, arg("",lng),batargany("",1),batargany("b",1),arg("size",lng))),
280 : command("iterator", "next", ITRnextChunk, false, "Produce the next chunk for processing.", args(2,4, arg("",lng),batargany("",1),batargany("b",1),arg("size",lng))),
281 : pattern("iterator", "new", ITRbunIterator, false, "Process the buns one by one extracted from a void table.", args(2,3, arg("h",oid),argany("t",1),batargany("b",1))),
282 : pattern("iterator", "next", ITRbunNext, false, "Produce the next bun for processing.", args(2,3, arg("h",oid),argany("t",1),batargany("b",1))),
283 : command("iterator", "next", ITRnext_oid, false, "", args(1,3, arg("",oid),arg("step",oid),arg("last",oid))),
284 : command("iterator", "next", ITRnext_sht, false, "", args(1,3, arg("",sht),arg("step",sht),arg("last",sht))),
285 : command("iterator", "next", ITRnext_int, false, "", args(1,3, arg("",int),arg("step",int),arg("last",int))),
286 : command("iterator", "next", ITRnext_lng, false, "", args(1,3, arg("",lng),arg("step",lng),arg("last",lng))),
287 : command("iterator", "next", ITRnext_flt, false, "", args(1,3, arg("",flt),arg("step",flt),arg("last",flt))),
288 : command("iterator", "next", ITRnext_dbl, false, "Advances the iterator with a fixed value", args(1,3, arg("",dbl),arg("step",dbl),arg("last",dbl))),
289 : #ifdef HAVE_HGE
290 : command("iterator", "next", ITRnext_hge, false, "", args(1,3, arg("",hge),arg("step",hge),arg("last",hge))),
291 : #endif
292 : { .imp=NULL }
293 : };
294 : #include "mal_import.h"
295 : #ifdef _MSC_VER
296 : #undef read
297 : #pragma section(".CRT$XCU",read)
298 : #endif
299 323 : LIB_STARTUP_FUNC(init_iterator_mal)
300 323 : { mal_module("iterator", NULL, iterator_init_funcs); }
|