Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * author M.L.Kersten
15 : * BAT Iterators
16 : * Many low level algorithms rely on an iterator to break a
17 : * collection into smaller pieces. Each piece is subsequently processed
18 : * by a block.
19 : *
20 : * For very large BATs it may make sense to break it into chunks
21 : * and process them separately to solve a query. An iterator pair is
22 : * provided to chop a BAT into fixed size elements.
23 : * Each chunk is made available as a BATview.
24 : * It provides read-only access to an underlying BAT. Adjusting the bounds
25 : * is cheap, once the BATview descriptor has been constructed.
26 : *
27 : * The smallest granularity is a single BUN, which can be used
28 : * to realize an iterator over the individual BAT elements.
29 : * For larger sized chunks, the operators return a BATview.
30 : *
31 : * All iterators require storage space to administer the
32 : * location of the next element. The BAT iterator module uses a simple
33 : * lng variable, which also acts as a cursor for barrier statements.
34 : *
35 : * The larger chunks produced are currently static, i.e.
36 : * their size is a parameter of the call. Dynamic chunk sizes
37 : * are interesting for time-series query processing. (See another module)
38 : *
39 : */
40 :
41 : #include "monetdb_config.h"
42 : #include "mal.h"
43 : #include "mal_interpreter.h"
44 : #include "mal_exception.h"
45 :
46 : /*
47 : * We start with the large chunk iterator.
48 : * The definition of the control statements require the same
49 : * control variables, which means that the BATview is accessible
50 : * to determine how far to advance when the next chunk is retrieved.
51 : * The number of elements in the chunk is limited by the granule
52 : * size.
53 : */
54 : static str
55 1 : ITRnewChunk(lng *res, bat *vid, bat *bid, lng *granule)
56 : {
57 1 : BAT *b, *view;
58 1 : BUN cnt;
59 :
60 1 : if ((b = BATdescriptor(*bid)) == NULL) {
61 0 : throw(MAL, "chop.newChunk", INTERNAL_BAT_ACCESS);
62 : }
63 1 : cnt = BATcount(b);
64 1 : view = VIEWcreate(b->hseqbase, b);
65 1 : if (view == NULL) {
66 0 : BBPunfix(b->batCacheid);
67 0 : throw(MAL, "chop.newChunk", GDK_EXCEPTION);
68 : }
69 :
70 : /* printf("set bat chunk bound to " LLFMT " 0 - " BUNFMT "\n",
71 : *granule, MIN(cnt,(BUN) *granule)); */
72 1 : VIEWbounds(b, view, 0, MIN(cnt, (BUN) *granule));
73 1 : *vid = view->batCacheid;
74 1 : BBPkeepref(view);
75 1 : BBPunfix(b->batCacheid);
76 1 : *res = 0;
77 1 : return MAL_SUCCEED;
78 : }
79 :
80 : /*
81 : * The nextChunk version advances the reader,
82 : * which also means that the view descriptor is already available.
83 : * The granule size may differ in each call.
84 : */
85 : static str
86 3 : ITRnextChunk(lng *res, bat *vid, bat *bid, lng *granule)
87 : {
88 3 : BAT *b, *view;
89 3 : BUN i;
90 :
91 3 : if ((b = BATdescriptor(*bid)) == NULL) {
92 0 : throw(MAL, "iterator.nextChunk", INTERNAL_BAT_ACCESS);
93 : }
94 3 : if ((view = BATdescriptor(*vid)) == NULL) {
95 0 : BBPunfix(b->batCacheid);
96 0 : throw(MAL, "iterator.nextChunk", INTERNAL_BAT_ACCESS);
97 : }
98 3 : i = (BUN) (*res + BATcount(view));
99 3 : if (i >= BATcount(b)) {
100 1 : *res = lng_nil;
101 1 : *vid = 0;
102 1 : BBPunfix(view->batCacheid);
103 1 : BBPunfix(b->batCacheid);
104 1 : return MAL_SUCCEED;
105 : }
106 : /* printf("set bat chunk bound to " BUNFMT " - " BUNFMT " \n",
107 : i, i+(BUN) *granule-1); */
108 2 : VIEWbounds(b, view, i, i + (BUN) *granule);
109 2 : MT_lock_set(&b->theaplock);
110 2 : view->tkey = b->tkey | (*granule <= 1);
111 2 : MT_lock_unset(&b->theaplock);
112 2 : BAThseqbase(view, is_oid_nil(b->hseqbase) ? oid_nil : b->hseqbase + i);
113 2 : *vid = view->batCacheid;
114 2 : BBPkeepref(view);
115 2 : BBPunfix(b->batCacheid);
116 2 : *res = i;
117 2 : return MAL_SUCCEED;
118 : }
119 :
120 : /*
121 : * @-
122 : * The BUN- and BAT-stream manipulate a long handle, i.e.
123 : * the destination variable. It assumes it has been set to
124 : * zero as part of runtime stack initialization. Subsequently,
125 : * it fetches a bun and returns the increment to the control
126 : * variable. If it returns zero the control variable has been reset
127 : * to zero and end of stream has been reached.
128 : */
129 : static str
130 597 : ITRbunIterator(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
131 : {
132 597 : BATiter bi;
133 597 : BAT *b;
134 597 : oid *head;
135 597 : bat *bid;
136 597 : ValPtr tail;
137 :
138 597 : (void) cntxt;
139 597 : (void) mb;
140 597 : head = getArgReference_oid(stk, pci, 0);
141 597 : tail = &stk->stk[pci->argv[1]];
142 597 : bid = getArgReference_bat(stk, pci, 2);
143 :
144 597 : if ((b = BATdescriptor(*bid)) == NULL) {
145 0 : throw(MAL, "iterator.nextChunk", INTERNAL_BAT_ACCESS);
146 : }
147 :
148 597 : if (BATcount(b) == 0) {
149 423 : *head = oid_nil;
150 423 : BBPunfix(b->batCacheid);
151 423 : return MAL_SUCCEED;
152 : }
153 174 : *head = 0;
154 :
155 174 : bi = bat_iterator(b);
156 348 : if (VALinit(tail, ATOMtype(b->ttype), BUNtail(bi, *head)) == NULL) {
157 0 : bat_iterator_end(&bi);
158 0 : BBPunfix(b->batCacheid);
159 0 : throw(MAL, "iterator.nextChunk", SQLSTATE(HY013) MAL_MALLOC_FAIL);
160 : }
161 174 : bat_iterator_end(&bi);
162 174 : BBPunfix(b->batCacheid);
163 174 : return MAL_SUCCEED;
164 : }
165 :
166 : static str
167 47588 : ITRbunNext(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
168 : {
169 47588 : BATiter bi;
170 47588 : BAT *b;
171 47588 : oid *head;
172 47588 : bat *bid;
173 47588 : ValPtr tail;
174 :
175 47588 : (void) cntxt;
176 47588 : (void) mb;
177 47588 : head = getArgReference_oid(stk, pci, 0);
178 47588 : tail = &stk->stk[pci->argv[1]];
179 47588 : bid = getArgReference_bat(stk, pci, 2);
180 :
181 47588 : if ((b = BATdescriptor(*bid)) == NULL) {
182 0 : throw(MAL, "iterator.nextChunk", INTERNAL_BAT_ACCESS);
183 : }
184 :
185 47588 : *head = *head + 1;
186 47588 : if (*head >= BATcount(b)) {
187 168 : *head = oid_nil;
188 168 : BBPunfix(b->batCacheid);
189 168 : return MAL_SUCCEED;
190 : }
191 47420 : bi = bat_iterator(b);
192 94840 : if (VALinit(tail, ATOMtype(b->ttype), BUNtail(bi, *head)) == NULL) {
193 0 : bat_iterator_end(&bi);
194 0 : BBPunfix(b->batCacheid);
195 0 : throw(MAL, "iterator.nextChunk", SQLSTATE(HY013) MAL_MALLOC_FAIL);
196 : }
197 47420 : bat_iterator_end(&bi);
198 47420 : BBPunfix(b->batCacheid);
199 47420 : return MAL_SUCCEED;
200 : }
201 :
202 : static str
203 2 : ITRnext_oid(oid *i, oid *step, oid *last)
204 : {
205 2 : oid v = *i;
206 2 : v = v + *step;
207 2 : *i = v;
208 2 : if (*last <= v)
209 1 : *i = oid_nil;
210 2 : return MAL_SUCCEED;
211 : }
212 :
213 : static str
214 14210250 : ITRnext_lng(lng *i, lng *step, lng *last)
215 : {
216 14210250 : lng v = *i;
217 14210250 : v = v + *step;
218 14210250 : *i = v;
219 14210250 : if (*last <= v)
220 14 : *i = lng_nil;
221 14210250 : return MAL_SUCCEED;
222 : }
223 :
224 : #ifdef HAVE_HGE
225 : static str
226 0 : ITRnext_hge(hge *i, hge *step, hge *last)
227 : {
228 0 : hge v = *i;
229 0 : v = v + *step;
230 0 : *i = v;
231 0 : if (*last <= v)
232 0 : *i = hge_nil;
233 0 : return MAL_SUCCEED;
234 : }
235 : #endif
236 : static str
237 116 : ITRnext_int(int *i, int *step, int *last)
238 : {
239 116 : int v = *i;
240 116 : v = v + *step;
241 116 : *i = v;
242 116 : if (*last <= v)
243 5 : *i = int_nil;
244 116 : return MAL_SUCCEED;
245 : }
246 :
247 : static str
248 0 : ITRnext_sht(sht *i, sht *step, sht *last)
249 : {
250 0 : sht v = *i;
251 0 : v = v + *step;
252 0 : *i = v;
253 0 : if (*last <= v)
254 0 : *i = int_nil;
255 0 : return MAL_SUCCEED;
256 : }
257 :
258 : static str
259 2 : ITRnext_flt(flt *i, flt *step, flt *last)
260 : {
261 2 : flt v = *i;
262 2 : v = v + *step;
263 2 : *i = v;
264 2 : if (*last <= v)
265 1 : *i = flt_nil;
266 2 : return MAL_SUCCEED;
267 : }
268 :
269 : static str
270 0 : ITRnext_dbl(dbl *i, dbl *step, dbl *last)
271 : {
272 0 : dbl v = *i;
273 0 : v = v + *step;
274 0 : *i = v;
275 0 : if (*last <= v)
276 0 : *i = dbl_nil;
277 0 : return MAL_SUCCEED;
278 : }
279 :
280 : #include "mel.h"
281 : mel_func iterator_init_funcs[] = {
282 : command("iterator", "new", ITRnewChunk, false, "Create an iterator with fixed granule size.\nThe result is a view.", args(2,4, arg("",lng),batargany("",2),batargany("b",2),arg("size",lng))),
283 : command("iterator", "next", ITRnextChunk, false, "Produce the next chunk for processing.", args(2,4, arg("",lng),batargany("",2),batargany("b",2),arg("size",lng))),
284 : pattern("iterator", "new", ITRbunIterator, false, "Process the buns one by one extracted from a void table.", args(2,3, arg("h",oid),argany("t",2),batargany("b",2))),
285 : pattern("iterator", "next", ITRbunNext, false, "Produce the next bun for processing.", args(2,3, arg("h",oid),argany("t",2),batargany("b",2))),
286 : command("iterator", "next", ITRnext_oid, false, "", args(1,3, arg("",oid),arg("step",oid),arg("last",oid))),
287 : command("iterator", "next", ITRnext_sht, false, "", args(1,3, arg("",sht),arg("step",sht),arg("last",sht))),
288 : command("iterator", "next", ITRnext_int, false, "", args(1,3, arg("",int),arg("step",int),arg("last",int))),
289 : command("iterator", "next", ITRnext_lng, false, "", args(1,3, arg("",lng),arg("step",lng),arg("last",lng))),
290 : command("iterator", "next", ITRnext_flt, false, "", args(1,3, arg("",flt),arg("step",flt),arg("last",flt))),
291 : command("iterator", "next", ITRnext_dbl, false, "Advances the iterator with a fixed value", args(1,3, arg("",dbl),arg("step",dbl),arg("last",dbl))),
292 : #ifdef HAVE_HGE
293 : command("iterator", "next", ITRnext_hge, false, "", args(1,3, arg("",hge),arg("step",hge),arg("last",hge))),
294 : #endif
295 : { .imp=NULL }
296 : };
297 : #include "mal_import.h"
298 : #ifdef _MSC_VER
299 : #undef read
300 : #pragma section(".CRT$XCU",read)
301 : #endif
302 329 : LIB_STARTUP_FUNC(init_iterator_mal)
303 329 : { mal_module("iterator", NULL, iterator_init_funcs); }
|