Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * @a stefan manegold
15 : * @+
16 : * The microbenchmark routines are primarilly used to create a
17 : * simple database for testing the performance of core routines.
18 : * It was originally developed in the context of the Radix Cluster
19 : * activities.
20 : * @f microbenchmark
21 : */
22 : #include "monetdb_config.h"
23 : #include "mal.h"
24 : #include "mal_exception.h"
25 : #include "mal.h"
26 :
27 : static gdk_return
28 0 : BATrandom(BAT **bn, oid *base, lng *size, int *domain, int seed)
29 : {
30 0 : const BUN n = (BUN) *size;
31 0 : BUN i;
32 0 : BAT *b = NULL;
33 0 : int *restrict val;
34 :
35 0 : if (*size > (lng) BUN_MAX) {
36 0 : GDKerror("size must not exceed BUN_MAX");
37 0 : return GDK_FAIL;
38 : }
39 :
40 0 : if (*size < 0) {
41 0 : GDKerror("size must not be negative");
42 0 : return GDK_FAIL;
43 : }
44 :
45 0 : b = COLnew(*base, TYPE_int, n, TRANSIENT);
46 0 : if (b == NULL)
47 : return GDK_FAIL;
48 0 : if (n == 0) {
49 0 : b->tsorted = true;
50 0 : b->trevsorted = false;
51 0 : b->tseqbase = oid_nil;
52 0 : BATkey(b, true);
53 0 : *bn = b;
54 0 : return GDK_SUCCEED;
55 : }
56 0 : val = (int *) Tloc(b, 0);
57 :
58 : /* create BUNs with random distribution */
59 0 : if (!is_int_nil(seed))
60 0 : srand(seed);
61 0 : if (is_int_nil(*domain)) {
62 0 : for (i = 0; i < n; i++) {
63 0 : val[i] = rand();
64 : }
65 : #if RAND_MAX < 46340 /* 46340*46340 = 2147395600 < INT_MAX */
66 : } else if (*domain > RAND_MAX + 1) {
67 : for (i = 0; i < n; i++) {
68 : val[i] = (rand() * (RAND_MAX + 1) + rand()) % *domain;
69 : }
70 : #endif
71 : } else {
72 0 : for (i = 0; i < n; i++) {
73 0 : val[i] = rand() % *domain;
74 : }
75 : }
76 :
77 0 : BATsetcount(b, n);
78 0 : b->tsorted = false;
79 0 : b->trevsorted = false;
80 0 : b->tseqbase = oid_nil;
81 0 : BATkey(b, false);
82 0 : *bn = b;
83 0 : return GDK_SUCCEED;
84 : }
85 :
86 : static gdk_return
87 0 : BATuniform(BAT **bn, oid *base, lng *size, int *domain)
88 : {
89 0 : const BUN n = (BUN) *size;
90 0 : BUN i, r;
91 0 : BAT *b = NULL;
92 0 : int *restrict val;
93 0 : int v;
94 :
95 0 : if (*size > (lng) BUN_MAX) {
96 0 : GDKerror("size must not exceed BUN_MAX");
97 0 : return GDK_FAIL;
98 : }
99 :
100 0 : if (*size < 0) {
101 0 : GDKerror("size must not be negative");
102 0 : return GDK_FAIL;
103 : }
104 :
105 0 : b = COLnew(*base, TYPE_int, n, TRANSIENT);
106 0 : if (b == NULL)
107 : return GDK_FAIL;
108 0 : if (n == 0) {
109 0 : b->tsorted = true;
110 0 : b->trevsorted = false;
111 0 : b->tseqbase = oid_nil;
112 0 : BATkey(b, true);
113 0 : *bn = b;
114 0 : return GDK_SUCCEED;
115 : }
116 0 : val = (int *) Tloc(b, 0);
117 :
118 : /* create BUNs with uniform distribution */
119 0 : for (v = 0, i = 0; i < n; i++) {
120 0 : val[i] = v;
121 0 : if (++v >= *domain)
122 0 : v = 0;
123 : }
124 :
125 : /* mix BUNs randomly */
126 0 : for (r = 0, i = 0; i < n; i++) {
127 0 : const BUN j = i + ((r += rand()) % (n - i));
128 0 : const int tmp = val[i];
129 :
130 0 : val[i] = val[j];
131 0 : val[j] = tmp;
132 : }
133 :
134 0 : BATsetcount(b, n);
135 0 : b->tsorted = false;
136 0 : b->trevsorted = false;
137 0 : b->tseqbase = oid_nil;
138 0 : BATkey(b, *size <= *domain);
139 0 : *bn = b;
140 0 : return GDK_SUCCEED;
141 : }
142 :
143 : static gdk_return
144 0 : BATskewed(BAT **bn, oid *base, lng *size, int *domain, int *skew)
145 : {
146 0 : const BUN n = (BUN) *size;
147 0 : BUN i, r;
148 0 : BAT *b = NULL;
149 0 : int *restrict val;
150 0 : const BUN skewedSize = ((*skew) * n) / 100;
151 0 : const int skewedDomain = ((100 - (*skew)) * (*domain)) / 100;
152 :
153 0 : if (*size > (lng) BUN_MAX) {
154 0 : GDKerror("size must not exceed BUN_MAX = " BUNFMT, BUN_MAX);
155 0 : return GDK_FAIL;
156 : }
157 :
158 0 : if (*size < 0) {
159 0 : GDKerror("size must not be negative");
160 0 : return GDK_FAIL;
161 : }
162 :
163 0 : if (*skew > 100 || *skew < 0) {
164 0 : GDKerror("skew must be between 0 and 100");
165 0 : return GDK_FAIL;
166 : }
167 :
168 0 : b = COLnew(*base, TYPE_int, n, TRANSIENT);
169 0 : if (b == NULL)
170 : return GDK_FAIL;
171 0 : if (n == 0) {
172 0 : b->tsorted = true;
173 0 : b->trevsorted = false;
174 0 : b->tseqbase = oid_nil;
175 0 : BATkey(b, true);
176 0 : *bn = b;
177 0 : return GDK_SUCCEED;
178 : }
179 0 : val = (int *) Tloc(b, 0);
180 :
181 : /* create BUNs with skewed distribution */
182 0 : for (i = 0; i < skewedSize; i++)
183 0 : val[i] = rand() % skewedDomain;
184 0 : for (; i < n; i++)
185 0 : val[i] = (rand() % (*domain - skewedDomain)) + skewedDomain;
186 :
187 : /* mix BUNs randomly */
188 0 : for (r = 0, i = 0; i < n; i++) {
189 0 : const BUN j = i + ((r += rand()) % (n - i));
190 0 : const int tmp = val[i];
191 :
192 0 : val[i] = val[j];
193 0 : val[j] = tmp;
194 : }
195 :
196 0 : BATsetcount(b, n);
197 0 : b->tsorted = false;
198 0 : b->trevsorted = false;
199 0 : b->tseqbase = oid_nil;
200 0 : BATkey(b, *size <= *domain);
201 0 : *bn = b;
202 0 : return GDK_SUCCEED;
203 : }
204 :
205 :
206 : /* math.h files do not have M_PI/M_E defined */
207 : #ifndef M_PI
208 : # define M_PI ((dbl) 3.14159265358979323846) /* pi */
209 : #endif
210 : #ifndef M_E
211 : # define M_E ((dbl) 2.7182818284590452354) /* e */
212 : #endif
213 :
214 : static gdk_return
215 0 : BATnormal(BAT **bn, oid *base, lng *size, int *domain, int *stddev, int *mean)
216 : {
217 0 : const BUN n = (BUN) *size;
218 0 : BUN i, r;
219 0 : const int d = (*domain < 0 ? 0 : *domain);
220 0 : int j;
221 0 : BAT *b = NULL;
222 0 : int *restrict val;
223 0 : const int m = *mean, s = *stddev;
224 0 : unsigned int *restrict abs;
225 0 : flt *restrict rel;
226 0 : dbl tot = 0;
227 0 : const dbl s_s_2 = (dbl) s * (dbl) s * 2;
228 0 : const dbl s_sqrt_2_pi = ((dbl) s * sqrt(2 * M_PI));
229 :
230 0 : assert(sizeof(unsigned int) == sizeof(flt));
231 :
232 : #if SIZEOF_BUN > 4
233 0 : if (n >= ((ulng) 1 << 32)) {
234 0 : GDKerror("size must be less than 2^32 = " LLFMT, (lng) 1 << 32);
235 0 : return GDK_FAIL;
236 : }
237 : #endif
238 0 : if (*size > (lng) BUN_MAX) {
239 : GDKerror("size must not exceed BUN_MAX");
240 : return GDK_FAIL;
241 : }
242 :
243 0 : if (*size < 0) {
244 : GDKerror("size must not be negative");
245 : return GDK_FAIL;
246 : }
247 :
248 0 : b = COLnew(*base, TYPE_int, n, TRANSIENT);
249 0 : if (b == NULL) {
250 : return GDK_FAIL;
251 : }
252 0 : if (n == 0) {
253 0 : b->tsorted = true;
254 0 : b->trevsorted = false;
255 0 : b->tseqbase = oid_nil;
256 0 : BATkey(b, true);
257 0 : *bn = b;
258 0 : return GDK_SUCCEED;
259 : }
260 0 : val = (int *) Tloc(b, 0);
261 :
262 0 : abs = (unsigned int *) GDKmalloc(d * sizeof(unsigned int));
263 0 : if (abs == NULL) {
264 0 : BBPreclaim(b);
265 0 : return GDK_FAIL;
266 : }
267 : rel = (flt *) abs;
268 :
269 : /* assert(0 <= *mean && *mean < *size); */
270 :
271 : /* created inverted table with rel fraction per value */
272 0 : for (tot = 0, j = 0; j < d; j++) {
273 0 : const dbl j_m = (dbl) j - m;
274 0 : const dbl tmp = j_m * j_m / s_s_2;
275 :
276 0 : rel[j] = (flt) (pow(M_E, -tmp) / s_sqrt_2_pi);
277 0 : tot += rel[j];
278 : }
279 : /* just in case we get tot != 1 due to. e.g.,
280 : * rounding errors, limited precision, or limited domain */
281 0 : tot = 1.0 / tot;
282 : /* calculate abs count per value from rel fraction */
283 0 : for (r = n, j = 0; j < d; j++) {
284 0 : assert(((dbl) n * rel[j] * tot) < (dbl) ((lng) 1 << 32));
285 0 : abs[j] = (unsigned int) ((dbl) n * rel[j] * tot);
286 0 : assert(r >= abs[j]);
287 0 : r -= abs[j];
288 : }
289 0 : assert(((ulng) 1 << 32) - r > abs[m]);
290 0 : abs[m] += (unsigned int) r;
291 :
292 : /* create BUNs with normal distribution */
293 0 : for (j = 0, i = 0; i < n && j < d; i++) {
294 0 : while (j < d && abs[j] == 0)
295 0 : j++;
296 0 : if (j < d) {
297 0 : val[i] = j;
298 0 : abs[j]--;
299 : }
300 : }
301 0 : assert(i == n);
302 0 : while (j < d && abs[j] == 0)
303 0 : j++;
304 0 : assert(j == d);
305 0 : GDKfree(abs);
306 :
307 :
308 0 : BATsetcount(b, n);
309 0 : b->tsorted = false;
310 0 : b->trevsorted = false;
311 0 : b->tseqbase = oid_nil;
312 0 : BATkey(b, n < 2);
313 0 : *bn = b;
314 0 : return GDK_SUCCEED;
315 : }
316 :
317 : /*
318 : * @-
319 : * The M5 wrapper code
320 : */
321 :
322 : static str
323 0 : MBMrandom_seed(bat *ret, oid *base, lng *size, int *domain, const int *seed)
324 : {
325 0 : BAT *bn = NULL;
326 :
327 0 : BATrandom(&bn, base, size, domain, *seed);
328 0 : if (bn) {
329 0 : *ret = bn->batCacheid;
330 0 : BBPkeepref(bn);
331 : } else
332 0 : throw(MAL, "microbenchmark.random", OPERATION_FAILED);
333 0 : return MAL_SUCCEED;
334 : }
335 :
336 : static str
337 0 : MBMrandom(bat *ret, oid *base, lng *size, int *domain)
338 : {
339 0 : return MBMrandom_seed(ret, base, size, domain, &int_nil);
340 : }
341 :
342 : static str
343 0 : MBMuniform(bat *ret, oid *base, lng *size, int *domain)
344 : {
345 0 : BAT *bn = NULL;
346 :
347 0 : BATuniform(&bn, base, size, domain);
348 0 : if (bn) {
349 0 : *ret = bn->batCacheid;
350 0 : BBPkeepref(bn);
351 : } else
352 0 : throw(MAL, "microbenchmark.uniform", OPERATION_FAILED);
353 0 : return MAL_SUCCEED;
354 : }
355 :
356 : static str
357 0 : MBMnormal(bat *ret, oid *base, lng *size, int *domain, int *stddev, int *mean)
358 : {
359 0 : BAT *bn = NULL;
360 0 : BATnormal(&bn, base, size, domain, stddev, mean);
361 0 : if (bn) {
362 0 : *ret = bn->batCacheid;
363 0 : BBPkeepref(bn);
364 : } else
365 0 : throw(MAL, "microbenchmark.normal", OPERATION_FAILED);
366 0 : return MAL_SUCCEED;
367 : }
368 :
369 :
370 : static str
371 0 : MBMmix(bat *bn, bat *batid)
372 : {
373 0 : BUN n, r, i;
374 0 : BAT *b;
375 :
376 0 : if ((b = BATdescriptor(*batid)) == NULL)
377 0 : throw(MAL, "microbenchmark.mix",
378 : SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
379 :
380 0 : n = BATcount(b);
381 : /* mix BUNs randomly */
382 0 : for (r = i = 0; i < n; i++) {
383 0 : BUN idx = i + ((r += (BUN) rand()) % (n - i));
384 0 : int val;
385 :
386 0 : val = *(int *) Tloc(b, i);
387 0 : *(int *) Tloc(b, i) = *(int *) Tloc(b, idx);
388 0 : *(int *) Tloc(b, idx) = val;
389 : }
390 :
391 0 : *bn = b->batCacheid;
392 0 : BBPkeepref(b);
393 :
394 0 : return MAL_SUCCEED;
395 : }
396 :
397 : static str
398 0 : MBMskewed(bat *ret, oid *base, lng *size, int *domain, int *skew)
399 : {
400 0 : BAT *bn = NULL;
401 :
402 0 : BATskewed(&bn, base, size, domain, skew);
403 0 : if (bn) {
404 0 : *ret = bn->batCacheid;
405 0 : BBPkeepref(bn);
406 : } else
407 0 : throw(MAL, "microbenchmark.skewed", OPERATION_FAILED);
408 0 : return MAL_SUCCEED;
409 : }
410 :
411 : #include "mel.h"
412 : mel_func microbenchmark_init_funcs[] = {
413 : command("microbenchmark", "random", MBMrandom, false, "Create a BAT with random integer distribution; domain == nil:int ? [0:RAND_MAX] : [0,domain)", args(1,4, batarg("",int),arg("base",oid),arg("size",lng),arg("domain",int))),
414 : command("microbenchmark", "random", MBMrandom_seed, false, "Create a BAT with random integer distribution,\nusing given seed (seed == nil:int -> no seed used);\ndomain == nil:int ? [0:RAND_MAX] : [0,domain)", args(1,5, batarg("",int),arg("base",oid),arg("size",lng),arg("domain",int),arg("seed",int))),
415 : command("microbenchmark", "uniform", MBMuniform, false, "Create a BAT with uniform integer distribution", args(1,4, batarg("",int),arg("base",oid),arg("size",lng),arg("domain",int))),
416 : command("microbenchmark", "normal", MBMnormal, false, "Create a BAT with a normal integer distribution", args(1,6, batarg("",int),arg("base",oid),arg("size",lng),arg("domain",int),arg("stddev",int),arg("mean",int))),
417 : command("microbenchmark", "mix", MBMmix, false, "Mix the BUNs of this BAT", args(1,2, batarg("",int),batarg("b1",int))),
418 : command("microbenchmark", "skewed", MBMskewed, false, "Create a BAT with skewed integer distribution", args(1,5, batarg("",int),arg("base",oid),arg("size",lng),arg("domain",int),arg("skew",int))),
419 : { .imp=NULL }
420 : };
421 : #include "mal_import.h"
422 : #ifdef _MSC_VER
423 : #undef read
424 : #pragma section(".CRT$XCU",read)
425 : #endif
426 1 : LIB_STARTUP_FUNC(init_microbenchmark_mal)
427 1 : { mal_module("microbenchmark", NULL, microbenchmark_init_funcs); }
|