Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024, 2025 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "gdk.h"
15 : #include "gdk_analytic.h"
16 : #include "gdk_calc_private.h"
17 :
18 : #ifdef HAVE_HGE
19 : #define LNG_HGE hge
20 : #define GDK_LNG_HGE_max GDK_hge_max
21 : #define LNG_HGE_nil hge_nil
22 : #else
23 : #define LNG_HGE lng
24 : #define GDK_LNG_HGE_max GDK_lng_max
25 : #define LNG_HGE_nil lng_nil
26 : #endif
27 :
28 : /* average on integers */
29 : #define ANALYTICAL_AVERAGE_CALC_NUM_STEP1(TPE, IMP, ARG) \
30 : do { \
31 : if (!is_##TPE##_nil(ARG)) { \
32 : ADDI_WITH_CHECK(ARG, sum, LNG_HGE, sum, \
33 : GDK_LNG_HGE_max, \
34 : goto avg_overflow##TPE##IMP); \
35 : /* count only when no overflow occurs */ \
36 : n++; \
37 : } \
38 : } while (0)
39 :
40 : /* do the common part of the overflow handling from STEP1 */
41 : #define ANALYTICAL_AVERAGE_CALC_NUM_STEP2(TPE, IMP) \
42 : do { \
43 : avg_overflow##TPE##IMP: \
44 : assert(n > 0); \
45 : if (sum >= 0) { \
46 : a = (TPE) (sum / n); \
47 : rr = (lng) (sum % n); \
48 : } else { \
49 : sum = -sum; \
50 : a = - (TPE) (sum / n); \
51 : rr = (lng) (sum % n); \
52 : if (r) { \
53 : a--; \
54 : rr = n - rr; \
55 : } \
56 : } \
57 : } while (0)
58 :
59 : #define ANALYTICAL_AVG_IMP_NUM_UNBOUNDED_TILL_CURRENT_ROW(TPE, IMP) \
60 : do { \
61 : TPE a = 0; \
62 : dbl curval = dbl_nil; \
63 : for (; k < i;) { \
64 : j = k; \
65 : do { \
66 : ANALYTICAL_AVERAGE_CALC_NUM_STEP1(TPE, IMP, bp[k]); \
67 : k++; \
68 : } while (k < i && !op[k]); \
69 : curval = n > 0 ? (dbl) sum / n : dbl_nil; \
70 : if (0) { /* overflow handling from STEP1 */ \
71 : ANALYTICAL_AVERAGE_CALC_NUM_STEP2(TPE, IMP); \
72 : while (k < i && !op[k]) { \
73 : TPE v = bp[k++]; \
74 : if (is_##TPE##_nil(v)) \
75 : continue; \
76 : AVERAGE_ITER(TPE, v, a, rr, n); \
77 : } \
78 : curval = a + (dbl) rr / n; \
79 : } \
80 : for (; j < k; j++) \
81 : rb[j] = curval; \
82 : has_nils |= (n == 0); \
83 : } \
84 : n = 0; \
85 : sum = 0; \
86 : } while (0)
87 :
88 : #define ANALYTICAL_AVG_IMP_NUM_CURRENT_ROW_TILL_UNBOUNDED(TPE, IMP) \
89 : do { \
90 : TPE a = 0; \
91 : dbl curval = dbl_nil; \
92 : l = i - 1; \
93 : for (j = l; ; j--) { \
94 : ANALYTICAL_AVERAGE_CALC_NUM_STEP1(TPE, IMP, bp[j]); \
95 : if (op[j] || j == k) { \
96 : curval = n > 0 ? (dbl) sum / n : dbl_nil; \
97 : if (0) { /* overflow handling from STEP1 */ \
98 : ANALYTICAL_AVERAGE_CALC_NUM_STEP2(TPE, IMP); \
99 : while (!op[j] && j != k) { \
100 : TPE v = bp[j--]; \
101 : if (is_##TPE##_nil(v)) \
102 : continue; \
103 : AVERAGE_ITER(TPE, v, a, rr, n); \
104 : curval = a + (dbl) rr / n; \
105 : } \
106 : } \
107 : for (; ; l--) { \
108 : rb[l] = curval; \
109 : if (l == j) \
110 : break; \
111 : } \
112 : has_nils |= (n == 0); \
113 : if (j == k) \
114 : break; \
115 : l = j - 1; \
116 : } \
117 : } \
118 : n = 0; \
119 : sum = 0; \
120 : k = i; \
121 : } while (0)
122 :
123 : #define ANALYTICAL_AVG_IMP_NUM_ALL_ROWS(TPE, IMP) \
124 : do { \
125 : TPE a = 0; \
126 : TPE v; \
127 : for (; j < i; j++) { \
128 : v = bp[j]; \
129 : ANALYTICAL_AVERAGE_CALC_NUM_STEP1(TPE, IMP, v); \
130 : } \
131 : curval = n > 0 ? (dbl) sum / n : dbl_nil; \
132 : if (0) { /* overflow handling from STEP1 */ \
133 : ANALYTICAL_AVERAGE_CALC_NUM_STEP2(TPE, IMP); \
134 : for (; j < i; j++) { \
135 : v = bp[j]; \
136 : if (is_##TPE##_nil(v)) \
137 : continue; \
138 : AVERAGE_ITER(TPE, v, a, rr, n); \
139 : } \
140 : curval = a + (dbl) rr / n; \
141 : } \
142 : for (; k < i; k++) \
143 : rb[k] = curval; \
144 : has_nils |= (n == 0); \
145 : n = 0; \
146 : sum = 0; \
147 : } while (0)
148 :
149 : #define ANALYTICAL_AVG_IMP_NUM_CURRENT_ROW(TPE, IMP) \
150 : do { \
151 : for (; k < i; k++) { \
152 : TPE v = bp[k]; \
153 : if (is_##TPE##_nil(v)) { \
154 : rb[k] = dbl_nil; \
155 : has_nils = true; \
156 : } else { \
157 : rb[k] = (dbl) v; \
158 : } \
159 : } \
160 : } while (0)
161 :
162 : #define avg_num_deltas(TPE) typedef struct avg_num_deltas##TPE { TPE a; lng n; lng rr;} avg_num_deltas##TPE;
163 : avg_num_deltas(bte)
164 : avg_num_deltas(sht)
165 : avg_num_deltas(int)
166 : avg_num_deltas(lng)
167 :
168 : #define INIT_AGGREGATE_AVG_NUM(TPE, NOTHING1, NOTHING2) \
169 : do { \
170 : computed = (avg_num_deltas##TPE) {0}; \
171 : } while (0)
172 : #define COMPUTE_LEVEL0_AVG_NUM(X, TPE, NOTHING1, NOTHING2) \
173 : do { \
174 : TPE v = bp[j + X]; \
175 : computed = is_##TPE##_nil(v) ? (avg_num_deltas##TPE){0} : (avg_num_deltas##TPE) {.a = v, .n = 1}; \
176 : } while (0)
177 : #define COMPUTE_LEVELN_AVG_NUM(VAL, TPE, NOTHING1, NOTHING2) \
178 : do { \
179 : if (VAL.n) \
180 : AVERAGE_ITER(TPE, VAL.a, computed.a, computed.rr, computed.n); \
181 : } while (0)
182 : #define FINALIZE_AGGREGATE_AVG_NUM(TPE, NOTHING1, NOTHING2) \
183 : do { \
184 : if (computed.n == 0) { \
185 : rb[k] = dbl_nil; \
186 : has_nils = true; \
187 : } else { \
188 : rb[k] = computed.a + (dbl) computed.rr / computed.n; \
189 : } \
190 : } while (0)
191 : #define ANALYTICAL_AVG_IMP_NUM_OTHERS(TPE, IMP) \
192 : do { \
193 : oid ncount = i - k; \
194 : if ((res = GDKrebuild_segment_tree(ncount, sizeof(avg_num_deltas##TPE), st, &segment_tree, &levels_offset, &nlevels)) != GDK_SUCCEED) \
195 : goto cleanup; \
196 : populate_segment_tree(avg_num_deltas##TPE, ncount, INIT_AGGREGATE_AVG_NUM, COMPUTE_LEVEL0_AVG_NUM, COMPUTE_LEVELN_AVG_NUM, TPE, NOTHING, NOTHING); \
197 : for (; k < i; k++) \
198 : compute_on_segment_tree(avg_num_deltas##TPE, start[k] - j, end[k] - j, INIT_AGGREGATE_AVG_NUM, COMPUTE_LEVELN_AVG_NUM, FINALIZE_AGGREGATE_AVG_NUM, TPE, NOTHING, NOTHING); \
199 : j = k; \
200 : } while (0)
201 :
202 : /* average on floating-points */
203 : #define ANALYTICAL_AVG_IMP_FP_UNBOUNDED_TILL_CURRENT_ROW(TPE, IMP) \
204 : do { \
205 : TPE a = 0; \
206 : dbl curval = dbl_nil; \
207 : for (; k < i;) { \
208 : j = k; \
209 : do { \
210 : if (!is_##TPE##_nil(bp[k])) \
211 : AVERAGE_ITER_FLOAT(TPE, bp[k], a, n); \
212 : k++; \
213 : } while (k < i && !op[k]); \
214 : if (n > 0) \
215 : curval = a; \
216 : else \
217 : has_nils = true; \
218 : for (; j < k; j++) \
219 : rb[j] = curval; \
220 : } \
221 : n = 0; \
222 : } while (0)
223 :
224 : #define ANALYTICAL_AVG_IMP_FP_CURRENT_ROW_TILL_UNBOUNDED(TPE, IMP) \
225 : do { \
226 : TPE a = 0; \
227 : dbl curval = dbl_nil; \
228 : l = i - 1; \
229 : for (j = l; ; j--) { \
230 : if (!is_##TPE##_nil(bp[j])) \
231 : AVERAGE_ITER_FLOAT(TPE, bp[j], a, n); \
232 : if (op[j] || j == k) { \
233 : for (; ; l--) { \
234 : rb[l] = curval; \
235 : if (l == j) \
236 : break; \
237 : } \
238 : has_nils |= is_##TPE##_nil(curval); \
239 : if (j == k) \
240 : break; \
241 : l = j - 1; \
242 : } \
243 : } \
244 : n = 0; \
245 : k = i; \
246 : } while (0)
247 :
248 : #define ANALYTICAL_AVG_IMP_FP_ALL_ROWS(TPE, IMP) \
249 : do { \
250 : TPE a = 0; \
251 : dbl curval = dbl_nil; \
252 : for (; j < i; j++) { \
253 : TPE v = bp[j]; \
254 : if (!is_##TPE##_nil(v)) \
255 : AVERAGE_ITER_FLOAT(TPE, v, a, n); \
256 : } \
257 : if (n > 0) \
258 : curval = a; \
259 : else \
260 : has_nils = true; \
261 : for (; k < i; k++) \
262 : rb[k] = curval; \
263 : n = 0; \
264 : } while (0)
265 :
266 : #define ANALYTICAL_AVG_IMP_FP_CURRENT_ROW(TPE, IMP) ANALYTICAL_AVG_IMP_NUM_CURRENT_ROW(TPE, IMP)
267 :
268 : #define avg_fp_deltas(TPE) typedef struct avg_fp_deltas_##TPE {TPE a; lng n;} avg_fp_deltas_##TPE;
269 : avg_fp_deltas(flt)
270 : avg_fp_deltas(dbl)
271 :
272 : #define INIT_AGGREGATE_AVG_FP(TPE, NOTHING1, NOTHING2) \
273 : do { \
274 : computed = (avg_fp_deltas_##TPE) {0}; \
275 : } while (0)
276 : #define COMPUTE_LEVEL0_AVG_FP(X, TPE, NOTHING1, NOTHING2) \
277 : do { \
278 : TPE v = bp[j + X]; \
279 : computed = is_##TPE##_nil(v) ? (avg_fp_deltas_##TPE) {0} : (avg_fp_deltas_##TPE) {.n = 1, .a = v}; \
280 : } while (0)
281 : #define COMPUTE_LEVELN_AVG_FP(VAL, TPE, NOTHING1, NOTHING2) \
282 : do { \
283 : if (VAL.n) \
284 : AVERAGE_ITER_FLOAT(TPE, VAL.a, computed.a, computed.n); \
285 : } while (0)
286 : #define FINALIZE_AGGREGATE_AVG_FP(TPE, NOTHING1, NOTHING2) \
287 : do { \
288 : if (computed.n == 0) { \
289 : rb[k] = dbl_nil; \
290 : has_nils = true; \
291 : } else { \
292 : rb[k] = computed.a; \
293 : } \
294 : } while (0)
295 : #define ANALYTICAL_AVG_IMP_FP_OTHERS(TPE, IMP) \
296 : do { \
297 : oid ncount = i - k; \
298 : if ((res = GDKrebuild_segment_tree(ncount, sizeof(avg_fp_deltas_##TPE), st, &segment_tree, &levels_offset, &nlevels)) != GDK_SUCCEED) \
299 : goto cleanup; \
300 : populate_segment_tree(avg_fp_deltas_##TPE, ncount, INIT_AGGREGATE_AVG_FP, COMPUTE_LEVEL0_AVG_FP, COMPUTE_LEVELN_AVG_FP, TPE, NOTHING, NOTHING); \
301 : for (; k < i; k++) \
302 : compute_on_segment_tree(avg_fp_deltas_##TPE, start[k] - j, end[k] - j, INIT_AGGREGATE_AVG_FP, COMPUTE_LEVELN_AVG_FP, FINALIZE_AGGREGATE_AVG_FP, TPE, NOTHING, NOTHING); \
303 : j = k; \
304 : } while (0)
305 :
306 : #define ANALYTICAL_AVG_PARTITIONS(TPE, IMP, REAL_IMP) \
307 : do { \
308 : TPE *restrict bp = (TPE*)bi.base; \
309 : if (p) { \
310 : while (i < cnt) { \
311 : if (np[i]) { \
312 : avg##TPE##IMP: \
313 : REAL_IMP(TPE, IMP); \
314 : } \
315 : if (!last) \
316 : i++; \
317 : } \
318 : } \
319 : if (!last) { \
320 : last = true; \
321 : i = cnt; \
322 : goto avg##TPE##IMP; \
323 : } \
324 : } while (0)
325 :
326 : #ifdef HAVE_HGE
327 : avg_num_deltas(hge)
328 : #define ANALYTICAL_AVG_LIMIT(IMP) \
329 : case TYPE_hge: \
330 : ANALYTICAL_AVG_PARTITIONS(hge, IMP, ANALYTICAL_AVG_IMP_NUM_##IMP); \
331 : break;
332 : #else
333 : #define ANALYTICAL_AVG_LIMIT(IMP)
334 : #endif
335 :
336 : #define ANALYTICAL_AVG_BRANCHES(IMP) \
337 : do { \
338 : switch (tpe) { \
339 : case TYPE_bte: \
340 : ANALYTICAL_AVG_PARTITIONS(bte, IMP, ANALYTICAL_AVG_IMP_NUM_##IMP); \
341 : break; \
342 : case TYPE_sht: \
343 : ANALYTICAL_AVG_PARTITIONS(sht, IMP, ANALYTICAL_AVG_IMP_NUM_##IMP); \
344 : break; \
345 : case TYPE_int: \
346 : ANALYTICAL_AVG_PARTITIONS(int, IMP, ANALYTICAL_AVG_IMP_NUM_##IMP); \
347 : break; \
348 : case TYPE_lng: \
349 : ANALYTICAL_AVG_PARTITIONS(lng, IMP, ANALYTICAL_AVG_IMP_NUM_##IMP); \
350 : break; \
351 : ANALYTICAL_AVG_LIMIT(IMP) \
352 : case TYPE_flt: \
353 : ANALYTICAL_AVG_PARTITIONS(flt, IMP, ANALYTICAL_AVG_IMP_FP_##IMP); \
354 : break; \
355 : case TYPE_dbl: \
356 : ANALYTICAL_AVG_PARTITIONS(dbl, IMP, ANALYTICAL_AVG_IMP_FP_##IMP); \
357 : break; \
358 : default: \
359 : goto nosupport; \
360 : } \
361 : } while (0)
362 :
363 : BAT *
364 72 : GDKanalyticalavg(BAT *p, BAT *o, BAT *b, BAT *s, BAT *e, int tpe, int frame_type)
365 : {
366 72 : BAT *r = COLnew(b->hseqbase, TYPE_dbl, BATcount(b), TRANSIENT);
367 72 : if (r == NULL)
368 : return NULL;
369 72 : BATiter pi = bat_iterator(p);
370 72 : BATiter oi = bat_iterator(o);
371 72 : BATiter bi = bat_iterator(b);
372 72 : BATiter si = bat_iterator(s);
373 72 : BATiter ei = bat_iterator(e);
374 72 : bool has_nils = false, last = false;
375 72 : oid i = 1, j = 0, k = 0, l = 0, cnt = BATcount(b), *restrict start = si.base, *restrict end = ei.base,
376 72 : *levels_offset = NULL, nlevels = 0;
377 72 : lng n = 0, rr = 0;
378 72 : dbl *rb = (dbl *) Tloc(r, 0), curval = dbl_nil;
379 72 : bit *np = pi.base, *op = oi.base;
380 72 : void *segment_tree = NULL;
381 72 : gdk_return res = GDK_SUCCEED;
382 : #ifdef HAVE_HGE
383 72 : hge sum = 0;
384 : #else
385 : lng sum = 0;
386 : #endif
387 72 : BAT *st = NULL;
388 :
389 72 : assert(np == NULL || cnt == 0 || np[0] == 0);
390 72 : if (cnt > 0) {
391 72 : switch (frame_type) {
392 25 : case 3: /* unbounded until current row */
393 586 : ANALYTICAL_AVG_BRANCHES(UNBOUNDED_TILL_CURRENT_ROW);
394 : break;
395 1 : case 4: /* current row until unbounded */
396 11 : ANALYTICAL_AVG_BRANCHES(CURRENT_ROW_TILL_UNBOUNDED);
397 : break;
398 17 : case 5: /* all rows */
399 356 : ANALYTICAL_AVG_BRANCHES(ALL_ROWS);
400 : break;
401 0 : case 6: /* current row */
402 0 : ANALYTICAL_AVG_BRANCHES(CURRENT_ROW);
403 : break;
404 29 : default:
405 29 : if ((st = GDKinitialize_segment_tree()) == NULL) {
406 0 : res = GDK_FAIL;
407 0 : goto cleanup;
408 : }
409 2077 : ANALYTICAL_AVG_BRANCHES(OTHERS);
410 : break;
411 : }
412 : }
413 :
414 72 : BATsetcount(r, cnt);
415 72 : r->tnonil = !has_nils;
416 72 : r->tnil = has_nils;
417 72 : cleanup:
418 72 : bat_iterator_end(&pi);
419 72 : bat_iterator_end(&oi);
420 72 : bat_iterator_end(&bi);
421 72 : bat_iterator_end(&si);
422 72 : bat_iterator_end(&ei);
423 72 : BBPreclaim(st);
424 72 : if (res != GDK_SUCCEED) {
425 0 : BBPreclaim(r);
426 0 : r = NULL;
427 : }
428 : return r;
429 0 : nosupport:
430 0 : GDKerror("42000!average of type %s to dbl unsupported.\n", ATOMname(tpe));
431 0 : res = GDK_FAIL;
432 0 : goto cleanup;
433 : }
434 :
435 : #ifdef TRUNCATE_NUMBERS
436 : #define ANALYTICAL_AVERAGE_INT_CALC_FINALIZE(avg, rem, ncnt) \
437 : do { \
438 : if (rem > 0 && avg < 0) \
439 : avg++; \
440 : } while(0)
441 : #else
442 : #define ANALYTICAL_AVERAGE_INT_CALC_FINALIZE(avg, rem, ncnt) \
443 : do { \
444 : if (rem > 0) { \
445 : if (avg < 0) { \
446 : if (2*rem > ncnt) \
447 : avg++; \
448 : } else { \
449 : if (2*rem >= ncnt) \
450 : avg++; \
451 : } \
452 : } \
453 : } while(0)
454 : #endif
455 :
456 : #define ANALYTICAL_AVG_INT_UNBOUNDED_TILL_CURRENT_ROW(TPE) \
457 : do { \
458 : TPE avg = 0; \
459 : for (; k < i;) { \
460 : j = k; \
461 : do { \
462 : if (!is_##TPE##_nil(bp[k])) \
463 : AVERAGE_ITER(TPE, bp[k], avg, rem, ncnt); \
464 : k++; \
465 : } while (k < i && !op[k]); \
466 : if (ncnt == 0) { \
467 : has_nils = true; \
468 : for (; j < k; j++) \
469 : rb[j] = TPE##_nil; \
470 : } else { \
471 : TPE avgfinal = avg; \
472 : ANALYTICAL_AVERAGE_INT_CALC_FINALIZE(avgfinal, rem, ncnt); \
473 : for (; j < k; j++) \
474 : rb[j] = avgfinal; \
475 : } \
476 : } \
477 : rem = 0; \
478 : ncnt = 0; \
479 : } while (0)
480 :
481 : #define ANALYTICAL_AVG_INT_CURRENT_ROW_TILL_UNBOUNDED(TPE) \
482 : do { \
483 : TPE avg = 0; \
484 : l = i - 1; \
485 : for (j = l; ; j--) { \
486 : if (!is_##TPE##_nil(bp[j])) \
487 : AVERAGE_ITER(TPE, bp[j], avg, rem, ncnt); \
488 : if (op[j] || j == k) { \
489 : if (ncnt == 0) { \
490 : has_nils = true; \
491 : for (; ; l--) { \
492 : rb[l] = TPE##_nil; \
493 : if (l == j) \
494 : break; \
495 : } \
496 : } else { \
497 : TPE avgfinal = avg; \
498 : ANALYTICAL_AVERAGE_INT_CALC_FINALIZE(avgfinal, rem, ncnt); \
499 : for (; ; l--) { \
500 : rb[l] = avgfinal; \
501 : if (l == j) \
502 : break; \
503 : } \
504 : } \
505 : if (j == k) \
506 : break; \
507 : l = j - 1; \
508 : } \
509 : } \
510 : rem = 0; \
511 : ncnt = 0; \
512 : k = i; \
513 : } while (0)
514 :
515 : #define ANALYTICAL_AVG_INT_ALL_ROWS(TPE) \
516 : do { \
517 : TPE avg = 0; \
518 : for (; j < i; j++) { \
519 : TPE v = bp[j]; \
520 : if (!is_##TPE##_nil(v)) \
521 : AVERAGE_ITER(TPE, v, avg, rem, ncnt); \
522 : } \
523 : if (ncnt == 0) { \
524 : for (; k < i; k++) \
525 : rb[k] = TPE##_nil; \
526 : has_nils = true; \
527 : } else { \
528 : ANALYTICAL_AVERAGE_INT_CALC_FINALIZE(avg, rem, ncnt); \
529 : for (; k < i; k++) \
530 : rb[k] = avg; \
531 : } \
532 : rem = 0; \
533 : ncnt = 0; \
534 : } while (0)
535 :
536 : #define ANALYTICAL_AVG_INT_CURRENT_ROW(TPE) \
537 : do { \
538 : for (; k < i; k++) { \
539 : TPE v = bp[k]; \
540 : rb[k] = bp[k]; \
541 : has_nils |= is_##TPE##_nil(v); \
542 : } \
543 : } while (0)
544 :
545 : #define avg_int_deltas(TPE) typedef struct avg_int_deltas_##TPE { TPE avg; lng rem, ncnt;} avg_int_deltas_##TPE;
546 : avg_int_deltas(bte)
547 : avg_int_deltas(sht)
548 : avg_int_deltas(int)
549 : avg_int_deltas(lng)
550 :
551 : #define INIT_AGGREGATE_AVG_INT(TPE, NOTHING1, NOTHING2) \
552 : do { \
553 : computed = (avg_int_deltas_##TPE) {0}; \
554 : } while (0)
555 : #define COMPUTE_LEVEL0_AVG_INT(X, TPE, NOTHING1, NOTHING2) \
556 : do { \
557 : TPE v = bp[j + X]; \
558 : computed = is_##TPE##_nil(v) ? (avg_int_deltas_##TPE) {0} : (avg_int_deltas_##TPE) {.avg = v, .ncnt = 1}; \
559 : } while (0)
560 : #define COMPUTE_LEVELN_AVG_INT(VAL, TPE, NOTHING1, NOTHING2) \
561 : do { \
562 : if (VAL.ncnt) \
563 : AVERAGE_ITER(TPE, VAL.avg, computed.avg, computed.rem, computed.ncnt); \
564 : } while (0)
565 : #define FINALIZE_AGGREGATE_AVG_INT(TPE, NOTHING1, NOTHING2) \
566 : do { \
567 : if (computed.ncnt == 0) { \
568 : has_nils = true; \
569 : rb[k] = TPE##_nil; \
570 : } else { \
571 : ANALYTICAL_AVERAGE_INT_CALC_FINALIZE(computed.avg, computed.rem, computed.ncnt); \
572 : rb[k] = computed.avg; \
573 : } \
574 : } while (0)
575 : #define ANALYTICAL_AVG_INT_OTHERS(TPE) \
576 : do { \
577 : oid ncount = i - k; \
578 : if ((res = GDKrebuild_segment_tree(ncount, sizeof(avg_int_deltas_##TPE), st, &segment_tree, &levels_offset, &nlevels)) != GDK_SUCCEED) \
579 : goto cleanup; \
580 : populate_segment_tree(avg_int_deltas_##TPE, ncount, INIT_AGGREGATE_AVG_INT, COMPUTE_LEVEL0_AVG_INT, COMPUTE_LEVELN_AVG_INT, TPE, NOTHING, NOTHING); \
581 : for (; k < i; k++) \
582 : compute_on_segment_tree(avg_int_deltas_##TPE, start[k] - j, end[k] - j, INIT_AGGREGATE_AVG_INT, COMPUTE_LEVELN_AVG_INT, FINALIZE_AGGREGATE_AVG_INT, TPE, NOTHING, NOTHING); \
583 : j = k; \
584 : } while (0)
585 :
586 : #define ANALYTICAL_AVG_INT_PARTITIONS(TPE, IMP) \
587 : do { \
588 : TPE *restrict bp = (TPE*)bi.base, *rb = (TPE *) Tloc(r, 0); \
589 : if (p) { \
590 : while (i < cnt) { \
591 : if (np[i]) { \
592 : avg##TPE##IMP: \
593 : IMP(TPE); \
594 : } \
595 : if (!last) \
596 : i++; \
597 : } \
598 : } \
599 : if (!last) { \
600 : last = true; \
601 : i = cnt; \
602 : goto avg##TPE##IMP; \
603 : } \
604 : } while (0)
605 :
606 : #ifdef HAVE_HGE
607 : avg_int_deltas(hge)
608 : #define ANALYTICAL_AVG_INT_LIMIT(IMP) \
609 : case TYPE_hge: \
610 : ANALYTICAL_AVG_INT_PARTITIONS(hge, ANALYTICAL_AVG_INT_##IMP); \
611 : break;
612 : #else
613 : #define ANALYTICAL_AVG_INT_LIMIT(IMP)
614 : #endif
615 :
616 : #define ANALYTICAL_AVG_INT_BRANCHES(IMP) \
617 : do { \
618 : switch (tpe) { \
619 : case TYPE_bte: \
620 : ANALYTICAL_AVG_INT_PARTITIONS(bte, ANALYTICAL_AVG_INT_##IMP); \
621 : break; \
622 : case TYPE_sht: \
623 : ANALYTICAL_AVG_INT_PARTITIONS(sht, ANALYTICAL_AVG_INT_##IMP); \
624 : break; \
625 : case TYPE_int: \
626 : ANALYTICAL_AVG_INT_PARTITIONS(int, ANALYTICAL_AVG_INT_##IMP); \
627 : break; \
628 : case TYPE_lng: \
629 : ANALYTICAL_AVG_INT_PARTITIONS(lng, ANALYTICAL_AVG_INT_##IMP); \
630 : break; \
631 : ANALYTICAL_AVG_INT_LIMIT(IMP) \
632 : default: \
633 : goto nosupport; \
634 : } \
635 : } while (0)
636 :
637 : BAT *
638 48 : GDKanalyticalavginteger(BAT *p, BAT *o, BAT *b, BAT *s, BAT *e, int tpe, int frame_type)
639 : {
640 48 : BAT *r = COLnew(b->hseqbase, b->ttype, BATcount(b), TRANSIENT);
641 48 : if (r == NULL)
642 : return NULL;
643 48 : BATiter pi = bat_iterator(p);
644 48 : BATiter oi = bat_iterator(o);
645 48 : BATiter bi = bat_iterator(b);
646 48 : BATiter si = bat_iterator(s);
647 48 : BATiter ei = bat_iterator(e);
648 48 : bool has_nils = false, last = false;
649 48 : oid i = 1, j = 0, k = 0, l = 0, cnt = BATcount(b), *restrict start = si.base, *restrict end = ei.base,
650 48 : *levels_offset = NULL, nlevels = 0;
651 48 : lng rem = 0, ncnt = 0;
652 48 : bit *np = pi.base, *op = oi.base;
653 48 : void *segment_tree = NULL;
654 48 : gdk_return res = GDK_SUCCEED;
655 48 : BAT *st = NULL;
656 :
657 48 : assert(np == NULL || cnt == 0 || np[0] == 0);
658 48 : if (cnt > 0) {
659 48 : switch (frame_type) {
660 18 : case 3: /* unbounded until current row */
661 595 : ANALYTICAL_AVG_INT_BRANCHES(UNBOUNDED_TILL_CURRENT_ROW);
662 : break;
663 0 : case 4: /* current row until unbounded */
664 0 : ANALYTICAL_AVG_INT_BRANCHES(CURRENT_ROW_TILL_UNBOUNDED);
665 : break;
666 17 : case 5: /* all rows */
667 308498 : ANALYTICAL_AVG_INT_BRANCHES(ALL_ROWS);
668 : break;
669 0 : case 6: /* current row */
670 0 : ANALYTICAL_AVG_INT_BRANCHES(CURRENT_ROW);
671 : break;
672 13 : default:
673 13 : if ((st = GDKinitialize_segment_tree()) == NULL) {
674 0 : res = GDK_FAIL;
675 0 : goto cleanup;
676 : }
677 1159 : ANALYTICAL_AVG_INT_BRANCHES(OTHERS);
678 : break;
679 : }
680 : }
681 :
682 48 : BATsetcount(r, cnt);
683 48 : r->tnonil = !has_nils;
684 48 : r->tnil = has_nils;
685 48 : cleanup:
686 48 : bat_iterator_end(&pi);
687 48 : bat_iterator_end(&oi);
688 48 : bat_iterator_end(&bi);
689 48 : bat_iterator_end(&si);
690 48 : bat_iterator_end(&ei);
691 48 : BBPreclaim(st);
692 48 : if (res != GDK_SUCCEED) {
693 0 : BBPreclaim(r);
694 0 : r = NULL;
695 : }
696 : return r;
697 0 : nosupport:
698 0 : GDKerror("42000!average of type %s to %s unsupported.\n", ATOMname(tpe), ATOMname(tpe));
699 0 : res = GDK_FAIL;
700 0 : goto cleanup;
701 : }
702 :
703 : #define ANALYTICAL_STDEV_VARIANCE_UNBOUNDED_TILL_CURRENT_ROW(TPE, SAMPLE, OP) \
704 : do { \
705 : TPE *restrict bp = (TPE*)bi.base; \
706 : for (; k < i;) { \
707 : j = k; \
708 : do { \
709 : TPE v = bp[k]; \
710 : if (!is_##TPE##_nil(v)) { \
711 : n++; \
712 : delta = (dbl) v - mean; \
713 : mean += delta / n; \
714 : m2 += delta * ((dbl) v - mean); \
715 : } \
716 : k++; \
717 : } while (k < i && !op[k]); \
718 : if (isinf(m2)) { \
719 : goto overflow; \
720 : } else if (n > SAMPLE) { \
721 : for (; j < k; j++) \
722 : rb[j] = OP; \
723 : } else { \
724 : for (; j < k; j++) \
725 : rb[j] = dbl_nil; \
726 : has_nils = true; \
727 : } \
728 : } \
729 : n = 0; \
730 : mean = 0; \
731 : m2 = 0; \
732 : } while (0)
733 :
734 : #define ANALYTICAL_STDEV_VARIANCE_CURRENT_ROW_TILL_UNBOUNDED(TPE, SAMPLE, OP) \
735 : do { \
736 : TPE *restrict bp = (TPE*)bi.base; \
737 : l = i - 1; \
738 : for (j = l; ; j--) { \
739 : TPE v = bp[j]; \
740 : if (!is_##TPE##_nil(v)) { \
741 : n++; \
742 : delta = (dbl) v - mean; \
743 : mean += delta / n; \
744 : m2 += delta * ((dbl) v - mean); \
745 : } \
746 : if (op[j] || j == k) { \
747 : if (isinf(m2)) { \
748 : goto overflow; \
749 : } else if (n > SAMPLE) { \
750 : for (; ; l--) { \
751 : rb[l] = OP; \
752 : if (l == j) \
753 : break; \
754 : } \
755 : } else { \
756 : for (; ; l--) { \
757 : rb[l] = dbl_nil; \
758 : if (l == j) \
759 : break; \
760 : } \
761 : has_nils = true; \
762 : } \
763 : if (j == k) \
764 : break; \
765 : l = j - 1; \
766 : } \
767 : } \
768 : n = 0; \
769 : mean = 0; \
770 : m2 = 0; \
771 : k = i; \
772 : } while (0)
773 :
774 : #define ANALYTICAL_STDEV_VARIANCE_ALL_ROWS(TPE, SAMPLE, OP) \
775 : do { \
776 : TPE *restrict bp = (TPE*)bi.base; \
777 : for (; j < i; j++) { \
778 : TPE v = bp[j]; \
779 : if (is_##TPE##_nil(v)) \
780 : continue; \
781 : n++; \
782 : delta = (dbl) v - mean; \
783 : mean += delta / n; \
784 : m2 += delta * ((dbl) v - mean); \
785 : } \
786 : if (isinf(m2)) { \
787 : goto overflow; \
788 : } else if (n > SAMPLE) { \
789 : for (; k < i; k++) \
790 : rb[k] = OP; \
791 : } else { \
792 : for (; k < i; k++) \
793 : rb[k] = dbl_nil; \
794 : has_nils = true; \
795 : } \
796 : n = 0; \
797 : mean = 0; \
798 : m2 = 0; \
799 : } while (0)
800 :
801 : #define ANALYTICAL_STDEV_VARIANCE_CURRENT_ROW(TPE, SAMPLE, OP) \
802 : do { \
803 : for (; k < i; k++) \
804 : rb[k] = SAMPLE == 1 ? dbl_nil : 0; \
805 : has_nils = is_dbl_nil(rb[k - 1]); \
806 : } while (0)
807 :
808 : typedef struct stdev_var_deltas {
809 : BUN n;
810 : dbl mean, delta, m2;
811 : } stdev_var_deltas;
812 :
813 : #define INIT_AGGREGATE_STDEV_VARIANCE(TPE, SAMPLE, OP) \
814 : do { \
815 : computed = (stdev_var_deltas) {0}; \
816 : } while (0)
817 : #define COMPUTE_LEVEL0_STDEV_VARIANCE(X, TPE, SAMPLE, OP) \
818 : do { \
819 : TPE v = bp[j + X]; \
820 : computed = is_##TPE##_nil(v) ? (stdev_var_deltas) {0} : (stdev_var_deltas) {.n = 1, .mean = (dbl)v, .delta = (dbl)v}; \
821 : } while (0)
822 : #define COMPUTE_LEVELN_STDEV_VARIANCE(VAL, TPE, SAMPLE, OP) \
823 : do { \
824 : if (VAL.n) { \
825 : computed.n++; \
826 : computed.delta = VAL.delta - computed.mean; \
827 : computed.mean += computed.delta / computed.n; \
828 : computed.m2 += computed.delta * (VAL.delta - computed.mean); \
829 : } \
830 : } while (0)
831 : #define FINALIZE_AGGREGATE_STDEV_VARIANCE(TPE, SAMPLE, OP) \
832 : do { \
833 : dbl m2 = computed.m2; \
834 : BUN n = computed.n; \
835 : if (isinf(m2)) { \
836 : goto overflow; \
837 : } else if (n > SAMPLE) { \
838 : rb[k] = OP; \
839 : } else { \
840 : rb[k] = dbl_nil; \
841 : has_nils = true; \
842 : } \
843 : } while (0)
844 : #define ANALYTICAL_STDEV_VARIANCE_OTHERS(TPE, SAMPLE, OP) \
845 : do { \
846 : TPE *restrict bp = (TPE*)bi.base; \
847 : oid ncount = i - k; \
848 : if ((res = GDKrebuild_segment_tree(ncount, sizeof(stdev_var_deltas), st, &segment_tree, &levels_offset, &nlevels)) != GDK_SUCCEED) \
849 : goto cleanup; \
850 : populate_segment_tree(stdev_var_deltas, ncount, INIT_AGGREGATE_STDEV_VARIANCE, COMPUTE_LEVEL0_STDEV_VARIANCE, COMPUTE_LEVELN_STDEV_VARIANCE, TPE, SAMPLE, OP); \
851 : for (; k < i; k++) \
852 : compute_on_segment_tree(stdev_var_deltas, start[k] - j, end[k] - j, INIT_AGGREGATE_STDEV_VARIANCE, COMPUTE_LEVELN_STDEV_VARIANCE, FINALIZE_AGGREGATE_STDEV_VARIANCE, TPE, SAMPLE, OP); \
853 : j = k; \
854 : } while (0)
855 :
856 : #define ANALYTICAL_STATISTICS_PARTITIONS(TPE, SAMPLE, OP, IMP) \
857 : do { \
858 : if (p) { \
859 : while (i < cnt) { \
860 : if (np[i]) { \
861 : statistics##TPE##IMP: \
862 : IMP(TPE, SAMPLE, OP); \
863 : } \
864 : if (!last) \
865 : i++; \
866 : } \
867 : } \
868 : if (!last) { \
869 : last = true; \
870 : i = cnt; \
871 : goto statistics##TPE##IMP; \
872 : } \
873 : } while (0)
874 :
875 : #ifdef HAVE_HGE
876 : #define ANALYTICAL_STATISTICS_LIMIT(IMP, SAMPLE, OP) \
877 : case TYPE_hge: \
878 : ANALYTICAL_STATISTICS_PARTITIONS(hge, SAMPLE, OP, ANALYTICAL_##IMP); \
879 : break;
880 : #else
881 : #define ANALYTICAL_STATISTICS_LIMIT(IMP, SAMPLE, OP)
882 : #endif
883 :
884 : #define ANALYTICAL_STATISTICS_BRANCHES(IMP, SAMPLE, OP) \
885 : do { \
886 : switch (tpe) { \
887 : case TYPE_bte: \
888 : ANALYTICAL_STATISTICS_PARTITIONS(bte, SAMPLE, OP, ANALYTICAL_##IMP); \
889 : break; \
890 : case TYPE_sht: \
891 : ANALYTICAL_STATISTICS_PARTITIONS(sht, SAMPLE, OP, ANALYTICAL_##IMP); \
892 : break; \
893 : case TYPE_int: \
894 : ANALYTICAL_STATISTICS_PARTITIONS(int, SAMPLE, OP, ANALYTICAL_##IMP); \
895 : break; \
896 : case TYPE_lng: \
897 : ANALYTICAL_STATISTICS_PARTITIONS(lng, SAMPLE, OP, ANALYTICAL_##IMP); \
898 : break; \
899 : case TYPE_flt: \
900 : ANALYTICAL_STATISTICS_PARTITIONS(flt, SAMPLE, OP, ANALYTICAL_##IMP); \
901 : break; \
902 : case TYPE_dbl: \
903 : ANALYTICAL_STATISTICS_PARTITIONS(dbl, SAMPLE, OP, ANALYTICAL_##IMP); \
904 : break; \
905 : ANALYTICAL_STATISTICS_LIMIT(IMP, SAMPLE, OP) \
906 : default: \
907 : goto nosupport; \
908 : } \
909 : } while (0)
910 :
911 : #define GDK_ANALYTICAL_STDEV_VARIANCE(NAME, SAMPLE, OP, DESC) \
912 : BAT * \
913 : GDKanalytical_##NAME(BAT *p, BAT *o, BAT *b, BAT *s, BAT *e, int tpe, int frame_type) \
914 : { \
915 : BAT *r = COLnew(b->hseqbase, TYPE_dbl, BATcount(b), TRANSIENT); \
916 : if (r == NULL) \
917 : return NULL; \
918 : BATiter pi = bat_iterator(p); \
919 : BATiter oi = bat_iterator(o); \
920 : BATiter bi = bat_iterator(b); \
921 : BATiter si = bat_iterator(s); \
922 : BATiter ei = bat_iterator(e); \
923 : bool has_nils = false, last = false; \
924 : oid i = 0, j = 0, k = 0, l = 0, cnt = BATcount(b), *restrict start = si.base, *restrict end = ei.base, \
925 : *levels_offset = NULL, nlevels = 0; \
926 : lng n = 0; \
927 : bit *np = pi.base, *op = oi.base; \
928 : dbl *rb = (dbl *) Tloc(r, 0), mean = 0, m2 = 0, delta; \
929 : void *segment_tree = NULL; \
930 : gdk_return res = GDK_SUCCEED; \
931 : BAT *st = NULL; \
932 : \
933 : assert(np == NULL || cnt == 0 || np[0] == 0); \
934 : if (cnt > 0) { \
935 : switch (frame_type) { \
936 : case 3: /* unbounded until current row */ \
937 : ANALYTICAL_STATISTICS_BRANCHES(STDEV_VARIANCE_UNBOUNDED_TILL_CURRENT_ROW, SAMPLE, OP); \
938 : break; \
939 : case 4: /* current row until unbounded */ \
940 : ANALYTICAL_STATISTICS_BRANCHES(STDEV_VARIANCE_CURRENT_ROW_TILL_UNBOUNDED, SAMPLE, OP); \
941 : break; \
942 : case 5: /* all rows */ \
943 : ANALYTICAL_STATISTICS_BRANCHES(STDEV_VARIANCE_ALL_ROWS, SAMPLE, OP); \
944 : break; \
945 : case 6: /* current row */ \
946 : ANALYTICAL_STATISTICS_BRANCHES(STDEV_VARIANCE_CURRENT_ROW, SAMPLE, OP); \
947 : break; \
948 : default: \
949 : if ((st = GDKinitialize_segment_tree()) == NULL) { \
950 : res = GDK_FAIL; \
951 : goto cleanup; \
952 : } \
953 : ANALYTICAL_STATISTICS_BRANCHES(STDEV_VARIANCE_OTHERS, SAMPLE, OP); \
954 : break; \
955 : } \
956 : } \
957 : \
958 : BATsetcount(r, cnt); \
959 : r->tnonil = !has_nils; \
960 : r->tnil = has_nils; \
961 : goto cleanup; /* all these gotos seem confusing but it cleans up the ending of the operator */ \
962 : overflow: \
963 : GDKerror("22003!overflow in calculation.\n"); \
964 : res = GDK_FAIL; \
965 : cleanup: \
966 : bat_iterator_end(&pi); \
967 : bat_iterator_end(&oi); \
968 : bat_iterator_end(&bi); \
969 : bat_iterator_end(&si); \
970 : bat_iterator_end(&ei); \
971 : BBPreclaim(st); \
972 : if (res != GDK_SUCCEED) { \
973 : BBPreclaim(r); \
974 : r = NULL; \
975 : } \
976 : return r; \
977 : nosupport: \
978 : GDKerror("42000!%s of type %s unsupported.\n", DESC, ATOMname(tpe)); \
979 : res = GDK_FAIL; \
980 : goto cleanup; \
981 : }
982 :
983 757 : GDK_ANALYTICAL_STDEV_VARIANCE(stddev_samp, 1, sqrt(m2 / (n - 1)), "standard deviation")
984 340 : GDK_ANALYTICAL_STDEV_VARIANCE(stddev_pop, 0, sqrt(m2 / n), "standard deviation")
985 284 : GDK_ANALYTICAL_STDEV_VARIANCE(variance_samp, 1, m2 / (n - 1), "variance")
986 889 : GDK_ANALYTICAL_STDEV_VARIANCE(variance_pop, 0, m2 / n, "variance")
987 :
988 : #define ANALYTICAL_COVARIANCE_UNBOUNDED_TILL_CURRENT_ROW(TPE, SAMPLE, OP) \
989 : do { \
990 : TPE *bp1 = (TPE*)b1i.base, *bp2 = (TPE*)b2i.base; \
991 : for (; k < i;) { \
992 : j = k; \
993 : do { \
994 : TPE v1 = bp1[k], v2 = bp2[k]; \
995 : if (!is_##TPE##_nil(v1) && !is_##TPE##_nil(v2)) { \
996 : n++; \
997 : delta1 = (dbl) v1 - mean1; \
998 : mean1 += delta1 / n; \
999 : delta2 = (dbl) v2 - mean2; \
1000 : mean2 += delta2 / n; \
1001 : m2 += delta1 * ((dbl) v2 - mean2); \
1002 : } \
1003 : k++; \
1004 : } while (k < i && !op[k]); \
1005 : if (isinf(m2)) \
1006 : goto overflow; \
1007 : if (n > SAMPLE) { \
1008 : for (; j < k; j++) \
1009 : rb[j] = OP; \
1010 : } else { \
1011 : for (; j < k; j++) \
1012 : rb[j] = dbl_nil; \
1013 : has_nils = true; \
1014 : } \
1015 : } \
1016 : n = 0; \
1017 : mean1 = 0; \
1018 : mean2 = 0; \
1019 : m2 = 0; \
1020 : } while (0)
1021 :
1022 : #define ANALYTICAL_COVARIANCE_CURRENT_ROW_TILL_UNBOUNDED(TPE, SAMPLE, OP) \
1023 : do { \
1024 : TPE *bp1 = (TPE*)b1i.base, *bp2 = (TPE*)b2i.base; \
1025 : l = i - 1; \
1026 : for (j = l; ; j--) { \
1027 : TPE v1 = bp1[j], v2 = bp2[j]; \
1028 : if (!is_##TPE##_nil(v1) && !is_##TPE##_nil(v2)) { \
1029 : n++; \
1030 : delta1 = (dbl) v1 - mean1; \
1031 : mean1 += delta1 / n; \
1032 : delta2 = (dbl) v2 - mean2; \
1033 : mean2 += delta2 / n; \
1034 : m2 += delta1 * ((dbl) v2 - mean2); \
1035 : } \
1036 : if (op[j] || j == k) { \
1037 : if (isinf(m2)) \
1038 : goto overflow; \
1039 : if (n > SAMPLE) { \
1040 : for (; ; l--) { \
1041 : rb[l] = OP; \
1042 : if (l == j) \
1043 : break; \
1044 : } \
1045 : } else { \
1046 : for (; ; l--) { \
1047 : rb[l] = dbl_nil; \
1048 : if (l == j) \
1049 : break; \
1050 : } \
1051 : has_nils = true; \
1052 : } \
1053 : if (j == k) \
1054 : break; \
1055 : l = j - 1; \
1056 : } \
1057 : } \
1058 : n = 0; \
1059 : mean1 = 0; \
1060 : mean2 = 0; \
1061 : m2 = 0; \
1062 : k = i; \
1063 : } while (0)
1064 :
1065 : #define ANALYTICAL_COVARIANCE_ALL_ROWS(TPE, SAMPLE, OP) \
1066 : do { \
1067 : TPE *bp1 = (TPE*)b1i.base, *bp2 = (TPE*)b2i.base; \
1068 : for (; j < i; j++) { \
1069 : TPE v1 = bp1[j], v2 = bp2[j]; \
1070 : if (!is_##TPE##_nil(v1) && !is_##TPE##_nil(v2)) { \
1071 : n++; \
1072 : delta1 = (dbl) v1 - mean1; \
1073 : mean1 += delta1 / n; \
1074 : delta2 = (dbl) v2 - mean2; \
1075 : mean2 += delta2 / n; \
1076 : m2 += delta1 * ((dbl) v2 - mean2); \
1077 : } \
1078 : } \
1079 : if (isinf(m2)) \
1080 : goto overflow; \
1081 : if (n > SAMPLE) { \
1082 : for (; k < i; k++) \
1083 : rb[k] = OP; \
1084 : } else { \
1085 : for (; k < i; k++) \
1086 : rb[k] = dbl_nil; \
1087 : has_nils = true; \
1088 : } \
1089 : n = 0; \
1090 : mean1 = 0; \
1091 : mean2 = 0; \
1092 : m2 = 0; \
1093 : } while (0)
1094 :
1095 : #define ANALYTICAL_COVARIANCE_CURRENT_ROW(TPE, SAMPLE, OP) \
1096 : do { \
1097 : for (; k < i; k++) \
1098 : rb[k] = SAMPLE == 1 ? dbl_nil : 0; \
1099 : has_nils = is_dbl_nil(rb[k - 1]); \
1100 : } while (0)
1101 :
1102 : typedef struct covariance_deltas {
1103 : BUN n;
1104 : dbl mean1, mean2, delta1, delta2, m2;
1105 : } covariance_deltas;
1106 :
1107 : #define INIT_AGGREGATE_COVARIANCE(TPE, SAMPLE, OP) \
1108 : do { \
1109 : computed = (covariance_deltas) {0}; \
1110 : } while (0)
1111 : #define COMPUTE_LEVEL0_COVARIANCE(X, TPE, SAMPLE, OP) \
1112 : do { \
1113 : TPE v1 = bp1[j + X], v2 = bp2[j + X]; \
1114 : computed = is_##TPE##_nil(v1) || is_##TPE##_nil(v2) ? (covariance_deltas) {0} \
1115 : : (covariance_deltas) {.n = 1, .mean1 = (dbl)v1, .mean2 = (dbl)v2, .delta1 = (dbl)v1, .delta2 = (dbl)v2}; \
1116 : } while (0)
1117 : #define COMPUTE_LEVELN_COVARIANCE(VAL, TPE, SAMPLE, OP) \
1118 : do { \
1119 : if (VAL.n) { \
1120 : computed.n++; \
1121 : computed.delta1 = VAL.delta1 - computed.mean1; \
1122 : computed.mean1 += computed.delta1 / computed.n; \
1123 : computed.delta2 = VAL.delta2 - computed.mean2; \
1124 : computed.mean2 += computed.delta2 / computed.n; \
1125 : computed.m2 += computed.delta1 * (VAL.delta2 - computed.mean2); \
1126 : } \
1127 : } while (0)
1128 : #define FINALIZE_AGGREGATE_COVARIANCE(TPE, SAMPLE, OP) FINALIZE_AGGREGATE_STDEV_VARIANCE(TPE, SAMPLE, OP)
1129 : #define ANALYTICAL_COVARIANCE_OTHERS(TPE, SAMPLE, OP) \
1130 : do { \
1131 : TPE *bp1 = (TPE*)b1i.base, *bp2 = (TPE*)b2i.base; \
1132 : oid ncount = i - k; \
1133 : if ((res = GDKrebuild_segment_tree(ncount, sizeof(covariance_deltas), st, &segment_tree, &levels_offset, &nlevels)) != GDK_SUCCEED) \
1134 : goto cleanup; \
1135 : populate_segment_tree(covariance_deltas, ncount, INIT_AGGREGATE_COVARIANCE, COMPUTE_LEVEL0_COVARIANCE, COMPUTE_LEVELN_COVARIANCE, TPE, SAMPLE, OP); \
1136 : for (; k < i; k++) \
1137 : compute_on_segment_tree(covariance_deltas, start[k] - j, end[k] - j, INIT_AGGREGATE_COVARIANCE, COMPUTE_LEVELN_COVARIANCE, FINALIZE_AGGREGATE_COVARIANCE, TPE, SAMPLE, OP); \
1138 : j = k; \
1139 : } while (0)
1140 :
1141 : #define GDK_ANALYTICAL_COVARIANCE(NAME, SAMPLE, OP) \
1142 : BAT * \
1143 : GDKanalytical_##NAME(BAT *p, BAT *o, BAT *b1, BAT *b2, BAT *s, BAT *e, int tpe, int frame_type) \
1144 : { \
1145 : BAT *r = COLnew(b1->hseqbase, TYPE_dbl, BATcount(b1), TRANSIENT); \
1146 : if (r == NULL) \
1147 : return NULL; \
1148 : BATiter pi = bat_iterator(p); \
1149 : BATiter oi = bat_iterator(o); \
1150 : BATiter b1i = bat_iterator(b1); \
1151 : BATiter b2i = bat_iterator(b2); \
1152 : BATiter si = bat_iterator(s); \
1153 : BATiter ei = bat_iterator(e); \
1154 : bool has_nils = false, last = false; \
1155 : oid i = 1, j = 0, k = 0, l = 0, cnt = BATcount(b1), *restrict start = si.base, *restrict end = ei.base, \
1156 : *levels_offset = NULL, nlevels = 0; \
1157 : lng n = 0; \
1158 : bit *np = pi.base, *op = oi.base; \
1159 : dbl *rb = (dbl *) Tloc(r, 0), mean1 = 0, mean2 = 0, m2 = 0, delta1, delta2; \
1160 : void *segment_tree = NULL; \
1161 : gdk_return res = GDK_SUCCEED; \
1162 : BAT *st = NULL; \
1163 : \
1164 : assert(np == NULL || cnt == 0 || np[0] == 0); \
1165 : if (cnt > 0) { \
1166 : switch (frame_type) { \
1167 : case 3: /* unbounded until current row */ \
1168 : ANALYTICAL_STATISTICS_BRANCHES(COVARIANCE_UNBOUNDED_TILL_CURRENT_ROW, SAMPLE, OP); \
1169 : break; \
1170 : case 4: /* current row until unbounded */ \
1171 : ANALYTICAL_STATISTICS_BRANCHES(COVARIANCE_CURRENT_ROW_TILL_UNBOUNDED, SAMPLE, OP); \
1172 : break; \
1173 : case 5: /* all rows */ \
1174 : ANALYTICAL_STATISTICS_BRANCHES(COVARIANCE_ALL_ROWS, SAMPLE, OP); \
1175 : break; \
1176 : case 6: /* current row */ \
1177 : ANALYTICAL_STATISTICS_BRANCHES(COVARIANCE_CURRENT_ROW, SAMPLE, OP); \
1178 : break; \
1179 : default: \
1180 : if ((st = GDKinitialize_segment_tree()) == NULL) { \
1181 : res = GDK_FAIL; \
1182 : goto cleanup; \
1183 : } \
1184 : ANALYTICAL_STATISTICS_BRANCHES(COVARIANCE_OTHERS, SAMPLE, OP); \
1185 : break; \
1186 : } \
1187 : } \
1188 : \
1189 : BATsetcount(r, cnt); \
1190 : r->tnonil = !has_nils; \
1191 : r->tnil = has_nils; \
1192 : goto cleanup; /* all these gotos seem confusing but it cleans up the ending of the operator */ \
1193 : overflow: \
1194 : GDKerror("22003!overflow in calculation.\n"); \
1195 : res = GDK_FAIL; \
1196 : cleanup: \
1197 : bat_iterator_end(&pi); \
1198 : bat_iterator_end(&oi); \
1199 : bat_iterator_end(&b1i); \
1200 : bat_iterator_end(&b2i); \
1201 : bat_iterator_end(&si); \
1202 : bat_iterator_end(&ei); \
1203 : BBPreclaim(st); \
1204 : if (res != GDK_SUCCEED) { \
1205 : BBPreclaim(r); \
1206 : r = NULL; \
1207 : } \
1208 : return r; \
1209 : nosupport: \
1210 : GDKerror("42000!covariance of type %s unsupported.\n", ATOMname(tpe)); \
1211 : res = GDK_FAIL; \
1212 : goto cleanup; \
1213 : }
1214 :
1215 1188 : GDK_ANALYTICAL_COVARIANCE(covariance_samp, 1, m2 / (n - 1))
1216 1267 : GDK_ANALYTICAL_COVARIANCE(covariance_pop, 0, m2 / n)
1217 :
1218 : #define ANALYTICAL_CORRELATION_UNBOUNDED_TILL_CURRENT_ROW(TPE, SAMPLE, OP) /* SAMPLE and OP not used */ \
1219 : do { \
1220 : TPE *bp1 = (TPE*)b1i.base, *bp2 = (TPE*)b2i.base; \
1221 : for (; k < i;) { \
1222 : j = k; \
1223 : do { \
1224 : TPE v1 = bp1[k], v2 = bp2[k]; \
1225 : if (!is_##TPE##_nil(v1) && !is_##TPE##_nil(v2)) { \
1226 : n++; \
1227 : delta1 = (dbl) v1 - mean1; \
1228 : mean1 += delta1 / n; \
1229 : delta2 = (dbl) v2 - mean2; \
1230 : mean2 += delta2 / n; \
1231 : aux = (dbl) v2 - mean2; \
1232 : up += delta1 * aux; \
1233 : down1 += delta1 * ((dbl) v1 - mean1); \
1234 : down2 += delta2 * aux; \
1235 : } \
1236 : k++; \
1237 : } while (k < i && !op[k]); \
1238 : if (isinf(up) || isinf(down1) || isinf(down2)) \
1239 : goto overflow; \
1240 : if (n != 0 && down1 != 0 && down2 != 0) { \
1241 : rr = (up / n) / (sqrt(down1 / n) * sqrt(down2 / n)); \
1242 : assert(!is_dbl_nil(rr)); \
1243 : } else { \
1244 : rr = dbl_nil; \
1245 : has_nils = true; \
1246 : } \
1247 : for (; j < k; j++) \
1248 : rb[j] = rr; \
1249 : } \
1250 : n = 0; \
1251 : mean1 = 0; \
1252 : mean2 = 0; \
1253 : up = 0; \
1254 : down1 = 0; \
1255 : down2 = 0; \
1256 : } while (0)
1257 :
1258 : #define ANALYTICAL_CORRELATION_CURRENT_ROW_TILL_UNBOUNDED(TPE, SAMPLE, OP) /* SAMPLE and OP not used */ \
1259 : do { \
1260 : TPE *bp1 = (TPE*)b1i.base, *bp2 = (TPE*)b2i.base; \
1261 : l = i - 1; \
1262 : for (j = l; ; j--) { \
1263 : TPE v1 = bp1[j], v2 = bp2[j]; \
1264 : if (!is_##TPE##_nil(v1) && !is_##TPE##_nil(v2)) { \
1265 : n++; \
1266 : delta1 = (dbl) v1 - mean1; \
1267 : mean1 += delta1 / n; \
1268 : delta2 = (dbl) v2 - mean2; \
1269 : mean2 += delta2 / n; \
1270 : aux = (dbl) v2 - mean2; \
1271 : up += delta1 * aux; \
1272 : down1 += delta1 * ((dbl) v1 - mean1); \
1273 : down2 += delta2 * aux; \
1274 : } \
1275 : if (op[j] || j == k) { \
1276 : if (isinf(up) || isinf(down1) || isinf(down2)) \
1277 : goto overflow; \
1278 : if (n != 0 && down1 != 0 && down2 != 0) { \
1279 : rr = (up / n) / (sqrt(down1 / n) * sqrt(down2 / n)); \
1280 : assert(!is_dbl_nil(rr)); \
1281 : } else { \
1282 : rr = dbl_nil; \
1283 : has_nils = true; \
1284 : } \
1285 : for (; ; l--) { \
1286 : rb[l] = rr; \
1287 : if (l == j) \
1288 : break; \
1289 : } \
1290 : if (j == k) \
1291 : break; \
1292 : l = j - 1; \
1293 : } \
1294 : } \
1295 : n = 0; \
1296 : mean1 = 0; \
1297 : mean2 = 0; \
1298 : up = 0; \
1299 : down1 = 0; \
1300 : down2 = 0; \
1301 : k = i; \
1302 : } while (0)
1303 :
1304 : #define ANALYTICAL_CORRELATION_ALL_ROWS(TPE, SAMPLE, OP) /* SAMPLE and OP not used */ \
1305 : do { \
1306 : TPE *bp1 = (TPE*)b1i.base, *bp2 = (TPE*)b2i.base; \
1307 : for (; j < i; j++) { \
1308 : TPE v1 = bp1[j], v2 = bp2[j]; \
1309 : if (!is_##TPE##_nil(v1) && !is_##TPE##_nil(v2)) { \
1310 : n++; \
1311 : delta1 = (dbl) v1 - mean1; \
1312 : mean1 += delta1 / n; \
1313 : delta2 = (dbl) v2 - mean2; \
1314 : mean2 += delta2 / n; \
1315 : aux = (dbl) v2 - mean2; \
1316 : up += delta1 * aux; \
1317 : down1 += delta1 * ((dbl) v1 - mean1); \
1318 : down2 += delta2 * aux; \
1319 : } \
1320 : } \
1321 : if (isinf(up) || isinf(down1) || isinf(down2)) \
1322 : goto overflow; \
1323 : if (n != 0 && down1 != 0 && down2 != 0) { \
1324 : rr = (up / n) / (sqrt(down1 / n) * sqrt(down2 / n)); \
1325 : assert(!is_dbl_nil(rr)); \
1326 : } else { \
1327 : rr = dbl_nil; \
1328 : has_nils = true; \
1329 : } \
1330 : for (; k < i ; k++) \
1331 : rb[k] = rr; \
1332 : n = 0; \
1333 : mean1 = 0; \
1334 : mean2 = 0; \
1335 : up = 0; \
1336 : down1 = 0; \
1337 : down2 = 0; \
1338 : } while (0)
1339 :
1340 : #define ANALYTICAL_CORRELATION_CURRENT_ROW(TPE, SAMPLE, OP) /* SAMPLE and OP not used */ \
1341 : do { \
1342 : for (; k < i; k++) \
1343 : rb[k] = dbl_nil; \
1344 : has_nils = true; \
1345 : } while (0)
1346 :
1347 :
1348 : typedef struct correlation_deltas {
1349 : BUN n;
1350 : dbl mean1, mean2, delta1, delta2, up, down1, down2;
1351 : } correlation_deltas;
1352 :
1353 : #define INIT_AGGREGATE_CORRELATION(TPE, SAMPLE, OP) \
1354 : do { \
1355 : computed = (correlation_deltas) {0}; \
1356 : } while (0)
1357 : #define COMPUTE_LEVEL0_CORRELATION(X, TPE, SAMPLE, OP) \
1358 : do { \
1359 : TPE v1 = bp1[j + X], v2 = bp2[j + X]; \
1360 : computed = is_##TPE##_nil(v1) || is_##TPE##_nil(v2) ? (correlation_deltas) {0} \
1361 : : (correlation_deltas) {.n = 1, .mean1 = (dbl)v1, .mean2 = (dbl)v2, .delta1 = (dbl)v1, .delta2 = (dbl)v2}; \
1362 : } while (0)
1363 : #define COMPUTE_LEVELN_CORRELATION(VAL, TPE, SAMPLE, OP) \
1364 : do { \
1365 : if (VAL.n) { \
1366 : computed.n++; \
1367 : computed.delta1 = VAL.delta1 - computed.mean1; \
1368 : computed.mean1 += computed.delta1 / computed.n; \
1369 : computed.delta2 = VAL.delta2 - computed.mean2; \
1370 : computed.mean2 += computed.delta2 / computed.n; \
1371 : dbl aux = VAL.delta2 - computed.mean2; \
1372 : computed.up += computed.delta1 * aux; \
1373 : computed.down1 += computed.delta1 * (VAL.delta1 - computed.mean1); \
1374 : computed.down2 += computed.delta2 * aux; \
1375 : } \
1376 : } while (0)
1377 : #define FINALIZE_AGGREGATE_CORRELATION(TPE, SAMPLE, OP) \
1378 : do { \
1379 : n = computed.n; \
1380 : up = computed.up; \
1381 : down1 = computed.down1; \
1382 : down2 = computed.down2; \
1383 : if (isinf(up) || isinf(down1) || isinf(down2)) \
1384 : goto overflow; \
1385 : if (n != 0 && down1 != 0 && down2 != 0) { \
1386 : rr = (up / n) / (sqrt(down1 / n) * sqrt(down2 / n)); \
1387 : assert(!is_dbl_nil(rr)); \
1388 : } else { \
1389 : rr = dbl_nil; \
1390 : has_nils = true; \
1391 : } \
1392 : rb[k] = rr; \
1393 : } while (0)
1394 : #define ANALYTICAL_CORRELATION_OTHERS(TPE, SAMPLE, OP) /* SAMPLE and OP not used */ \
1395 : do { \
1396 : TPE *bp1 = (TPE*)b1i.base, *bp2 = (TPE*)b2i.base; \
1397 : oid ncount = i - k; \
1398 : if ((res = GDKrebuild_segment_tree(ncount, sizeof(correlation_deltas), st, &segment_tree, &levels_offset, &nlevels)) != GDK_SUCCEED) \
1399 : goto cleanup; \
1400 : populate_segment_tree(correlation_deltas, ncount, INIT_AGGREGATE_CORRELATION, COMPUTE_LEVEL0_CORRELATION, COMPUTE_LEVELN_CORRELATION, TPE, SAMPLE, OP); \
1401 : for (; k < i; k++) \
1402 : compute_on_segment_tree(correlation_deltas, start[k] - j, end[k] - j, INIT_AGGREGATE_CORRELATION, COMPUTE_LEVELN_CORRELATION, FINALIZE_AGGREGATE_CORRELATION, TPE, SAMPLE, OP); \
1403 : j = k; \
1404 : } while (0)
1405 :
1406 : BAT *
1407 51 : GDKanalytical_correlation(BAT *p, BAT *o, BAT *b1, BAT *b2, BAT *s, BAT *e, int tpe, int frame_type)
1408 : {
1409 51 : BAT *r = COLnew(b1->hseqbase, TYPE_dbl, BATcount(b1), TRANSIENT);
1410 51 : if (r == NULL)
1411 : return NULL;
1412 51 : bool has_nils = false, last = false;
1413 51 : BATiter pi = bat_iterator(p);
1414 51 : BATiter oi = bat_iterator(o);
1415 51 : BATiter b1i = bat_iterator(b1);
1416 51 : BATiter b2i = bat_iterator(b2);
1417 51 : BATiter si = bat_iterator(s);
1418 51 : BATiter ei = bat_iterator(e);
1419 51 : oid i = 1, j = 0, k = 0, l = 0, cnt = BATcount(b1),
1420 51 : *levels_offset = NULL, nlevels = 0;
1421 51 : const oid *restrict start = si.base, *restrict end = ei.base;
1422 51 : lng n = 0;
1423 51 : const bit *np = pi.base, *op = oi.base;
1424 51 : dbl *rb = (dbl *) Tloc(r, 0), mean1 = 0, mean2 = 0, up = 0, down1 = 0, down2 = 0, delta1, delta2, aux, rr;
1425 51 : void *segment_tree = NULL;
1426 51 : gdk_return res = GDK_SUCCEED;
1427 51 : BAT *st = NULL;
1428 :
1429 51 : assert(np == NULL || cnt == 0 || np[0] == 0);
1430 51 : if (cnt > 0) {
1431 51 : switch (frame_type) {
1432 18 : case 3: /* unbounded until current row */
1433 504 : ANALYTICAL_STATISTICS_BRANCHES(CORRELATION_UNBOUNDED_TILL_CURRENT_ROW, ;, ;);
1434 : break;
1435 0 : case 4: /* current row until unbounded */
1436 0 : ANALYTICAL_STATISTICS_BRANCHES(CORRELATION_CURRENT_ROW_TILL_UNBOUNDED, ;, ;);
1437 : break;
1438 18 : case 5: /* all rows */
1439 443 : ANALYTICAL_STATISTICS_BRANCHES(CORRELATION_ALL_ROWS, ;, ;);
1440 : break;
1441 0 : case 6: /* current row */
1442 0 : ANALYTICAL_STATISTICS_BRANCHES(CORRELATION_CURRENT_ROW, ;, ;);
1443 : break;
1444 15 : default:
1445 15 : if ((st = GDKinitialize_segment_tree()) == NULL) {
1446 0 : res = GDK_FAIL;
1447 0 : goto cleanup;
1448 : }
1449 17007 : ANALYTICAL_STATISTICS_BRANCHES(CORRELATION_OTHERS, ;, ;);
1450 : break;
1451 : }
1452 : }
1453 :
1454 50 : BATsetcount(r, cnt);
1455 50 : r->tnonil = !has_nils;
1456 50 : r->tnil = has_nils;
1457 50 : goto cleanup; /* all these gotos seem confusing but it cleans up the ending of the operator */
1458 1 : overflow:
1459 1 : GDKerror("22003!overflow in calculation.\n");
1460 1 : res = GDK_FAIL;
1461 51 : cleanup:
1462 51 : bat_iterator_end(&pi);
1463 51 : bat_iterator_end(&oi);
1464 51 : bat_iterator_end(&b1i);
1465 51 : bat_iterator_end(&b2i);
1466 51 : bat_iterator_end(&si);
1467 51 : bat_iterator_end(&ei);
1468 51 : BBPreclaim(st);
1469 51 : if (res != GDK_SUCCEED) {
1470 1 : BBPreclaim(r);
1471 1 : r = NULL;
1472 : }
1473 : return r;
1474 0 : nosupport:
1475 0 : GDKerror("42000!correlation of type %s unsupported.\n", ATOMname(tpe));
1476 0 : res = GDK_FAIL;
1477 0 : goto cleanup;
1478 : }
|