Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "gdk.h"
15 : #include "gdk_analytic.h"
16 : #include "gdk_calc_private.h"
17 :
18 : #ifdef HAVE_HGE
19 : #define LNG_HGE hge
20 : #define GDK_LNG_HGE_max GDK_hge_max
21 : #define LNG_HGE_nil hge_nil
22 : #else
23 : #define LNG_HGE lng
24 : #define GDK_LNG_HGE_max GDK_lng_max
25 : #define LNG_HGE_nil lng_nil
26 : #endif
27 :
28 : /* average on integers */
29 : #define ANALYTICAL_AVERAGE_CALC_NUM_STEP1(TPE, IMP, ARG) \
30 : do { \
31 : if (!is_##TPE##_nil(ARG)) { \
32 : ADDI_WITH_CHECK(ARG, sum, LNG_HGE, sum, \
33 : GDK_LNG_HGE_max, \
34 : goto avg_overflow##TPE##IMP); \
35 : /* count only when no overflow occurs */ \
36 : n++; \
37 : } \
38 : } while (0)
39 :
40 : /* do the common part of the overflow handling from STEP1 */
41 : #define ANALYTICAL_AVERAGE_CALC_NUM_STEP2(TPE, IMP) \
42 : do { \
43 : avg_overflow##TPE##IMP: \
44 : assert(n > 0); \
45 : if (sum >= 0) { \
46 : a = (TPE) (sum / n); \
47 : rr = (lng) (sum % n); \
48 : } else { \
49 : sum = -sum; \
50 : a = - (TPE) (sum / n); \
51 : rr = (lng) (sum % n); \
52 : if (r) { \
53 : a--; \
54 : rr = n - rr; \
55 : } \
56 : } \
57 : } while (0)
58 :
59 : #define ANALYTICAL_AVG_IMP_NUM_UNBOUNDED_TILL_CURRENT_ROW(TPE, IMP) \
60 : do { \
61 : TPE a = 0; \
62 : dbl curval = dbl_nil; \
63 : for (; k < i;) { \
64 : j = k; \
65 : do { \
66 : ANALYTICAL_AVERAGE_CALC_NUM_STEP1(TPE, IMP, bp[k]); \
67 : k++; \
68 : } while (k < i && !op[k]); \
69 : curval = n > 0 ? (dbl) sum / n : dbl_nil; \
70 : if (0) { /* overflow handling from STEP1 */ \
71 : ANALYTICAL_AVERAGE_CALC_NUM_STEP2(TPE, IMP); \
72 : while (k < i && !op[k]) { \
73 : TPE v = bp[k++]; \
74 : if (is_##TPE##_nil(v)) \
75 : continue; \
76 : AVERAGE_ITER(TPE, v, a, rr, n); \
77 : } \
78 : curval = a + (dbl) rr / n; \
79 : } \
80 : for (; j < k; j++) \
81 : rb[j] = curval; \
82 : has_nils |= (n == 0); \
83 : } \
84 : n = 0; \
85 : sum = 0; \
86 : } while (0)
87 :
88 : #define ANALYTICAL_AVG_IMP_NUM_CURRENT_ROW_TILL_UNBOUNDED(TPE, IMP) \
89 : do { \
90 : TPE a = 0; \
91 : dbl curval = dbl_nil; \
92 : l = i - 1; \
93 : for (j = l; ; j--) { \
94 : ANALYTICAL_AVERAGE_CALC_NUM_STEP1(TPE, IMP, bp[j]); \
95 : if (op[j] || j == k) { \
96 : curval = n > 0 ? (dbl) sum / n : dbl_nil; \
97 : if (0) { /* overflow handling from STEP1 */ \
98 : ANALYTICAL_AVERAGE_CALC_NUM_STEP2(TPE, IMP); \
99 : while (!op[j] && j != k) { \
100 : TPE v = bp[j--]; \
101 : if (is_##TPE##_nil(v)) \
102 : continue; \
103 : AVERAGE_ITER(TPE, v, a, rr, n); \
104 : curval = a + (dbl) rr / n; \
105 : } \
106 : } \
107 : for (; ; l--) { \
108 : rb[l] = curval; \
109 : if (l == j) \
110 : break; \
111 : } \
112 : has_nils |= (n == 0); \
113 : if (j == k) \
114 : break; \
115 : l = j - 1; \
116 : } \
117 : } \
118 : n = 0; \
119 : sum = 0; \
120 : k = i; \
121 : } while (0)
122 :
123 : #define ANALYTICAL_AVG_IMP_NUM_ALL_ROWS(TPE, IMP) \
124 : do { \
125 : TPE a = 0; \
126 : TPE v; \
127 : for (; j < i; j++) { \
128 : v = bp[j]; \
129 : ANALYTICAL_AVERAGE_CALC_NUM_STEP1(TPE, IMP, v); \
130 : } \
131 : curval = n > 0 ? (dbl) sum / n : dbl_nil; \
132 : if (0) { /* overflow handling from STEP1 */ \
133 : ANALYTICAL_AVERAGE_CALC_NUM_STEP2(TPE, IMP); \
134 : for (; j < i; j++) { \
135 : v = bp[j]; \
136 : if (is_##TPE##_nil(v)) \
137 : continue; \
138 : AVERAGE_ITER(TPE, v, a, rr, n); \
139 : } \
140 : curval = a + (dbl) rr / n; \
141 : } \
142 : for (; k < i; k++) \
143 : rb[k] = curval; \
144 : has_nils |= (n == 0); \
145 : n = 0; \
146 : sum = 0; \
147 : } while (0)
148 :
149 : #define ANALYTICAL_AVG_IMP_NUM_CURRENT_ROW(TPE, IMP) \
150 : do { \
151 : for (; k < i; k++) { \
152 : TPE v = bp[k]; \
153 : if (is_##TPE##_nil(v)) { \
154 : rb[k] = dbl_nil; \
155 : has_nils = true; \
156 : } else { \
157 : rb[k] = (dbl) v; \
158 : } \
159 : } \
160 : } while (0)
161 :
162 : #define avg_num_deltas(TPE) typedef struct avg_num_deltas##TPE { TPE a; lng n; lng rr;} avg_num_deltas##TPE;
163 : avg_num_deltas(bte)
164 : avg_num_deltas(sht)
165 : avg_num_deltas(int)
166 : avg_num_deltas(lng)
167 :
168 : #define INIT_AGGREGATE_AVG_NUM(TPE, NOTHING1, NOTHING2) \
169 : do { \
170 : computed = (avg_num_deltas##TPE) {0}; \
171 : } while (0)
172 : #define COMPUTE_LEVEL0_AVG_NUM(X, TPE, NOTHING1, NOTHING2) \
173 : do { \
174 : TPE v = bp[j + X]; \
175 : computed = is_##TPE##_nil(v) ? (avg_num_deltas##TPE){0} : (avg_num_deltas##TPE) {.a = v, .n = 1}; \
176 : } while (0)
177 : #define COMPUTE_LEVELN_AVG_NUM(VAL, TPE, NOTHING1, NOTHING2) \
178 : do { \
179 : if (VAL.n) \
180 : AVERAGE_ITER(TPE, VAL.a, computed.a, computed.rr, computed.n); \
181 : } while (0)
182 : #define FINALIZE_AGGREGATE_AVG_NUM(TPE, NOTHING1, NOTHING2) \
183 : do { \
184 : if (computed.n == 0) { \
185 : rb[k] = dbl_nil; \
186 : has_nils = true; \
187 : } else { \
188 : rb[k] = computed.a + (dbl) computed.rr / computed.n; \
189 : } \
190 : } while (0)
191 : #define ANALYTICAL_AVG_IMP_NUM_OTHERS(TPE, IMP) \
192 : do { \
193 : oid ncount = i - k; \
194 : if ((res = GDKrebuild_segment_tree(ncount, sizeof(avg_num_deltas##TPE), st, &segment_tree, &levels_offset, &nlevels)) != GDK_SUCCEED) \
195 : goto cleanup; \
196 : populate_segment_tree(avg_num_deltas##TPE, ncount, INIT_AGGREGATE_AVG_NUM, COMPUTE_LEVEL0_AVG_NUM, COMPUTE_LEVELN_AVG_NUM, TPE, NOTHING, NOTHING); \
197 : for (; k < i; k++) \
198 : compute_on_segment_tree(avg_num_deltas##TPE, start[k] - j, end[k] - j, INIT_AGGREGATE_AVG_NUM, COMPUTE_LEVELN_AVG_NUM, FINALIZE_AGGREGATE_AVG_NUM, TPE, NOTHING, NOTHING); \
199 : j = k; \
200 : } while (0)
201 :
202 : /* average on floating-points */
203 : #define ANALYTICAL_AVG_IMP_FP_UNBOUNDED_TILL_CURRENT_ROW(TPE, IMP) \
204 : do { \
205 : TPE a = 0; \
206 : dbl curval = dbl_nil; \
207 : for (; k < i;) { \
208 : j = k; \
209 : do { \
210 : if (!is_##TPE##_nil(bp[k])) \
211 : AVERAGE_ITER_FLOAT(TPE, bp[k], a, n); \
212 : k++; \
213 : } while (k < i && !op[k]); \
214 : if (n > 0) \
215 : curval = a; \
216 : else \
217 : has_nils = true; \
218 : for (; j < k; j++) \
219 : rb[j] = curval; \
220 : } \
221 : n = 0; \
222 : } while (0)
223 :
224 : #define ANALYTICAL_AVG_IMP_FP_CURRENT_ROW_TILL_UNBOUNDED(TPE, IMP) \
225 : do { \
226 : TPE a = 0; \
227 : dbl curval = dbl_nil; \
228 : l = i - 1; \
229 : for (j = l; ; j--) { \
230 : if (!is_##TPE##_nil(bp[j])) \
231 : AVERAGE_ITER_FLOAT(TPE, bp[j], a, n); \
232 : if (op[j] || j == k) { \
233 : for (; ; l--) { \
234 : rb[l] = curval; \
235 : if (l == j) \
236 : break; \
237 : } \
238 : has_nils |= is_##TPE##_nil(curval); \
239 : if (j == k) \
240 : break; \
241 : l = j - 1; \
242 : } \
243 : } \
244 : n = 0; \
245 : k = i; \
246 : } while (0)
247 :
248 : #define ANALYTICAL_AVG_IMP_FP_ALL_ROWS(TPE, IMP) \
249 : do { \
250 : TPE a = 0; \
251 : dbl curval = dbl_nil; \
252 : for (; j < i; j++) { \
253 : TPE v = bp[j]; \
254 : if (!is_##TPE##_nil(v)) \
255 : AVERAGE_ITER_FLOAT(TPE, v, a, n); \
256 : } \
257 : if (n > 0) \
258 : curval = a; \
259 : else \
260 : has_nils = true; \
261 : for (; k < i; k++) \
262 : rb[k] = curval; \
263 : n = 0; \
264 : } while (0)
265 :
266 : #define ANALYTICAL_AVG_IMP_FP_CURRENT_ROW(TPE, IMP) ANALYTICAL_AVG_IMP_NUM_CURRENT_ROW(TPE, IMP)
267 :
268 : #define avg_fp_deltas(TPE) typedef struct avg_fp_deltas_##TPE {TPE a; lng n;} avg_fp_deltas_##TPE;
269 : avg_fp_deltas(flt)
270 : avg_fp_deltas(dbl)
271 :
272 : #define INIT_AGGREGATE_AVG_FP(TPE, NOTHING1, NOTHING2) \
273 : do { \
274 : computed = (avg_fp_deltas_##TPE) {0}; \
275 : } while (0)
276 : #define COMPUTE_LEVEL0_AVG_FP(X, TPE, NOTHING1, NOTHING2) \
277 : do { \
278 : TPE v = bp[j + X]; \
279 : computed = is_##TPE##_nil(v) ? (avg_fp_deltas_##TPE) {0} : (avg_fp_deltas_##TPE) {.n = 1, .a = v}; \
280 : } while (0)
281 : #define COMPUTE_LEVELN_AVG_FP(VAL, TPE, NOTHING1, NOTHING2) \
282 : do { \
283 : if (VAL.n) \
284 : AVERAGE_ITER_FLOAT(TPE, VAL.a, computed.a, computed.n); \
285 : } while (0)
286 : #define FINALIZE_AGGREGATE_AVG_FP(TPE, NOTHING1, NOTHING2) \
287 : do { \
288 : if (computed.n == 0) { \
289 : rb[k] = dbl_nil; \
290 : has_nils = true; \
291 : } else { \
292 : rb[k] = computed.a; \
293 : } \
294 : } while (0)
295 : #define ANALYTICAL_AVG_IMP_FP_OTHERS(TPE, IMP) \
296 : do { \
297 : oid ncount = i - k; \
298 : if ((res = GDKrebuild_segment_tree(ncount, sizeof(avg_fp_deltas_##TPE), st, &segment_tree, &levels_offset, &nlevels)) != GDK_SUCCEED) \
299 : goto cleanup; \
300 : populate_segment_tree(avg_fp_deltas_##TPE, ncount, INIT_AGGREGATE_AVG_FP, COMPUTE_LEVEL0_AVG_FP, COMPUTE_LEVELN_AVG_FP, TPE, NOTHING, NOTHING); \
301 : for (; k < i; k++) \
302 : compute_on_segment_tree(avg_fp_deltas_##TPE, start[k] - j, end[k] - j, INIT_AGGREGATE_AVG_FP, COMPUTE_LEVELN_AVG_FP, FINALIZE_AGGREGATE_AVG_FP, TPE, NOTHING, NOTHING); \
303 : j = k; \
304 : } while (0)
305 :
306 : #define ANALYTICAL_AVG_PARTITIONS(TPE, IMP, REAL_IMP) \
307 : do { \
308 : TPE *restrict bp = (TPE*)bi.base; \
309 : if (p) { \
310 : while (i < cnt) { \
311 : if (np[i]) { \
312 : avg##TPE##IMP: \
313 : REAL_IMP(TPE, IMP); \
314 : } \
315 : if (!last) \
316 : i++; \
317 : } \
318 : } \
319 : if (!last) { \
320 : last = true; \
321 : i = cnt; \
322 : goto avg##TPE##IMP; \
323 : } \
324 : } while (0)
325 :
326 : #ifdef HAVE_HGE
327 : avg_num_deltas(hge)
328 : #define ANALYTICAL_AVG_LIMIT(IMP) \
329 : case TYPE_hge: \
330 : ANALYTICAL_AVG_PARTITIONS(hge, IMP, ANALYTICAL_AVG_IMP_NUM_##IMP); \
331 : break;
332 : #else
333 : #define ANALYTICAL_AVG_LIMIT(IMP)
334 : #endif
335 :
336 : #define ANALYTICAL_AVG_BRANCHES(IMP) \
337 : do { \
338 : switch (tpe) { \
339 : case TYPE_bte: \
340 : ANALYTICAL_AVG_PARTITIONS(bte, IMP, ANALYTICAL_AVG_IMP_NUM_##IMP); \
341 : break; \
342 : case TYPE_sht: \
343 : ANALYTICAL_AVG_PARTITIONS(sht, IMP, ANALYTICAL_AVG_IMP_NUM_##IMP); \
344 : break; \
345 : case TYPE_int: \
346 : ANALYTICAL_AVG_PARTITIONS(int, IMP, ANALYTICAL_AVG_IMP_NUM_##IMP); \
347 : break; \
348 : case TYPE_lng: \
349 : ANALYTICAL_AVG_PARTITIONS(lng, IMP, ANALYTICAL_AVG_IMP_NUM_##IMP); \
350 : break; \
351 : ANALYTICAL_AVG_LIMIT(IMP) \
352 : case TYPE_flt: \
353 : ANALYTICAL_AVG_PARTITIONS(flt, IMP, ANALYTICAL_AVG_IMP_FP_##IMP); \
354 : break; \
355 : case TYPE_dbl: \
356 : ANALYTICAL_AVG_PARTITIONS(dbl, IMP, ANALYTICAL_AVG_IMP_FP_##IMP); \
357 : break; \
358 : default: \
359 : goto nosupport; \
360 : } \
361 : } while (0)
362 :
363 : gdk_return
364 72 : GDKanalyticalavg(BAT *r, BAT *p, BAT *o, BAT *b, BAT *s, BAT *e, int tpe, int frame_type)
365 : {
366 72 : BATiter pi = bat_iterator(p);
367 72 : BATiter oi = bat_iterator(o);
368 72 : BATiter bi = bat_iterator(b);
369 72 : BATiter si = bat_iterator(s);
370 72 : BATiter ei = bat_iterator(e);
371 72 : bool has_nils = false, last = false;
372 72 : oid i = 1, j = 0, k = 0, l = 0, cnt = BATcount(b), *restrict start = si.base, *restrict end = ei.base,
373 72 : *levels_offset = NULL, nlevels = 0;
374 72 : lng n = 0, rr = 0;
375 72 : dbl *rb = (dbl *) Tloc(r, 0), curval = dbl_nil;
376 72 : bit *np = pi.base, *op = oi.base;
377 72 : void *segment_tree = NULL;
378 72 : gdk_return res = GDK_SUCCEED;
379 : #ifdef HAVE_HGE
380 72 : hge sum = 0;
381 : #else
382 : lng sum = 0;
383 : #endif
384 72 : BAT *st = NULL;
385 :
386 72 : assert(np == NULL || cnt == 0 || np[0] == 0);
387 72 : if (cnt > 0) {
388 72 : switch (frame_type) {
389 25 : case 3: /* unbounded until current row */
390 612 : ANALYTICAL_AVG_BRANCHES(UNBOUNDED_TILL_CURRENT_ROW);
391 : break;
392 1 : case 4: /* current row until unbounded */
393 11 : ANALYTICAL_AVG_BRANCHES(CURRENT_ROW_TILL_UNBOUNDED);
394 : break;
395 17 : case 5: /* all rows */
396 356 : ANALYTICAL_AVG_BRANCHES(ALL_ROWS);
397 : break;
398 0 : case 6: /* current row */
399 0 : ANALYTICAL_AVG_BRANCHES(CURRENT_ROW);
400 : break;
401 29 : default:
402 29 : if ((st = GDKinitialize_segment_tree()) == NULL) {
403 0 : res = GDK_FAIL;
404 0 : goto cleanup;
405 : }
406 2393 : ANALYTICAL_AVG_BRANCHES(OTHERS);
407 : break;
408 : }
409 : }
410 :
411 72 : BATsetcount(r, cnt);
412 72 : r->tnonil = !has_nils;
413 72 : r->tnil = has_nils;
414 72 : cleanup:
415 72 : bat_iterator_end(&pi);
416 72 : bat_iterator_end(&oi);
417 72 : bat_iterator_end(&bi);
418 72 : bat_iterator_end(&si);
419 72 : bat_iterator_end(&ei);
420 72 : BBPreclaim(st);
421 72 : return res;
422 0 : nosupport:
423 0 : GDKerror("42000!average of type %s to dbl unsupported.\n", ATOMname(tpe));
424 0 : res = GDK_FAIL;
425 0 : goto cleanup;
426 : }
427 :
428 : #ifdef TRUNCATE_NUMBERS
429 : #define ANALYTICAL_AVERAGE_INT_CALC_FINALIZE(avg, rem, ncnt) \
430 : do { \
431 : if (rem > 0 && avg < 0) \
432 : avg++; \
433 : } while(0)
434 : #else
435 : #define ANALYTICAL_AVERAGE_INT_CALC_FINALIZE(avg, rem, ncnt) \
436 : do { \
437 : if (rem > 0) { \
438 : if (avg < 0) { \
439 : if (2*rem > ncnt) \
440 : avg++; \
441 : } else { \
442 : if (2*rem >= ncnt) \
443 : avg++; \
444 : } \
445 : } \
446 : } while(0)
447 : #endif
448 :
449 : #define ANALYTICAL_AVG_INT_UNBOUNDED_TILL_CURRENT_ROW(TPE) \
450 : do { \
451 : TPE avg = 0; \
452 : for (; k < i;) { \
453 : j = k; \
454 : do { \
455 : if (!is_##TPE##_nil(bp[k])) \
456 : AVERAGE_ITER(TPE, bp[k], avg, rem, ncnt); \
457 : k++; \
458 : } while (k < i && !op[k]); \
459 : if (ncnt == 0) { \
460 : has_nils = true; \
461 : for (; j < k; j++) \
462 : rb[j] = TPE##_nil; \
463 : } else { \
464 : TPE avgfinal = avg; \
465 : ANALYTICAL_AVERAGE_INT_CALC_FINALIZE(avgfinal, rem, ncnt); \
466 : for (; j < k; j++) \
467 : rb[j] = avgfinal; \
468 : } \
469 : } \
470 : rem = 0; \
471 : ncnt = 0; \
472 : } while (0)
473 :
474 : #define ANALYTICAL_AVG_INT_CURRENT_ROW_TILL_UNBOUNDED(TPE) \
475 : do { \
476 : TPE avg = 0; \
477 : l = i - 1; \
478 : for (j = l; ; j--) { \
479 : if (!is_##TPE##_nil(bp[j])) \
480 : AVERAGE_ITER(TPE, bp[j], avg, rem, ncnt); \
481 : if (op[j] || j == k) { \
482 : if (ncnt == 0) { \
483 : has_nils = true; \
484 : for (; ; l--) { \
485 : rb[l] = TPE##_nil; \
486 : if (l == j) \
487 : break; \
488 : } \
489 : } else { \
490 : TPE avgfinal = avg; \
491 : ANALYTICAL_AVERAGE_INT_CALC_FINALIZE(avgfinal, rem, ncnt); \
492 : for (; ; l--) { \
493 : rb[l] = avgfinal; \
494 : if (l == j) \
495 : break; \
496 : } \
497 : } \
498 : if (j == k) \
499 : break; \
500 : l = j - 1; \
501 : } \
502 : } \
503 : rem = 0; \
504 : ncnt = 0; \
505 : k = i; \
506 : } while (0)
507 :
508 : #define ANALYTICAL_AVG_INT_ALL_ROWS(TPE) \
509 : do { \
510 : TPE avg = 0; \
511 : for (; j < i; j++) { \
512 : TPE v = bp[j]; \
513 : if (!is_##TPE##_nil(v)) \
514 : AVERAGE_ITER(TPE, v, avg, rem, ncnt); \
515 : } \
516 : if (ncnt == 0) { \
517 : for (; k < i; k++) \
518 : rb[k] = TPE##_nil; \
519 : has_nils = true; \
520 : } else { \
521 : ANALYTICAL_AVERAGE_INT_CALC_FINALIZE(avg, rem, ncnt); \
522 : for (; k < i; k++) \
523 : rb[k] = avg; \
524 : } \
525 : rem = 0; \
526 : ncnt = 0; \
527 : } while (0)
528 :
529 : #define ANALYTICAL_AVG_INT_CURRENT_ROW(TPE) \
530 : do { \
531 : for (; k < i; k++) { \
532 : TPE v = bp[k]; \
533 : rb[k] = bp[k]; \
534 : has_nils |= is_##TPE##_nil(v); \
535 : } \
536 : } while (0)
537 :
538 : #define avg_int_deltas(TPE) typedef struct avg_int_deltas_##TPE { TPE avg; lng rem, ncnt;} avg_int_deltas_##TPE;
539 : avg_int_deltas(bte)
540 : avg_int_deltas(sht)
541 : avg_int_deltas(int)
542 : avg_int_deltas(lng)
543 :
544 : #define INIT_AGGREGATE_AVG_INT(TPE, NOTHING1, NOTHING2) \
545 : do { \
546 : computed = (avg_int_deltas_##TPE) {0}; \
547 : } while (0)
548 : #define COMPUTE_LEVEL0_AVG_INT(X, TPE, NOTHING1, NOTHING2) \
549 : do { \
550 : TPE v = bp[j + X]; \
551 : computed = is_##TPE##_nil(v) ? (avg_int_deltas_##TPE) {0} : (avg_int_deltas_##TPE) {.avg = v, .ncnt = 1}; \
552 : } while (0)
553 : #define COMPUTE_LEVELN_AVG_INT(VAL, TPE, NOTHING1, NOTHING2) \
554 : do { \
555 : if (VAL.ncnt) \
556 : AVERAGE_ITER(TPE, VAL.avg, computed.avg, computed.rem, computed.ncnt); \
557 : } while (0)
558 : #define FINALIZE_AGGREGATE_AVG_INT(TPE, NOTHING1, NOTHING2) \
559 : do { \
560 : if (computed.ncnt == 0) { \
561 : has_nils = true; \
562 : rb[k] = TPE##_nil; \
563 : } else { \
564 : ANALYTICAL_AVERAGE_INT_CALC_FINALIZE(computed.avg, computed.rem, computed.ncnt); \
565 : rb[k] = computed.avg; \
566 : } \
567 : } while (0)
568 : #define ANALYTICAL_AVG_INT_OTHERS(TPE) \
569 : do { \
570 : oid ncount = i - k; \
571 : if ((res = GDKrebuild_segment_tree(ncount, sizeof(avg_int_deltas_##TPE), st, &segment_tree, &levels_offset, &nlevels)) != GDK_SUCCEED) \
572 : goto cleanup; \
573 : populate_segment_tree(avg_int_deltas_##TPE, ncount, INIT_AGGREGATE_AVG_INT, COMPUTE_LEVEL0_AVG_INT, COMPUTE_LEVELN_AVG_INT, TPE, NOTHING, NOTHING); \
574 : for (; k < i; k++) \
575 : compute_on_segment_tree(avg_int_deltas_##TPE, start[k] - j, end[k] - j, INIT_AGGREGATE_AVG_INT, COMPUTE_LEVELN_AVG_INT, FINALIZE_AGGREGATE_AVG_INT, TPE, NOTHING, NOTHING); \
576 : j = k; \
577 : } while (0)
578 :
579 : #define ANALYTICAL_AVG_INT_PARTITIONS(TPE, IMP) \
580 : do { \
581 : TPE *restrict bp = (TPE*)bi.base, *rb = (TPE *) Tloc(r, 0); \
582 : if (p) { \
583 : while (i < cnt) { \
584 : if (np[i]) { \
585 : avg##TPE##IMP: \
586 : IMP(TPE); \
587 : } \
588 : if (!last) \
589 : i++; \
590 : } \
591 : } \
592 : if (!last) { \
593 : last = true; \
594 : i = cnt; \
595 : goto avg##TPE##IMP; \
596 : } \
597 : } while (0)
598 :
599 : #ifdef HAVE_HGE
600 : avg_int_deltas(hge)
601 : #define ANALYTICAL_AVG_INT_LIMIT(IMP) \
602 : case TYPE_hge: \
603 : ANALYTICAL_AVG_INT_PARTITIONS(hge, ANALYTICAL_AVG_INT_##IMP); \
604 : break;
605 : #else
606 : #define ANALYTICAL_AVG_INT_LIMIT(IMP)
607 : #endif
608 :
609 : #define ANALYTICAL_AVG_INT_BRANCHES(IMP) \
610 : do { \
611 : switch (tpe) { \
612 : case TYPE_bte: \
613 : ANALYTICAL_AVG_INT_PARTITIONS(bte, ANALYTICAL_AVG_INT_##IMP); \
614 : break; \
615 : case TYPE_sht: \
616 : ANALYTICAL_AVG_INT_PARTITIONS(sht, ANALYTICAL_AVG_INT_##IMP); \
617 : break; \
618 : case TYPE_int: \
619 : ANALYTICAL_AVG_INT_PARTITIONS(int, ANALYTICAL_AVG_INT_##IMP); \
620 : break; \
621 : case TYPE_lng: \
622 : ANALYTICAL_AVG_INT_PARTITIONS(lng, ANALYTICAL_AVG_INT_##IMP); \
623 : break; \
624 : ANALYTICAL_AVG_INT_LIMIT(IMP) \
625 : default: \
626 : goto nosupport; \
627 : } \
628 : } while (0)
629 :
630 : gdk_return
631 48 : GDKanalyticalavginteger(BAT *r, BAT *p, BAT *o, BAT *b, BAT *s, BAT *e, int tpe, int frame_type)
632 : {
633 48 : BATiter pi = bat_iterator(p);
634 48 : BATiter oi = bat_iterator(o);
635 48 : BATiter bi = bat_iterator(b);
636 48 : BATiter si = bat_iterator(s);
637 48 : BATiter ei = bat_iterator(e);
638 48 : bool has_nils = false, last = false;
639 48 : oid i = 1, j = 0, k = 0, l = 0, cnt = BATcount(b), *restrict start = si.base, *restrict end = ei.base,
640 48 : *levels_offset = NULL, nlevels = 0;
641 48 : lng rem = 0, ncnt = 0;
642 48 : bit *np = pi.base, *op = oi.base;
643 48 : void *segment_tree = NULL;
644 48 : gdk_return res = GDK_SUCCEED;
645 48 : BAT *st = NULL;
646 :
647 48 : assert(np == NULL || cnt == 0 || np[0] == 0);
648 48 : if (cnt > 0) {
649 48 : switch (frame_type) {
650 18 : case 3: /* unbounded until current row */
651 696 : ANALYTICAL_AVG_INT_BRANCHES(UNBOUNDED_TILL_CURRENT_ROW);
652 : break;
653 0 : case 4: /* current row until unbounded */
654 0 : ANALYTICAL_AVG_INT_BRANCHES(CURRENT_ROW_TILL_UNBOUNDED);
655 : break;
656 17 : case 5: /* all rows */
657 308498 : ANALYTICAL_AVG_INT_BRANCHES(ALL_ROWS);
658 : break;
659 0 : case 6: /* current row */
660 0 : ANALYTICAL_AVG_INT_BRANCHES(CURRENT_ROW);
661 : break;
662 13 : default:
663 13 : if ((st = GDKinitialize_segment_tree()) == NULL) {
664 0 : res = GDK_FAIL;
665 0 : goto cleanup;
666 : }
667 1320 : ANALYTICAL_AVG_INT_BRANCHES(OTHERS);
668 : break;
669 : }
670 : }
671 :
672 48 : BATsetcount(r, cnt);
673 48 : r->tnonil = !has_nils;
674 48 : r->tnil = has_nils;
675 48 : cleanup:
676 48 : bat_iterator_end(&pi);
677 48 : bat_iterator_end(&oi);
678 48 : bat_iterator_end(&bi);
679 48 : bat_iterator_end(&si);
680 48 : bat_iterator_end(&ei);
681 48 : BBPreclaim(st);
682 48 : return res;
683 0 : nosupport:
684 0 : GDKerror("42000!average of type %s to %s unsupported.\n", ATOMname(tpe), ATOMname(tpe));
685 0 : res = GDK_FAIL;
686 0 : goto cleanup;
687 : }
688 :
689 : #define ANALYTICAL_STDEV_VARIANCE_UNBOUNDED_TILL_CURRENT_ROW(TPE, SAMPLE, OP) \
690 : do { \
691 : TPE *restrict bp = (TPE*)bi.base; \
692 : for (; k < i;) { \
693 : j = k; \
694 : do { \
695 : TPE v = bp[k]; \
696 : if (!is_##TPE##_nil(v)) { \
697 : n++; \
698 : delta = (dbl) v - mean; \
699 : mean += delta / n; \
700 : m2 += delta * ((dbl) v - mean); \
701 : } \
702 : k++; \
703 : } while (k < i && !op[k]); \
704 : if (isinf(m2)) { \
705 : goto overflow; \
706 : } else if (n > SAMPLE) { \
707 : for (; j < k; j++) \
708 : rb[j] = OP; \
709 : } else { \
710 : for (; j < k; j++) \
711 : rb[j] = dbl_nil; \
712 : has_nils = true; \
713 : } \
714 : } \
715 : n = 0; \
716 : mean = 0; \
717 : m2 = 0; \
718 : } while (0)
719 :
720 : #define ANALYTICAL_STDEV_VARIANCE_CURRENT_ROW_TILL_UNBOUNDED(TPE, SAMPLE, OP) \
721 : do { \
722 : TPE *restrict bp = (TPE*)bi.base; \
723 : l = i - 1; \
724 : for (j = l; ; j--) { \
725 : TPE v = bp[j]; \
726 : if (!is_##TPE##_nil(v)) { \
727 : n++; \
728 : delta = (dbl) v - mean; \
729 : mean += delta / n; \
730 : m2 += delta * ((dbl) v - mean); \
731 : } \
732 : if (op[j] || j == k) { \
733 : if (isinf(m2)) { \
734 : goto overflow; \
735 : } else if (n > SAMPLE) { \
736 : for (; ; l--) { \
737 : rb[l] = OP; \
738 : if (l == j) \
739 : break; \
740 : } \
741 : } else { \
742 : for (; ; l--) { \
743 : rb[l] = dbl_nil; \
744 : if (l == j) \
745 : break; \
746 : } \
747 : has_nils = true; \
748 : } \
749 : if (j == k) \
750 : break; \
751 : l = j - 1; \
752 : } \
753 : } \
754 : n = 0; \
755 : mean = 0; \
756 : m2 = 0; \
757 : k = i; \
758 : } while (0)
759 :
760 : #define ANALYTICAL_STDEV_VARIANCE_ALL_ROWS(TPE, SAMPLE, OP) \
761 : do { \
762 : TPE *restrict bp = (TPE*)bi.base; \
763 : for (; j < i; j++) { \
764 : TPE v = bp[j]; \
765 : if (is_##TPE##_nil(v)) \
766 : continue; \
767 : n++; \
768 : delta = (dbl) v - mean; \
769 : mean += delta / n; \
770 : m2 += delta * ((dbl) v - mean); \
771 : } \
772 : if (isinf(m2)) { \
773 : goto overflow; \
774 : } else if (n > SAMPLE) { \
775 : for (; k < i; k++) \
776 : rb[k] = OP; \
777 : } else { \
778 : for (; k < i; k++) \
779 : rb[k] = dbl_nil; \
780 : has_nils = true; \
781 : } \
782 : n = 0; \
783 : mean = 0; \
784 : m2 = 0; \
785 : } while (0)
786 :
787 : #define ANALYTICAL_STDEV_VARIANCE_CURRENT_ROW(TPE, SAMPLE, OP) \
788 : do { \
789 : for (; k < i; k++) \
790 : rb[k] = SAMPLE == 1 ? dbl_nil : 0; \
791 : has_nils = is_dbl_nil(rb[k - 1]); \
792 : } while (0)
793 :
794 : typedef struct stdev_var_deltas {
795 : BUN n;
796 : dbl mean, delta, m2;
797 : } stdev_var_deltas;
798 :
799 : #define INIT_AGGREGATE_STDEV_VARIANCE(TPE, SAMPLE, OP) \
800 : do { \
801 : computed = (stdev_var_deltas) {0}; \
802 : } while (0)
803 : #define COMPUTE_LEVEL0_STDEV_VARIANCE(X, TPE, SAMPLE, OP) \
804 : do { \
805 : TPE v = bp[j + X]; \
806 : computed = is_##TPE##_nil(v) ? (stdev_var_deltas) {0} : (stdev_var_deltas) {.n = 1, .mean = (dbl)v, .delta = (dbl)v}; \
807 : } while (0)
808 : #define COMPUTE_LEVELN_STDEV_VARIANCE(VAL, TPE, SAMPLE, OP) \
809 : do { \
810 : if (VAL.n) { \
811 : computed.n++; \
812 : computed.delta = VAL.delta - computed.mean; \
813 : computed.mean += computed.delta / computed.n; \
814 : computed.m2 += computed.delta * (VAL.delta - computed.mean); \
815 : } \
816 : } while (0)
817 : #define FINALIZE_AGGREGATE_STDEV_VARIANCE(TPE, SAMPLE, OP) \
818 : do { \
819 : dbl m2 = computed.m2; \
820 : BUN n = computed.n; \
821 : if (isinf(m2)) { \
822 : goto overflow; \
823 : } else if (n > SAMPLE) { \
824 : rb[k] = OP; \
825 : } else { \
826 : rb[k] = dbl_nil; \
827 : has_nils = true; \
828 : } \
829 : } while (0)
830 : #define ANALYTICAL_STDEV_VARIANCE_OTHERS(TPE, SAMPLE, OP) \
831 : do { \
832 : TPE *restrict bp = (TPE*)bi.base; \
833 : oid ncount = i - k; \
834 : if ((res = GDKrebuild_segment_tree(ncount, sizeof(stdev_var_deltas), st, &segment_tree, &levels_offset, &nlevels)) != GDK_SUCCEED) \
835 : goto cleanup; \
836 : populate_segment_tree(stdev_var_deltas, ncount, INIT_AGGREGATE_STDEV_VARIANCE, COMPUTE_LEVEL0_STDEV_VARIANCE, COMPUTE_LEVELN_STDEV_VARIANCE, TPE, SAMPLE, OP); \
837 : for (; k < i; k++) \
838 : compute_on_segment_tree(stdev_var_deltas, start[k] - j, end[k] - j, INIT_AGGREGATE_STDEV_VARIANCE, COMPUTE_LEVELN_STDEV_VARIANCE, FINALIZE_AGGREGATE_STDEV_VARIANCE, TPE, SAMPLE, OP); \
839 : j = k; \
840 : } while (0)
841 :
842 : #define ANALYTICAL_STATISTICS_PARTITIONS(TPE, SAMPLE, OP, IMP) \
843 : do { \
844 : if (p) { \
845 : while (i < cnt) { \
846 : if (np[i]) { \
847 : statistics##TPE##IMP: \
848 : IMP(TPE, SAMPLE, OP); \
849 : } \
850 : if (!last) \
851 : i++; \
852 : } \
853 : } \
854 : if (!last) { \
855 : last = true; \
856 : i = cnt; \
857 : goto statistics##TPE##IMP; \
858 : } \
859 : } while (0)
860 :
861 : #ifdef HAVE_HGE
862 : #define ANALYTICAL_STATISTICS_LIMIT(IMP, SAMPLE, OP) \
863 : case TYPE_hge: \
864 : ANALYTICAL_STATISTICS_PARTITIONS(hge, SAMPLE, OP, ANALYTICAL_##IMP); \
865 : break;
866 : #else
867 : #define ANALYTICAL_STATISTICS_LIMIT(IMP, SAMPLE, OP)
868 : #endif
869 :
870 : #define ANALYTICAL_STATISTICS_BRANCHES(IMP, SAMPLE, OP) \
871 : do { \
872 : switch (tpe) { \
873 : case TYPE_bte: \
874 : ANALYTICAL_STATISTICS_PARTITIONS(bte, SAMPLE, OP, ANALYTICAL_##IMP); \
875 : break; \
876 : case TYPE_sht: \
877 : ANALYTICAL_STATISTICS_PARTITIONS(sht, SAMPLE, OP, ANALYTICAL_##IMP); \
878 : break; \
879 : case TYPE_int: \
880 : ANALYTICAL_STATISTICS_PARTITIONS(int, SAMPLE, OP, ANALYTICAL_##IMP); \
881 : break; \
882 : case TYPE_lng: \
883 : ANALYTICAL_STATISTICS_PARTITIONS(lng, SAMPLE, OP, ANALYTICAL_##IMP); \
884 : break; \
885 : case TYPE_flt: \
886 : ANALYTICAL_STATISTICS_PARTITIONS(flt, SAMPLE, OP, ANALYTICAL_##IMP); \
887 : break; \
888 : case TYPE_dbl: \
889 : ANALYTICAL_STATISTICS_PARTITIONS(dbl, SAMPLE, OP, ANALYTICAL_##IMP); \
890 : break; \
891 : ANALYTICAL_STATISTICS_LIMIT(IMP, SAMPLE, OP) \
892 : default: \
893 : goto nosupport; \
894 : } \
895 : } while (0)
896 :
897 : #define GDK_ANALYTICAL_STDEV_VARIANCE(NAME, SAMPLE, OP, DESC) \
898 : gdk_return \
899 : GDKanalytical_##NAME(BAT *r, BAT *p, BAT *o, BAT *b, BAT *s, BAT *e, int tpe, int frame_type) \
900 : { \
901 : BATiter pi = bat_iterator(p); \
902 : BATiter oi = bat_iterator(o); \
903 : BATiter bi = bat_iterator(b); \
904 : BATiter si = bat_iterator(s); \
905 : BATiter ei = bat_iterator(e); \
906 : bool has_nils = false, last = false; \
907 : oid i = 0, j = 0, k = 0, l = 0, cnt = BATcount(b), *restrict start = si.base, *restrict end = ei.base, \
908 : *levels_offset = NULL, nlevels = 0; \
909 : lng n = 0; \
910 : bit *np = pi.base, *op = oi.base; \
911 : dbl *rb = (dbl *) Tloc(r, 0), mean = 0, m2 = 0, delta; \
912 : void *segment_tree = NULL; \
913 : gdk_return res = GDK_SUCCEED; \
914 : BAT *st = NULL; \
915 : \
916 : assert(np == NULL || cnt == 0 || np[0] == 0); \
917 : if (cnt > 0) { \
918 : switch (frame_type) { \
919 : case 3: /* unbounded until current row */ \
920 : ANALYTICAL_STATISTICS_BRANCHES(STDEV_VARIANCE_UNBOUNDED_TILL_CURRENT_ROW, SAMPLE, OP); \
921 : break; \
922 : case 4: /* current row until unbounded */ \
923 : ANALYTICAL_STATISTICS_BRANCHES(STDEV_VARIANCE_CURRENT_ROW_TILL_UNBOUNDED, SAMPLE, OP); \
924 : break; \
925 : case 5: /* all rows */ \
926 : ANALYTICAL_STATISTICS_BRANCHES(STDEV_VARIANCE_ALL_ROWS, SAMPLE, OP); \
927 : break; \
928 : case 6: /* current row */ \
929 : ANALYTICAL_STATISTICS_BRANCHES(STDEV_VARIANCE_CURRENT_ROW, SAMPLE, OP); \
930 : break; \
931 : default: \
932 : if ((st = GDKinitialize_segment_tree()) == NULL) { \
933 : res = GDK_FAIL; \
934 : goto cleanup; \
935 : } \
936 : ANALYTICAL_STATISTICS_BRANCHES(STDEV_VARIANCE_OTHERS, SAMPLE, OP); \
937 : break; \
938 : } \
939 : } \
940 : \
941 : BATsetcount(r, cnt); \
942 : r->tnonil = !has_nils; \
943 : r->tnil = has_nils; \
944 : goto cleanup; /* all these gotos seem confusing but it cleans up the ending of the operator */ \
945 : overflow: \
946 : GDKerror("22003!overflow in calculation.\n"); \
947 : res = GDK_FAIL; \
948 : cleanup: \
949 : bat_iterator_end(&pi); \
950 : bat_iterator_end(&oi); \
951 : bat_iterator_end(&bi); \
952 : bat_iterator_end(&si); \
953 : bat_iterator_end(&ei); \
954 : BBPreclaim(st); \
955 : return res; \
956 : nosupport: \
957 : GDKerror("42000!%s of type %s unsupported.\n", DESC, ATOMname(tpe)); \
958 : res = GDK_FAIL; \
959 : goto cleanup; \
960 : }
961 :
962 880 : GDK_ANALYTICAL_STDEV_VARIANCE(stddev_samp, 1, sqrt(m2 / (n - 1)), "standard deviation")
963 402 : GDK_ANALYTICAL_STDEV_VARIANCE(stddev_pop, 0, sqrt(m2 / n), "standard deviation")
964 332 : GDK_ANALYTICAL_STDEV_VARIANCE(variance_samp, 1, m2 / (n - 1), "variance")
965 1012 : GDK_ANALYTICAL_STDEV_VARIANCE(variance_pop, 0, m2 / n, "variance")
966 :
967 : #define ANALYTICAL_COVARIANCE_UNBOUNDED_TILL_CURRENT_ROW(TPE, SAMPLE, OP) \
968 : do { \
969 : TPE *bp1 = (TPE*)b1i.base, *bp2 = (TPE*)b2i.base; \
970 : for (; k < i;) { \
971 : j = k; \
972 : do { \
973 : TPE v1 = bp1[k], v2 = bp2[k]; \
974 : if (!is_##TPE##_nil(v1) && !is_##TPE##_nil(v2)) { \
975 : n++; \
976 : delta1 = (dbl) v1 - mean1; \
977 : mean1 += delta1 / n; \
978 : delta2 = (dbl) v2 - mean2; \
979 : mean2 += delta2 / n; \
980 : m2 += delta1 * ((dbl) v2 - mean2); \
981 : } \
982 : k++; \
983 : } while (k < i && !op[k]); \
984 : if (isinf(m2)) \
985 : goto overflow; \
986 : if (n > SAMPLE) { \
987 : for (; j < k; j++) \
988 : rb[j] = OP; \
989 : } else { \
990 : for (; j < k; j++) \
991 : rb[j] = dbl_nil; \
992 : has_nils = true; \
993 : } \
994 : } \
995 : n = 0; \
996 : mean1 = 0; \
997 : mean2 = 0; \
998 : m2 = 0; \
999 : } while (0)
1000 :
1001 : #define ANALYTICAL_COVARIANCE_CURRENT_ROW_TILL_UNBOUNDED(TPE, SAMPLE, OP) \
1002 : do { \
1003 : TPE *bp1 = (TPE*)b1i.base, *bp2 = (TPE*)b2i.base; \
1004 : l = i - 1; \
1005 : for (j = l; ; j--) { \
1006 : TPE v1 = bp1[j], v2 = bp2[j]; \
1007 : if (!is_##TPE##_nil(v1) && !is_##TPE##_nil(v2)) { \
1008 : n++; \
1009 : delta1 = (dbl) v1 - mean1; \
1010 : mean1 += delta1 / n; \
1011 : delta2 = (dbl) v2 - mean2; \
1012 : mean2 += delta2 / n; \
1013 : m2 += delta1 * ((dbl) v2 - mean2); \
1014 : } \
1015 : if (op[j] || j == k) { \
1016 : if (isinf(m2)) \
1017 : goto overflow; \
1018 : if (n > SAMPLE) { \
1019 : for (; ; l--) { \
1020 : rb[l] = OP; \
1021 : if (l == j) \
1022 : break; \
1023 : } \
1024 : } else { \
1025 : for (; ; l--) { \
1026 : rb[l] = dbl_nil; \
1027 : if (l == j) \
1028 : break; \
1029 : } \
1030 : has_nils = true; \
1031 : } \
1032 : if (j == k) \
1033 : break; \
1034 : l = j - 1; \
1035 : } \
1036 : } \
1037 : n = 0; \
1038 : mean1 = 0; \
1039 : mean2 = 0; \
1040 : m2 = 0; \
1041 : k = i; \
1042 : } while (0)
1043 :
1044 : #define ANALYTICAL_COVARIANCE_ALL_ROWS(TPE, SAMPLE, OP) \
1045 : do { \
1046 : TPE *bp1 = (TPE*)b1i.base, *bp2 = (TPE*)b2i.base; \
1047 : for (; j < i; j++) { \
1048 : TPE v1 = bp1[j], v2 = bp2[j]; \
1049 : if (!is_##TPE##_nil(v1) && !is_##TPE##_nil(v2)) { \
1050 : n++; \
1051 : delta1 = (dbl) v1 - mean1; \
1052 : mean1 += delta1 / n; \
1053 : delta2 = (dbl) v2 - mean2; \
1054 : mean2 += delta2 / n; \
1055 : m2 += delta1 * ((dbl) v2 - mean2); \
1056 : } \
1057 : } \
1058 : if (isinf(m2)) \
1059 : goto overflow; \
1060 : if (n > SAMPLE) { \
1061 : for (; k < i; k++) \
1062 : rb[k] = OP; \
1063 : } else { \
1064 : for (; k < i; k++) \
1065 : rb[k] = dbl_nil; \
1066 : has_nils = true; \
1067 : } \
1068 : n = 0; \
1069 : mean1 = 0; \
1070 : mean2 = 0; \
1071 : m2 = 0; \
1072 : } while (0)
1073 :
1074 : #define ANALYTICAL_COVARIANCE_CURRENT_ROW(TPE, SAMPLE, OP) \
1075 : do { \
1076 : for (; k < i; k++) \
1077 : rb[k] = SAMPLE == 1 ? dbl_nil : 0; \
1078 : has_nils = is_dbl_nil(rb[k - 1]); \
1079 : } while (0)
1080 :
1081 : typedef struct covariance_deltas {
1082 : BUN n;
1083 : dbl mean1, mean2, delta1, delta2, m2;
1084 : } covariance_deltas;
1085 :
1086 : #define INIT_AGGREGATE_COVARIANCE(TPE, SAMPLE, OP) \
1087 : do { \
1088 : computed = (covariance_deltas) {0}; \
1089 : } while (0)
1090 : #define COMPUTE_LEVEL0_COVARIANCE(X, TPE, SAMPLE, OP) \
1091 : do { \
1092 : TPE v1 = bp1[j + X], v2 = bp2[j + X]; \
1093 : computed = is_##TPE##_nil(v1) || is_##TPE##_nil(v2) ? (covariance_deltas) {0} \
1094 : : (covariance_deltas) {.n = 1, .mean1 = (dbl)v1, .mean2 = (dbl)v2, .delta1 = (dbl)v1, .delta2 = (dbl)v2}; \
1095 : } while (0)
1096 : #define COMPUTE_LEVELN_COVARIANCE(VAL, TPE, SAMPLE, OP) \
1097 : do { \
1098 : if (VAL.n) { \
1099 : computed.n++; \
1100 : computed.delta1 = VAL.delta1 - computed.mean1; \
1101 : computed.mean1 += computed.delta1 / computed.n; \
1102 : computed.delta2 = VAL.delta2 - computed.mean2; \
1103 : computed.mean2 += computed.delta2 / computed.n; \
1104 : computed.m2 += computed.delta1 * (VAL.delta2 - computed.mean2); \
1105 : } \
1106 : } while (0)
1107 : #define FINALIZE_AGGREGATE_COVARIANCE(TPE, SAMPLE, OP) FINALIZE_AGGREGATE_STDEV_VARIANCE(TPE, SAMPLE, OP)
1108 : #define ANALYTICAL_COVARIANCE_OTHERS(TPE, SAMPLE, OP) \
1109 : do { \
1110 : TPE *bp1 = (TPE*)b1i.base, *bp2 = (TPE*)b2i.base; \
1111 : oid ncount = i - k; \
1112 : if ((res = GDKrebuild_segment_tree(ncount, sizeof(covariance_deltas), st, &segment_tree, &levels_offset, &nlevels)) != GDK_SUCCEED) \
1113 : goto cleanup; \
1114 : populate_segment_tree(covariance_deltas, ncount, INIT_AGGREGATE_COVARIANCE, COMPUTE_LEVEL0_COVARIANCE, COMPUTE_LEVELN_COVARIANCE, TPE, SAMPLE, OP); \
1115 : for (; k < i; k++) \
1116 : compute_on_segment_tree(covariance_deltas, start[k] - j, end[k] - j, INIT_AGGREGATE_COVARIANCE, COMPUTE_LEVELN_COVARIANCE, FINALIZE_AGGREGATE_COVARIANCE, TPE, SAMPLE, OP); \
1117 : j = k; \
1118 : } while (0)
1119 :
1120 : #define GDK_ANALYTICAL_COVARIANCE(NAME, SAMPLE, OP) \
1121 : gdk_return \
1122 : GDKanalytical_##NAME(BAT *r, BAT *p, BAT *o, BAT *b1, BAT *b2, BAT *s, BAT *e, int tpe, int frame_type) \
1123 : { \
1124 : BATiter pi = bat_iterator(p); \
1125 : BATiter oi = bat_iterator(o); \
1126 : BATiter b1i = bat_iterator(b1); \
1127 : BATiter b2i = bat_iterator(b2); \
1128 : BATiter si = bat_iterator(s); \
1129 : BATiter ei = bat_iterator(e); \
1130 : bool has_nils = false, last = false; \
1131 : oid i = 1, j = 0, k = 0, l = 0, cnt = BATcount(b1), *restrict start = si.base, *restrict end = ei.base, \
1132 : *levels_offset = NULL, nlevels = 0; \
1133 : lng n = 0; \
1134 : bit *np = pi.base, *op = oi.base; \
1135 : dbl *rb = (dbl *) Tloc(r, 0), mean1 = 0, mean2 = 0, m2 = 0, delta1, delta2; \
1136 : void *segment_tree = NULL; \
1137 : gdk_return res = GDK_SUCCEED; \
1138 : BAT *st = NULL; \
1139 : \
1140 : assert(np == NULL || cnt == 0 || np[0] == 0); \
1141 : if (cnt > 0) { \
1142 : switch (frame_type) { \
1143 : case 3: /* unbounded until current row */ \
1144 : ANALYTICAL_STATISTICS_BRANCHES(COVARIANCE_UNBOUNDED_TILL_CURRENT_ROW, SAMPLE, OP); \
1145 : break; \
1146 : case 4: /* current row until unbounded */ \
1147 : ANALYTICAL_STATISTICS_BRANCHES(COVARIANCE_CURRENT_ROW_TILL_UNBOUNDED, SAMPLE, OP); \
1148 : break; \
1149 : case 5: /* all rows */ \
1150 : ANALYTICAL_STATISTICS_BRANCHES(COVARIANCE_ALL_ROWS, SAMPLE, OP); \
1151 : break; \
1152 : case 6: /* current row */ \
1153 : ANALYTICAL_STATISTICS_BRANCHES(COVARIANCE_CURRENT_ROW, SAMPLE, OP); \
1154 : break; \
1155 : default: \
1156 : if ((st = GDKinitialize_segment_tree()) == NULL) { \
1157 : res = GDK_FAIL; \
1158 : goto cleanup; \
1159 : } \
1160 : ANALYTICAL_STATISTICS_BRANCHES(COVARIANCE_OTHERS, SAMPLE, OP); \
1161 : break; \
1162 : } \
1163 : } \
1164 : \
1165 : BATsetcount(r, cnt); \
1166 : r->tnonil = !has_nils; \
1167 : r->tnil = has_nils; \
1168 : goto cleanup; /* all these gotos seem confusing but it cleans up the ending of the operator */ \
1169 : overflow: \
1170 : GDKerror("22003!overflow in calculation.\n"); \
1171 : res = GDK_FAIL; \
1172 : cleanup: \
1173 : bat_iterator_end(&pi); \
1174 : bat_iterator_end(&oi); \
1175 : bat_iterator_end(&b1i); \
1176 : bat_iterator_end(&b2i); \
1177 : bat_iterator_end(&si); \
1178 : bat_iterator_end(&ei); \
1179 : BBPreclaim(st); \
1180 : return res; \
1181 : nosupport: \
1182 : GDKerror("42000!covariance of type %s unsupported.\n", ATOMname(tpe)); \
1183 : res = GDK_FAIL; \
1184 : goto cleanup; \
1185 : }
1186 :
1187 1372 : GDK_ANALYTICAL_COVARIANCE(covariance_samp, 1, m2 / (n - 1))
1188 1440 : GDK_ANALYTICAL_COVARIANCE(covariance_pop, 0, m2 / n)
1189 :
1190 : #define ANALYTICAL_CORRELATION_UNBOUNDED_TILL_CURRENT_ROW(TPE, SAMPLE, OP) /* SAMPLE and OP not used */ \
1191 : do { \
1192 : TPE *bp1 = (TPE*)b1i.base, *bp2 = (TPE*)b2i.base; \
1193 : for (; k < i;) { \
1194 : j = k; \
1195 : do { \
1196 : TPE v1 = bp1[k], v2 = bp2[k]; \
1197 : if (!is_##TPE##_nil(v1) && !is_##TPE##_nil(v2)) { \
1198 : n++; \
1199 : delta1 = (dbl) v1 - mean1; \
1200 : mean1 += delta1 / n; \
1201 : delta2 = (dbl) v2 - mean2; \
1202 : mean2 += delta2 / n; \
1203 : aux = (dbl) v2 - mean2; \
1204 : up += delta1 * aux; \
1205 : down1 += delta1 * ((dbl) v1 - mean1); \
1206 : down2 += delta2 * aux; \
1207 : } \
1208 : k++; \
1209 : } while (k < i && !op[k]); \
1210 : if (isinf(up) || isinf(down1) || isinf(down2)) \
1211 : goto overflow; \
1212 : if (n != 0 && down1 != 0 && down2 != 0) { \
1213 : rr = (up / n) / (sqrt(down1 / n) * sqrt(down2 / n)); \
1214 : assert(!is_dbl_nil(rr)); \
1215 : } else { \
1216 : rr = dbl_nil; \
1217 : has_nils = true; \
1218 : } \
1219 : for (; j < k; j++) \
1220 : rb[j] = rr; \
1221 : } \
1222 : n = 0; \
1223 : mean1 = 0; \
1224 : mean2 = 0; \
1225 : up = 0; \
1226 : down1 = 0; \
1227 : down2 = 0; \
1228 : } while (0)
1229 :
1230 : #define ANALYTICAL_CORRELATION_CURRENT_ROW_TILL_UNBOUNDED(TPE, SAMPLE, OP) /* SAMPLE and OP not used */ \
1231 : do { \
1232 : TPE *bp1 = (TPE*)b1i.base, *bp2 = (TPE*)b2i.base; \
1233 : l = i - 1; \
1234 : for (j = l; ; j--) { \
1235 : TPE v1 = bp1[j], v2 = bp2[j]; \
1236 : if (!is_##TPE##_nil(v1) && !is_##TPE##_nil(v2)) { \
1237 : n++; \
1238 : delta1 = (dbl) v1 - mean1; \
1239 : mean1 += delta1 / n; \
1240 : delta2 = (dbl) v2 - mean2; \
1241 : mean2 += delta2 / n; \
1242 : aux = (dbl) v2 - mean2; \
1243 : up += delta1 * aux; \
1244 : down1 += delta1 * ((dbl) v1 - mean1); \
1245 : down2 += delta2 * aux; \
1246 : } \
1247 : if (op[j] || j == k) { \
1248 : if (isinf(up) || isinf(down1) || isinf(down2)) \
1249 : goto overflow; \
1250 : if (n != 0 && down1 != 0 && down2 != 0) { \
1251 : rr = (up / n) / (sqrt(down1 / n) * sqrt(down2 / n)); \
1252 : assert(!is_dbl_nil(rr)); \
1253 : } else { \
1254 : rr = dbl_nil; \
1255 : has_nils = true; \
1256 : } \
1257 : for (; ; l--) { \
1258 : rb[l] = rr; \
1259 : if (l == j) \
1260 : break; \
1261 : } \
1262 : if (j == k) \
1263 : break; \
1264 : l = j - 1; \
1265 : } \
1266 : } \
1267 : n = 0; \
1268 : mean1 = 0; \
1269 : mean2 = 0; \
1270 : up = 0; \
1271 : down1 = 0; \
1272 : down2 = 0; \
1273 : k = i; \
1274 : } while (0)
1275 :
1276 : #define ANALYTICAL_CORRELATION_ALL_ROWS(TPE, SAMPLE, OP) /* SAMPLE and OP not used */ \
1277 : do { \
1278 : TPE *bp1 = (TPE*)b1i.base, *bp2 = (TPE*)b2i.base; \
1279 : for (; j < i; j++) { \
1280 : TPE v1 = bp1[j], v2 = bp2[j]; \
1281 : if (!is_##TPE##_nil(v1) && !is_##TPE##_nil(v2)) { \
1282 : n++; \
1283 : delta1 = (dbl) v1 - mean1; \
1284 : mean1 += delta1 / n; \
1285 : delta2 = (dbl) v2 - mean2; \
1286 : mean2 += delta2 / n; \
1287 : aux = (dbl) v2 - mean2; \
1288 : up += delta1 * aux; \
1289 : down1 += delta1 * ((dbl) v1 - mean1); \
1290 : down2 += delta2 * aux; \
1291 : } \
1292 : } \
1293 : if (isinf(up) || isinf(down1) || isinf(down2)) \
1294 : goto overflow; \
1295 : if (n != 0 && down1 != 0 && down2 != 0) { \
1296 : rr = (up / n) / (sqrt(down1 / n) * sqrt(down2 / n)); \
1297 : assert(!is_dbl_nil(rr)); \
1298 : } else { \
1299 : rr = dbl_nil; \
1300 : has_nils = true; \
1301 : } \
1302 : for (; k < i ; k++) \
1303 : rb[k] = rr; \
1304 : n = 0; \
1305 : mean1 = 0; \
1306 : mean2 = 0; \
1307 : up = 0; \
1308 : down1 = 0; \
1309 : down2 = 0; \
1310 : } while (0)
1311 :
1312 : #define ANALYTICAL_CORRELATION_CURRENT_ROW(TPE, SAMPLE, OP) /* SAMPLE and OP not used */ \
1313 : do { \
1314 : for (; k < i; k++) \
1315 : rb[k] = dbl_nil; \
1316 : has_nils = true; \
1317 : } while (0)
1318 :
1319 :
1320 : typedef struct correlation_deltas {
1321 : BUN n;
1322 : dbl mean1, mean2, delta1, delta2, up, down1, down2;
1323 : } correlation_deltas;
1324 :
1325 : #define INIT_AGGREGATE_CORRELATION(TPE, SAMPLE, OP) \
1326 : do { \
1327 : computed = (correlation_deltas) {0}; \
1328 : } while (0)
1329 : #define COMPUTE_LEVEL0_CORRELATION(X, TPE, SAMPLE, OP) \
1330 : do { \
1331 : TPE v1 = bp1[j + X], v2 = bp2[j + X]; \
1332 : computed = is_##TPE##_nil(v1) || is_##TPE##_nil(v2) ? (correlation_deltas) {0} \
1333 : : (correlation_deltas) {.n = 1, .mean1 = (dbl)v1, .mean2 = (dbl)v2, .delta1 = (dbl)v1, .delta2 = (dbl)v2}; \
1334 : } while (0)
1335 : #define COMPUTE_LEVELN_CORRELATION(VAL, TPE, SAMPLE, OP) \
1336 : do { \
1337 : if (VAL.n) { \
1338 : computed.n++; \
1339 : computed.delta1 = VAL.delta1 - computed.mean1; \
1340 : computed.mean1 += computed.delta1 / computed.n; \
1341 : computed.delta2 = VAL.delta2 - computed.mean2; \
1342 : computed.mean2 += computed.delta2 / computed.n; \
1343 : dbl aux = VAL.delta2 - computed.mean2; \
1344 : computed.up += computed.delta1 * aux; \
1345 : computed.down1 += computed.delta1 * (VAL.delta1 - computed.mean1); \
1346 : computed.down2 += computed.delta2 * aux; \
1347 : } \
1348 : } while (0)
1349 : #define FINALIZE_AGGREGATE_CORRELATION(TPE, SAMPLE, OP) \
1350 : do { \
1351 : n = computed.n; \
1352 : up = computed.up; \
1353 : down1 = computed.down1; \
1354 : down2 = computed.down2; \
1355 : if (isinf(up) || isinf(down1) || isinf(down2)) \
1356 : goto overflow; \
1357 : if (n != 0 && down1 != 0 && down2 != 0) { \
1358 : rr = (up / n) / (sqrt(down1 / n) * sqrt(down2 / n)); \
1359 : assert(!is_dbl_nil(rr)); \
1360 : } else { \
1361 : rr = dbl_nil; \
1362 : has_nils = true; \
1363 : } \
1364 : rb[k] = rr; \
1365 : } while (0)
1366 : #define ANALYTICAL_CORRELATION_OTHERS(TPE, SAMPLE, OP) /* SAMPLE and OP not used */ \
1367 : do { \
1368 : TPE *bp1 = (TPE*)b1i.base, *bp2 = (TPE*)b2i.base; \
1369 : oid ncount = i - k; \
1370 : if ((res = GDKrebuild_segment_tree(ncount, sizeof(correlation_deltas), st, &segment_tree, &levels_offset, &nlevels)) != GDK_SUCCEED) \
1371 : goto cleanup; \
1372 : populate_segment_tree(correlation_deltas, ncount, INIT_AGGREGATE_CORRELATION, COMPUTE_LEVEL0_CORRELATION, COMPUTE_LEVELN_CORRELATION, TPE, SAMPLE, OP); \
1373 : for (; k < i; k++) \
1374 : compute_on_segment_tree(correlation_deltas, start[k] - j, end[k] - j, INIT_AGGREGATE_CORRELATION, COMPUTE_LEVELN_CORRELATION, FINALIZE_AGGREGATE_CORRELATION, TPE, SAMPLE, OP); \
1375 : j = k; \
1376 : } while (0)
1377 :
1378 : gdk_return
1379 51 : GDKanalytical_correlation(BAT *r, BAT *p, BAT *o, BAT *b1, BAT *b2, BAT *s, BAT *e, int tpe, int frame_type)
1380 : {
1381 51 : bool has_nils = false, last = false;
1382 51 : BATiter pi = bat_iterator(p);
1383 51 : BATiter oi = bat_iterator(o);
1384 51 : BATiter b1i = bat_iterator(b1);
1385 51 : BATiter b2i = bat_iterator(b2);
1386 51 : BATiter si = bat_iterator(s);
1387 51 : BATiter ei = bat_iterator(e);
1388 51 : oid i = 1, j = 0, k = 0, l = 0, cnt = BATcount(b1),
1389 51 : *levels_offset = NULL, nlevels = 0;
1390 51 : const oid *restrict start = si.base, *restrict end = ei.base;
1391 51 : lng n = 0;
1392 51 : const bit *np = pi.base, *op = oi.base;
1393 51 : dbl *rb = (dbl *) Tloc(r, 0), mean1 = 0, mean2 = 0, up = 0, down1 = 0, down2 = 0, delta1, delta2, aux, rr;
1394 51 : void *segment_tree = NULL;
1395 51 : gdk_return res = GDK_SUCCEED;
1396 51 : BAT *st = NULL;
1397 :
1398 51 : assert(np == NULL || cnt == 0 || np[0] == 0);
1399 51 : if (cnt > 0) {
1400 51 : switch (frame_type) {
1401 18 : case 3: /* unbounded until current row */
1402 648 : ANALYTICAL_STATISTICS_BRANCHES(CORRELATION_UNBOUNDED_TILL_CURRENT_ROW, ;, ;);
1403 : break;
1404 0 : case 4: /* current row until unbounded */
1405 0 : ANALYTICAL_STATISTICS_BRANCHES(CORRELATION_CURRENT_ROW_TILL_UNBOUNDED, ;, ;);
1406 : break;
1407 18 : case 5: /* all rows */
1408 443 : ANALYTICAL_STATISTICS_BRANCHES(CORRELATION_ALL_ROWS, ;, ;);
1409 : break;
1410 0 : case 6: /* current row */
1411 0 : ANALYTICAL_STATISTICS_BRANCHES(CORRELATION_CURRENT_ROW, ;, ;);
1412 : break;
1413 15 : default:
1414 15 : if ((st = GDKinitialize_segment_tree()) == NULL) {
1415 0 : res = GDK_FAIL;
1416 0 : goto cleanup;
1417 : }
1418 21155 : ANALYTICAL_STATISTICS_BRANCHES(CORRELATION_OTHERS, ;, ;);
1419 : break;
1420 : }
1421 : }
1422 :
1423 50 : BATsetcount(r, cnt);
1424 50 : r->tnonil = !has_nils;
1425 50 : r->tnil = has_nils;
1426 50 : goto cleanup; /* all these gotos seem confusing but it cleans up the ending of the operator */
1427 1 : overflow:
1428 1 : GDKerror("22003!overflow in calculation.\n");
1429 1 : res = GDK_FAIL;
1430 51 : cleanup:
1431 51 : bat_iterator_end(&pi);
1432 51 : bat_iterator_end(&oi);
1433 51 : bat_iterator_end(&b1i);
1434 51 : bat_iterator_end(&b2i);
1435 51 : bat_iterator_end(&si);
1436 51 : bat_iterator_end(&ei);
1437 51 : BBPreclaim(st);
1438 51 : return res;
1439 0 : nosupport:
1440 0 : GDKerror("42000!correlation of type %s unsupported.\n", ATOMname(tpe));
1441 0 : res = GDK_FAIL;
1442 0 : goto cleanup;
1443 : }
|