Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "gdk.h"
15 : #include "gdk_analytic.h"
16 : #include "gdk_calc_private.h"
17 :
18 : #ifdef HAVE_HGE
19 : #define LNG_HGE hge
20 : #define GDK_LNG_HGE_max GDK_hge_max
21 : #define LNG_HGE_nil hge_nil
22 : #else
23 : #define LNG_HGE lng
24 : #define GDK_LNG_HGE_max GDK_lng_max
25 : #define LNG_HGE_nil lng_nil
26 : #endif
27 :
28 : /* average on integers */
29 : #define ANALYTICAL_AVERAGE_CALC_NUM_STEP1(TPE, IMP, ARG) \
30 : do { \
31 : if (!is_##TPE##_nil(ARG)) { \
32 : ADD_WITH_CHECK(ARG, sum, LNG_HGE, sum, \
33 : GDK_LNG_HGE_max, \
34 : goto avg_overflow##TPE##IMP); \
35 : /* count only when no overflow occurs */ \
36 : n++; \
37 : } \
38 : } while (0)
39 :
40 : /* do the common part of the overflow handling from STEP1 */
41 : #define ANALYTICAL_AVERAGE_CALC_NUM_STEP2(TPE, IMP) \
42 : do { \
43 : avg_overflow##TPE##IMP: \
44 : assert(n > 0); \
45 : if (sum >= 0) { \
46 : a = (TPE) (sum / n); \
47 : rr = (lng) (sum % n); \
48 : } else { \
49 : sum = -sum; \
50 : a = - (TPE) (sum / n); \
51 : rr = (lng) (sum % n); \
52 : if (r) { \
53 : a--; \
54 : rr = n - rr; \
55 : } \
56 : } \
57 : } while (0)
58 :
59 : #define ANALYTICAL_AVG_IMP_NUM_UNBOUNDED_TILL_CURRENT_ROW(TPE, IMP) \
60 : do { \
61 : TPE a = 0; \
62 : dbl curval = dbl_nil; \
63 : for (; k < i;) { \
64 : j = k; \
65 : do { \
66 : ANALYTICAL_AVERAGE_CALC_NUM_STEP1(TPE, IMP, bp[k]); \
67 : k++; \
68 : } while (k < i && !op[k]); \
69 : curval = n > 0 ? (dbl) sum / n : dbl_nil; \
70 : if (0) { /* overflow handling from STEP1 */ \
71 : ANALYTICAL_AVERAGE_CALC_NUM_STEP2(TPE, IMP); \
72 : while (k < i && !op[k]) { \
73 : TPE v = bp[k++]; \
74 : if (is_##TPE##_nil(v)) \
75 : continue; \
76 : AVERAGE_ITER(TPE, v, a, rr, n); \
77 : } \
78 : curval = a + (dbl) rr / n; \
79 : } \
80 : for (; j < k; j++) \
81 : rb[j] = curval; \
82 : has_nils |= (n == 0); \
83 : } \
84 : n = 0; \
85 : sum = 0; \
86 : } while (0)
87 :
88 : #define ANALYTICAL_AVG_IMP_NUM_CURRENT_ROW_TILL_UNBOUNDED(TPE, IMP) \
89 : do { \
90 : TPE a = 0; \
91 : dbl curval = dbl_nil; \
92 : l = i - 1; \
93 : for (j = l; ; j--) { \
94 : ANALYTICAL_AVERAGE_CALC_NUM_STEP1(TPE, IMP, bp[j]); \
95 : if (op[j] || j == k) { \
96 : curval = n > 0 ? (dbl) sum / n : dbl_nil; \
97 : if (0) { /* overflow handling from STEP1 */ \
98 : ANALYTICAL_AVERAGE_CALC_NUM_STEP2(TPE, IMP); \
99 : while (!op[j] && j != k) { \
100 : TPE v = bp[j--]; \
101 : if (is_##TPE##_nil(v)) \
102 : continue; \
103 : AVERAGE_ITER(TPE, v, a, rr, n); \
104 : curval = a + (dbl) rr / n; \
105 : } \
106 : } \
107 : for (; ; l--) { \
108 : rb[l] = curval; \
109 : if (l == j) \
110 : break; \
111 : } \
112 : has_nils |= (n == 0); \
113 : if (j == k) \
114 : break; \
115 : l = j - 1; \
116 : } \
117 : } \
118 : n = 0; \
119 : sum = 0; \
120 : k = i; \
121 : } while (0)
122 :
123 : #define ANALYTICAL_AVG_IMP_NUM_ALL_ROWS(TPE, IMP) \
124 : do { \
125 : TPE a = 0; \
126 : TPE v; \
127 : for (; j < i; j++) { \
128 : v = bp[j]; \
129 : ANALYTICAL_AVERAGE_CALC_NUM_STEP1(TPE, IMP, v); \
130 : } \
131 : curval = n > 0 ? (dbl) sum / n : dbl_nil; \
132 : if (0) { /* overflow handling from STEP1 */ \
133 : ANALYTICAL_AVERAGE_CALC_NUM_STEP2(TPE, IMP); \
134 : for (; j < i; j++) { \
135 : v = bp[j]; \
136 : if (is_##TPE##_nil(v)) \
137 : continue; \
138 : AVERAGE_ITER(TPE, v, a, rr, n); \
139 : } \
140 : curval = a + (dbl) rr / n; \
141 : } \
142 : for (; k < i; k++) \
143 : rb[k] = curval; \
144 : has_nils |= (n == 0); \
145 : n = 0; \
146 : sum = 0; \
147 : } while (0)
148 :
149 : #define ANALYTICAL_AVG_IMP_NUM_CURRENT_ROW(TPE, IMP) \
150 : do { \
151 : for (; k < i; k++) { \
152 : TPE v = bp[k]; \
153 : if (is_##TPE##_nil(v)) { \
154 : rb[k] = dbl_nil; \
155 : has_nils = true; \
156 : } else { \
157 : rb[k] = (dbl) v; \
158 : } \
159 : } \
160 : } while (0)
161 :
162 : #define avg_num_deltas(TPE) typedef struct avg_num_deltas##TPE { TPE a; lng n; lng rr;} avg_num_deltas##TPE;
163 : avg_num_deltas(bte)
164 : avg_num_deltas(sht)
165 : avg_num_deltas(int)
166 : avg_num_deltas(lng)
167 :
168 : #define INIT_AGGREGATE_AVG_NUM(TPE, NOTHING1, NOTHING2) \
169 : do { \
170 : computed = (avg_num_deltas##TPE) {0}; \
171 : } while (0)
172 : #define COMPUTE_LEVEL0_AVG_NUM(X, TPE, NOTHING1, NOTHING2) \
173 : do { \
174 : TPE v = bp[j + X]; \
175 : computed = is_##TPE##_nil(v) ? (avg_num_deltas##TPE){0} : (avg_num_deltas##TPE) {.a = v, .n = 1}; \
176 : } while (0)
177 : #define COMPUTE_LEVELN_AVG_NUM(VAL, TPE, NOTHING1, NOTHING2) \
178 : do { \
179 : if (VAL.n) \
180 : AVERAGE_ITER(TPE, VAL.a, computed.a, computed.rr, computed.n); \
181 : } while (0)
182 : #define FINALIZE_AGGREGATE_AVG_NUM(TPE, NOTHING1, NOTHING2) \
183 : do { \
184 : if (computed.n == 0) { \
185 : rb[k] = dbl_nil; \
186 : has_nils = true; \
187 : } else { \
188 : rb[k] = computed.a + (dbl) computed.rr / computed.n; \
189 : } \
190 : } while (0)
191 : #define ANALYTICAL_AVG_IMP_NUM_OTHERS(TPE, IMP) \
192 : do { \
193 : oid ncount = i - k; \
194 : if ((res = GDKrebuild_segment_tree(ncount, sizeof(avg_num_deltas##TPE), st, &segment_tree, &levels_offset, &nlevels)) != GDK_SUCCEED) \
195 : goto cleanup; \
196 : populate_segment_tree(avg_num_deltas##TPE, ncount, INIT_AGGREGATE_AVG_NUM, COMPUTE_LEVEL0_AVG_NUM, COMPUTE_LEVELN_AVG_NUM, TPE, NOTHING, NOTHING); \
197 : for (; k < i; k++) \
198 : compute_on_segment_tree(avg_num_deltas##TPE, start[k] - j, end[k] - j, INIT_AGGREGATE_AVG_NUM, COMPUTE_LEVELN_AVG_NUM, FINALIZE_AGGREGATE_AVG_NUM, TPE, NOTHING, NOTHING); \
199 : j = k; \
200 : } while (0)
201 :
202 : /* average on floating-points */
203 : #define ANALYTICAL_AVG_IMP_FP_UNBOUNDED_TILL_CURRENT_ROW(TPE, IMP) \
204 : do { \
205 : TPE a = 0; \
206 : dbl curval = dbl_nil; \
207 : for (; k < i;) { \
208 : j = k; \
209 : do { \
210 : if (!is_##TPE##_nil(bp[k])) \
211 : AVERAGE_ITER_FLOAT(TPE, bp[k], a, n); \
212 : k++; \
213 : } while (k < i && !op[k]); \
214 : if (n > 0) \
215 : curval = a; \
216 : else \
217 : has_nils = true; \
218 : for (; j < k; j++) \
219 : rb[j] = curval; \
220 : } \
221 : n = 0; \
222 : } while (0)
223 :
224 : #define ANALYTICAL_AVG_IMP_FP_CURRENT_ROW_TILL_UNBOUNDED(TPE, IMP) \
225 : do { \
226 : TPE a = 0; \
227 : dbl curval = dbl_nil; \
228 : l = i - 1; \
229 : for (j = l; ; j--) { \
230 : if (!is_##TPE##_nil(bp[j])) \
231 : AVERAGE_ITER_FLOAT(TPE, bp[j], a, n); \
232 : if (op[j] || j == k) { \
233 : for (; ; l--) { \
234 : rb[l] = curval; \
235 : if (l == j) \
236 : break; \
237 : } \
238 : has_nils |= is_##TPE##_nil(curval); \
239 : if (j == k) \
240 : break; \
241 : l = j - 1; \
242 : } \
243 : } \
244 : n = 0; \
245 : k = i; \
246 : } while (0)
247 :
248 : #define ANALYTICAL_AVG_IMP_FP_ALL_ROWS(TPE, IMP) \
249 : do { \
250 : TPE a = 0; \
251 : dbl curval = dbl_nil; \
252 : for (; j < i; j++) { \
253 : TPE v = bp[j]; \
254 : if (!is_##TPE##_nil(v)) \
255 : AVERAGE_ITER_FLOAT(TPE, v, a, n); \
256 : } \
257 : if (n > 0) \
258 : curval = a; \
259 : else \
260 : has_nils = true; \
261 : for (; k < i; k++) \
262 : rb[k] = curval; \
263 : n = 0; \
264 : } while (0)
265 :
266 : #define ANALYTICAL_AVG_IMP_FP_CURRENT_ROW(TPE, IMP) ANALYTICAL_AVG_IMP_NUM_CURRENT_ROW(TPE, IMP)
267 :
268 : #define avg_fp_deltas(TPE) typedef struct avg_fp_deltas_##TPE {TPE a; lng n;} avg_fp_deltas_##TPE;
269 : avg_fp_deltas(flt)
270 : avg_fp_deltas(dbl)
271 :
272 : #define INIT_AGGREGATE_AVG_FP(TPE, NOTHING1, NOTHING2) \
273 : do { \
274 : computed = (avg_fp_deltas_##TPE) {0}; \
275 : } while (0)
276 : #define COMPUTE_LEVEL0_AVG_FP(X, TPE, NOTHING1, NOTHING2) \
277 : do { \
278 : TPE v = bp[j + X]; \
279 : computed = is_##TPE##_nil(v) ? (avg_fp_deltas_##TPE) {0} : (avg_fp_deltas_##TPE) {.n = 1, .a = v}; \
280 : } while (0)
281 : #define COMPUTE_LEVELN_AVG_FP(VAL, TPE, NOTHING1, NOTHING2) \
282 : do { \
283 : if (VAL.n) \
284 : AVERAGE_ITER_FLOAT(TPE, VAL.a, computed.a, computed.n); \
285 : } while (0)
286 : #define FINALIZE_AGGREGATE_AVG_FP(TPE, NOTHING1, NOTHING2) \
287 : do { \
288 : if (computed.n == 0) { \
289 : rb[k] = dbl_nil; \
290 : has_nils = true; \
291 : } else { \
292 : rb[k] = computed.a; \
293 : } \
294 : } while (0)
295 : #define ANALYTICAL_AVG_IMP_FP_OTHERS(TPE, IMP) \
296 : do { \
297 : oid ncount = i - k; \
298 : if ((res = GDKrebuild_segment_tree(ncount, sizeof(avg_fp_deltas_##TPE), st, &segment_tree, &levels_offset, &nlevels)) != GDK_SUCCEED) \
299 : goto cleanup; \
300 : populate_segment_tree(avg_fp_deltas_##TPE, ncount, INIT_AGGREGATE_AVG_FP, COMPUTE_LEVEL0_AVG_FP, COMPUTE_LEVELN_AVG_FP, TPE, NOTHING, NOTHING); \
301 : for (; k < i; k++) \
302 : compute_on_segment_tree(avg_fp_deltas_##TPE, start[k] - j, end[k] - j, INIT_AGGREGATE_AVG_FP, COMPUTE_LEVELN_AVG_FP, FINALIZE_AGGREGATE_AVG_FP, TPE, NOTHING, NOTHING); \
303 : j = k; \
304 : } while (0)
305 :
306 : #define ANALYTICAL_AVG_PARTITIONS(TPE, IMP, REAL_IMP) \
307 : do { \
308 : TPE *restrict bp = (TPE*)bi.base; \
309 : if (p) { \
310 : while (i < cnt) { \
311 : if (np[i]) { \
312 : avg##TPE##IMP: \
313 : REAL_IMP(TPE, IMP); \
314 : } \
315 : if (!last) \
316 : i++; \
317 : } \
318 : } \
319 : if (!last) { \
320 : last = true; \
321 : i = cnt; \
322 : goto avg##TPE##IMP; \
323 : } \
324 : } while (0)
325 :
326 : #ifdef HAVE_HGE
327 : avg_num_deltas(hge)
328 : #define ANALYTICAL_AVG_LIMIT(IMP) \
329 : case TYPE_hge: \
330 : ANALYTICAL_AVG_PARTITIONS(hge, IMP, ANALYTICAL_AVG_IMP_NUM_##IMP); \
331 : break;
332 : #else
333 : #define ANALYTICAL_AVG_LIMIT(IMP)
334 : #endif
335 :
336 : #define ANALYTICAL_AVG_BRANCHES(IMP) \
337 : do { \
338 : switch (tpe) { \
339 : case TYPE_bte: \
340 : ANALYTICAL_AVG_PARTITIONS(bte, IMP, ANALYTICAL_AVG_IMP_NUM_##IMP); \
341 : break; \
342 : case TYPE_sht: \
343 : ANALYTICAL_AVG_PARTITIONS(sht, IMP, ANALYTICAL_AVG_IMP_NUM_##IMP); \
344 : break; \
345 : case TYPE_int: \
346 : ANALYTICAL_AVG_PARTITIONS(int, IMP, ANALYTICAL_AVG_IMP_NUM_##IMP); \
347 : break; \
348 : case TYPE_lng: \
349 : ANALYTICAL_AVG_PARTITIONS(lng, IMP, ANALYTICAL_AVG_IMP_NUM_##IMP); \
350 : break; \
351 : ANALYTICAL_AVG_LIMIT(IMP) \
352 : case TYPE_flt: \
353 : ANALYTICAL_AVG_PARTITIONS(flt, IMP, ANALYTICAL_AVG_IMP_FP_##IMP); \
354 : break; \
355 : case TYPE_dbl: \
356 : ANALYTICAL_AVG_PARTITIONS(dbl, IMP, ANALYTICAL_AVG_IMP_FP_##IMP); \
357 : break; \
358 : default: \
359 : goto nosupport; \
360 : } \
361 : } while (0)
362 :
363 : gdk_return
364 72 : GDKanalyticalavg(BAT *r, BAT *p, BAT *o, BAT *b, BAT *s, BAT *e, int tpe, int frame_type)
365 : {
366 72 : BATiter pi = bat_iterator(p);
367 72 : BATiter oi = bat_iterator(o);
368 72 : BATiter bi = bat_iterator(b);
369 72 : BATiter si = bat_iterator(s);
370 72 : BATiter ei = bat_iterator(e);
371 72 : bool has_nils = false, last = false;
372 72 : oid i = 0, j = 0, k = 0, l = 0, cnt = BATcount(b), *restrict start = si.base, *restrict end = ei.base,
373 72 : *levels_offset = NULL, nlevels = 0;
374 72 : lng n = 0, rr = 0;
375 72 : dbl *rb = (dbl *) Tloc(r, 0), curval = dbl_nil;
376 72 : bit *np = pi.base, *op = oi.base;
377 72 : void *segment_tree = NULL;
378 72 : gdk_return res = GDK_SUCCEED;
379 : #ifdef HAVE_HGE
380 72 : hge sum = 0;
381 : #else
382 : lng sum = 0;
383 : #endif
384 72 : BAT *st = NULL;
385 :
386 72 : if (cnt > 0) {
387 72 : switch (frame_type) {
388 25 : case 3: /* unbounded until current row */
389 623 : ANALYTICAL_AVG_BRANCHES(UNBOUNDED_TILL_CURRENT_ROW);
390 : break;
391 1 : case 4: /* current row until unbounded */
392 11 : ANALYTICAL_AVG_BRANCHES(CURRENT_ROW_TILL_UNBOUNDED);
393 : break;
394 17 : case 5: /* all rows */
395 363 : ANALYTICAL_AVG_BRANCHES(ALL_ROWS);
396 : break;
397 0 : case 6: /* current row */
398 0 : ANALYTICAL_AVG_BRANCHES(CURRENT_ROW);
399 : break;
400 29 : default:
401 29 : if ((st = GDKinitialize_segment_tree()) == NULL) {
402 0 : res = GDK_FAIL;
403 0 : goto cleanup;
404 : }
405 2148 : ANALYTICAL_AVG_BRANCHES(OTHERS);
406 : break;
407 : }
408 : }
409 :
410 72 : BATsetcount(r, cnt);
411 72 : r->tnonil = !has_nils;
412 72 : r->tnil = has_nils;
413 72 : cleanup:
414 72 : bat_iterator_end(&pi);
415 72 : bat_iterator_end(&oi);
416 72 : bat_iterator_end(&bi);
417 72 : bat_iterator_end(&si);
418 72 : bat_iterator_end(&ei);
419 72 : BBPreclaim(st);
420 72 : return res;
421 0 : nosupport:
422 0 : GDKerror("42000!average of type %s to dbl unsupported.\n", ATOMname(tpe));
423 0 : res = GDK_FAIL;
424 0 : goto cleanup;
425 : }
426 :
427 : #ifdef TRUNCATE_NUMBERS
428 : #define ANALYTICAL_AVERAGE_INT_CALC_FINALIZE(avg, rem, ncnt) \
429 : do { \
430 : if (rem > 0 && avg < 0) \
431 : avg++; \
432 : } while(0)
433 : #else
434 : #define ANALYTICAL_AVERAGE_INT_CALC_FINALIZE(avg, rem, ncnt) \
435 : do { \
436 : if (rem > 0) { \
437 : if (avg < 0) { \
438 : if (2*rem > ncnt) \
439 : avg++; \
440 : } else { \
441 : if (2*rem >= ncnt) \
442 : avg++; \
443 : } \
444 : } \
445 : } while(0)
446 : #endif
447 :
448 : #define ANALYTICAL_AVG_INT_UNBOUNDED_TILL_CURRENT_ROW(TPE) \
449 : do { \
450 : TPE avg = 0; \
451 : for (; k < i;) { \
452 : j = k; \
453 : do { \
454 : if (!is_##TPE##_nil(bp[k])) \
455 : AVERAGE_ITER(TPE, bp[k], avg, rem, ncnt); \
456 : k++; \
457 : } while (k < i && !op[k]); \
458 : if (ncnt == 0) { \
459 : has_nils = true; \
460 : for (; j < k; j++) \
461 : rb[j] = TPE##_nil; \
462 : } else { \
463 : TPE avgfinal = avg; \
464 : ANALYTICAL_AVERAGE_INT_CALC_FINALIZE(avgfinal, rem, ncnt); \
465 : for (; j < k; j++) \
466 : rb[j] = avgfinal; \
467 : } \
468 : } \
469 : rem = 0; \
470 : ncnt = 0; \
471 : } while (0)
472 :
473 : #define ANALYTICAL_AVG_INT_CURRENT_ROW_TILL_UNBOUNDED(TPE) \
474 : do { \
475 : TPE avg = 0; \
476 : l = i - 1; \
477 : for (j = l; ; j--) { \
478 : if (!is_##TPE##_nil(bp[j])) \
479 : AVERAGE_ITER(TPE, bp[j], avg, rem, ncnt); \
480 : if (op[j] || j == k) { \
481 : if (ncnt == 0) { \
482 : has_nils = true; \
483 : for (; ; l--) { \
484 : rb[l] = TPE##_nil; \
485 : if (l == j) \
486 : break; \
487 : } \
488 : } else { \
489 : TPE avgfinal = avg; \
490 : ANALYTICAL_AVERAGE_INT_CALC_FINALIZE(avgfinal, rem, ncnt); \
491 : for (; ; l--) { \
492 : rb[l] = avgfinal; \
493 : if (l == j) \
494 : break; \
495 : } \
496 : } \
497 : if (j == k) \
498 : break; \
499 : l = j - 1; \
500 : } \
501 : } \
502 : rem = 0; \
503 : ncnt = 0; \
504 : k = i; \
505 : } while (0)
506 :
507 : #define ANALYTICAL_AVG_INT_ALL_ROWS(TPE) \
508 : do { \
509 : TPE avg = 0; \
510 : for (; j < i; j++) { \
511 : TPE v = bp[j]; \
512 : if (!is_##TPE##_nil(v)) \
513 : AVERAGE_ITER(TPE, v, avg, rem, ncnt); \
514 : } \
515 : if (ncnt == 0) { \
516 : for (; k < i; k++) \
517 : rb[k] = TPE##_nil; \
518 : has_nils = true; \
519 : } else { \
520 : ANALYTICAL_AVERAGE_INT_CALC_FINALIZE(avg, rem, ncnt); \
521 : for (; k < i; k++) \
522 : rb[k] = avg; \
523 : } \
524 : rem = 0; \
525 : ncnt = 0; \
526 : } while (0)
527 :
528 : #define ANALYTICAL_AVG_INT_CURRENT_ROW(TPE) \
529 : do { \
530 : for (; k < i; k++) { \
531 : TPE v = bp[k]; \
532 : rb[k] = bp[k]; \
533 : has_nils |= is_##TPE##_nil(v); \
534 : } \
535 : } while (0)
536 :
537 : #define avg_int_deltas(TPE) typedef struct avg_int_deltas_##TPE { TPE avg; lng rem, ncnt;} avg_int_deltas_##TPE;
538 : avg_int_deltas(bte)
539 : avg_int_deltas(sht)
540 : avg_int_deltas(int)
541 : avg_int_deltas(lng)
542 :
543 : #define INIT_AGGREGATE_AVG_INT(TPE, NOTHING1, NOTHING2) \
544 : do { \
545 : computed = (avg_int_deltas_##TPE) {0}; \
546 : } while (0)
547 : #define COMPUTE_LEVEL0_AVG_INT(X, TPE, NOTHING1, NOTHING2) \
548 : do { \
549 : TPE v = bp[j + X]; \
550 : computed = is_##TPE##_nil(v) ? (avg_int_deltas_##TPE) {0} : (avg_int_deltas_##TPE) {.avg = v, .ncnt = 1}; \
551 : } while (0)
552 : #define COMPUTE_LEVELN_AVG_INT(VAL, TPE, NOTHING1, NOTHING2) \
553 : do { \
554 : if (VAL.ncnt) \
555 : AVERAGE_ITER(TPE, VAL.avg, computed.avg, computed.rem, computed.ncnt); \
556 : } while (0)
557 : #define FINALIZE_AGGREGATE_AVG_INT(TPE, NOTHING1, NOTHING2) \
558 : do { \
559 : if (computed.ncnt == 0) { \
560 : has_nils = true; \
561 : rb[k] = TPE##_nil; \
562 : } else { \
563 : ANALYTICAL_AVERAGE_INT_CALC_FINALIZE(computed.avg, computed.rem, computed.ncnt); \
564 : rb[k] = computed.avg; \
565 : } \
566 : } while (0)
567 : #define ANALYTICAL_AVG_INT_OTHERS(TPE) \
568 : do { \
569 : oid ncount = i - k; \
570 : if ((res = GDKrebuild_segment_tree(ncount, sizeof(avg_int_deltas_##TPE), st, &segment_tree, &levels_offset, &nlevels)) != GDK_SUCCEED) \
571 : goto cleanup; \
572 : populate_segment_tree(avg_int_deltas_##TPE, ncount, INIT_AGGREGATE_AVG_INT, COMPUTE_LEVEL0_AVG_INT, COMPUTE_LEVELN_AVG_INT, TPE, NOTHING, NOTHING); \
573 : for (; k < i; k++) \
574 : compute_on_segment_tree(avg_int_deltas_##TPE, start[k] - j, end[k] - j, INIT_AGGREGATE_AVG_INT, COMPUTE_LEVELN_AVG_INT, FINALIZE_AGGREGATE_AVG_INT, TPE, NOTHING, NOTHING); \
575 : j = k; \
576 : } while (0)
577 :
578 : #define ANALYTICAL_AVG_INT_PARTITIONS(TPE, IMP) \
579 : do { \
580 : TPE *restrict bp = (TPE*)bi.base, *rb = (TPE *) Tloc(r, 0); \
581 : if (p) { \
582 : while (i < cnt) { \
583 : if (np[i]) { \
584 : avg##TPE##IMP: \
585 : IMP(TPE); \
586 : } \
587 : if (!last) \
588 : i++; \
589 : } \
590 : } \
591 : if (!last) { \
592 : last = true; \
593 : i = cnt; \
594 : goto avg##TPE##IMP; \
595 : } \
596 : } while (0)
597 :
598 : #ifdef HAVE_HGE
599 : avg_int_deltas(hge)
600 : #define ANALYTICAL_AVG_INT_LIMIT(IMP) \
601 : case TYPE_hge: \
602 : ANALYTICAL_AVG_INT_PARTITIONS(hge, ANALYTICAL_AVG_INT_##IMP); \
603 : break;
604 : #else
605 : #define ANALYTICAL_AVG_INT_LIMIT(IMP)
606 : #endif
607 :
608 : #define ANALYTICAL_AVG_INT_BRANCHES(IMP) \
609 : do { \
610 : switch (tpe) { \
611 : case TYPE_bte: \
612 : ANALYTICAL_AVG_INT_PARTITIONS(bte, ANALYTICAL_AVG_INT_##IMP); \
613 : break; \
614 : case TYPE_sht: \
615 : ANALYTICAL_AVG_INT_PARTITIONS(sht, ANALYTICAL_AVG_INT_##IMP); \
616 : break; \
617 : case TYPE_int: \
618 : ANALYTICAL_AVG_INT_PARTITIONS(int, ANALYTICAL_AVG_INT_##IMP); \
619 : break; \
620 : case TYPE_lng: \
621 : ANALYTICAL_AVG_INT_PARTITIONS(lng, ANALYTICAL_AVG_INT_##IMP); \
622 : break; \
623 : ANALYTICAL_AVG_INT_LIMIT(IMP) \
624 : default: \
625 : goto nosupport; \
626 : } \
627 : } while (0)
628 :
629 : gdk_return
630 48 : GDKanalyticalavginteger(BAT *r, BAT *p, BAT *o, BAT *b, BAT *s, BAT *e, int tpe, int frame_type)
631 : {
632 48 : BATiter pi = bat_iterator(p);
633 48 : BATiter oi = bat_iterator(o);
634 48 : BATiter bi = bat_iterator(b);
635 48 : BATiter si = bat_iterator(s);
636 48 : BATiter ei = bat_iterator(e);
637 48 : bool has_nils = false, last = false;
638 48 : oid i = 0, j = 0, k = 0, l = 0, cnt = BATcount(b), *restrict start = si.base, *restrict end = ei.base,
639 48 : *levels_offset = NULL, nlevels = 0;
640 48 : lng rem = 0, ncnt = 0;
641 48 : bit *np = pi.base, *op = oi.base;
642 48 : void *segment_tree = NULL;
643 48 : gdk_return res = GDK_SUCCEED;
644 48 : BAT *st = NULL;
645 :
646 48 : if (cnt > 0) {
647 48 : switch (frame_type) {
648 18 : case 3: /* unbounded until current row */
649 708 : ANALYTICAL_AVG_INT_BRANCHES(UNBOUNDED_TILL_CURRENT_ROW);
650 : break;
651 0 : case 4: /* current row until unbounded */
652 0 : ANALYTICAL_AVG_INT_BRANCHES(CURRENT_ROW_TILL_UNBOUNDED);
653 : break;
654 17 : case 5: /* all rows */
655 308509 : ANALYTICAL_AVG_INT_BRANCHES(ALL_ROWS);
656 : break;
657 0 : case 6: /* current row */
658 0 : ANALYTICAL_AVG_INT_BRANCHES(CURRENT_ROW);
659 : break;
660 13 : default:
661 13 : if ((st = GDKinitialize_segment_tree()) == NULL) {
662 0 : res = GDK_FAIL;
663 0 : goto cleanup;
664 : }
665 1135 : ANALYTICAL_AVG_INT_BRANCHES(OTHERS);
666 : break;
667 : }
668 : }
669 :
670 48 : BATsetcount(r, cnt);
671 48 : r->tnonil = !has_nils;
672 48 : r->tnil = has_nils;
673 48 : cleanup:
674 48 : bat_iterator_end(&pi);
675 48 : bat_iterator_end(&oi);
676 48 : bat_iterator_end(&bi);
677 48 : bat_iterator_end(&si);
678 48 : bat_iterator_end(&ei);
679 48 : BBPreclaim(st);
680 48 : return res;
681 0 : nosupport:
682 0 : GDKerror("42000!average of type %s to %s unsupported.\n", ATOMname(tpe), ATOMname(tpe));
683 0 : res = GDK_FAIL;
684 0 : goto cleanup;
685 : }
686 :
687 : #define ANALYTICAL_STDEV_VARIANCE_UNBOUNDED_TILL_CURRENT_ROW(TPE, SAMPLE, OP) \
688 : do { \
689 : TPE *restrict bp = (TPE*)bi.base; \
690 : for (; k < i;) { \
691 : j = k; \
692 : do { \
693 : TPE v = bp[k]; \
694 : if (!is_##TPE##_nil(v)) { \
695 : n++; \
696 : delta = (dbl) v - mean; \
697 : mean += delta / n; \
698 : m2 += delta * ((dbl) v - mean); \
699 : } \
700 : k++; \
701 : } while (k < i && !op[k]); \
702 : if (isinf(m2)) { \
703 : goto overflow; \
704 : } else if (n > SAMPLE) { \
705 : for (; j < k; j++) \
706 : rb[j] = OP; \
707 : } else { \
708 : for (; j < k; j++) \
709 : rb[j] = dbl_nil; \
710 : has_nils = true; \
711 : } \
712 : } \
713 : n = 0; \
714 : mean = 0; \
715 : m2 = 0; \
716 : } while (0)
717 :
718 : #define ANALYTICAL_STDEV_VARIANCE_CURRENT_ROW_TILL_UNBOUNDED(TPE, SAMPLE, OP) \
719 : do { \
720 : TPE *restrict bp = (TPE*)bi.base; \
721 : l = i - 1; \
722 : for (j = l; ; j--) { \
723 : TPE v = bp[j]; \
724 : if (!is_##TPE##_nil(v)) { \
725 : n++; \
726 : delta = (dbl) v - mean; \
727 : mean += delta / n; \
728 : m2 += delta * ((dbl) v - mean); \
729 : } \
730 : if (op[j] || j == k) { \
731 : if (isinf(m2)) { \
732 : goto overflow; \
733 : } else if (n > SAMPLE) { \
734 : for (; ; l--) { \
735 : rb[l] = OP; \
736 : if (l == j) \
737 : break; \
738 : } \
739 : } else { \
740 : for (; ; l--) { \
741 : rb[l] = dbl_nil; \
742 : if (l == j) \
743 : break; \
744 : } \
745 : has_nils = true; \
746 : } \
747 : if (j == k) \
748 : break; \
749 : l = j - 1; \
750 : } \
751 : } \
752 : n = 0; \
753 : mean = 0; \
754 : m2 = 0; \
755 : k = i; \
756 : } while (0)
757 :
758 : #define ANALYTICAL_STDEV_VARIANCE_ALL_ROWS(TPE, SAMPLE, OP) \
759 : do { \
760 : TPE *restrict bp = (TPE*)bi.base; \
761 : for (; j < i; j++) { \
762 : TPE v = bp[j]; \
763 : if (is_##TPE##_nil(v)) \
764 : continue; \
765 : n++; \
766 : delta = (dbl) v - mean; \
767 : mean += delta / n; \
768 : m2 += delta * ((dbl) v - mean); \
769 : } \
770 : if (isinf(m2)) { \
771 : goto overflow; \
772 : } else if (n > SAMPLE) { \
773 : for (; k < i; k++) \
774 : rb[k] = OP; \
775 : } else { \
776 : for (; k < i; k++) \
777 : rb[k] = dbl_nil; \
778 : has_nils = true; \
779 : } \
780 : n = 0; \
781 : mean = 0; \
782 : m2 = 0; \
783 : } while (0)
784 :
785 : #define ANALYTICAL_STDEV_VARIANCE_CURRENT_ROW(TPE, SAMPLE, OP) \
786 : do { \
787 : for (; k < i; k++) \
788 : rb[k] = SAMPLE == 1 ? dbl_nil : 0; \
789 : has_nils = is_dbl_nil(rb[k - 1]); \
790 : } while (0)
791 :
792 : typedef struct stdev_var_deltas {
793 : BUN n;
794 : dbl mean, delta, m2;
795 : } stdev_var_deltas;
796 :
797 : #define INIT_AGGREGATE_STDEV_VARIANCE(TPE, SAMPLE, OP) \
798 : do { \
799 : computed = (stdev_var_deltas) {0}; \
800 : } while (0)
801 : #define COMPUTE_LEVEL0_STDEV_VARIANCE(X, TPE, SAMPLE, OP) \
802 : do { \
803 : TPE v = bp[j + X]; \
804 : computed = is_##TPE##_nil(v) ? (stdev_var_deltas) {0} : (stdev_var_deltas) {.n = 1, .mean = (dbl)v, .delta = (dbl)v}; \
805 : } while (0)
806 : #define COMPUTE_LEVELN_STDEV_VARIANCE(VAL, TPE, SAMPLE, OP) \
807 : do { \
808 : if (VAL.n) { \
809 : computed.n++; \
810 : computed.delta = VAL.delta - computed.mean; \
811 : computed.mean += computed.delta / computed.n; \
812 : computed.m2 += computed.delta * (VAL.delta - computed.mean); \
813 : } \
814 : } while (0)
815 : #define FINALIZE_AGGREGATE_STDEV_VARIANCE(TPE, SAMPLE, OP) \
816 : do { \
817 : dbl m2 = computed.m2; \
818 : BUN n = computed.n; \
819 : if (isinf(m2)) { \
820 : goto overflow; \
821 : } else if (n > SAMPLE) { \
822 : rb[k] = OP; \
823 : } else { \
824 : rb[k] = dbl_nil; \
825 : has_nils = true; \
826 : } \
827 : } while (0)
828 : #define ANALYTICAL_STDEV_VARIANCE_OTHERS(TPE, SAMPLE, OP) \
829 : do { \
830 : TPE *restrict bp = (TPE*)bi.base; \
831 : oid ncount = i - k; \
832 : if ((res = GDKrebuild_segment_tree(ncount, sizeof(stdev_var_deltas), st, &segment_tree, &levels_offset, &nlevels)) != GDK_SUCCEED) \
833 : goto cleanup; \
834 : populate_segment_tree(stdev_var_deltas, ncount, INIT_AGGREGATE_STDEV_VARIANCE, COMPUTE_LEVEL0_STDEV_VARIANCE, COMPUTE_LEVELN_STDEV_VARIANCE, TPE, SAMPLE, OP); \
835 : for (; k < i; k++) \
836 : compute_on_segment_tree(stdev_var_deltas, start[k] - j, end[k] - j, INIT_AGGREGATE_STDEV_VARIANCE, COMPUTE_LEVELN_STDEV_VARIANCE, FINALIZE_AGGREGATE_STDEV_VARIANCE, TPE, SAMPLE, OP); \
837 : j = k; \
838 : } while (0)
839 :
840 : #define ANALYTICAL_STATISTICS_PARTITIONS(TPE, SAMPLE, OP, IMP) \
841 : do { \
842 : if (p) { \
843 : while (i < cnt) { \
844 : if (np[i]) { \
845 : statistics##TPE##IMP: \
846 : IMP(TPE, SAMPLE, OP); \
847 : } \
848 : if (!last) \
849 : i++; \
850 : } \
851 : } \
852 : if (!last) { \
853 : last = true; \
854 : i = cnt; \
855 : goto statistics##TPE##IMP; \
856 : } \
857 : } while (0)
858 :
859 : #ifdef HAVE_HGE
860 : #define ANALYTICAL_STATISTICS_LIMIT(IMP, SAMPLE, OP) \
861 : case TYPE_hge: \
862 : ANALYTICAL_STATISTICS_PARTITIONS(hge, SAMPLE, OP, ANALYTICAL_##IMP); \
863 : break;
864 : #else
865 : #define ANALYTICAL_STATISTICS_LIMIT(IMP, SAMPLE, OP)
866 : #endif
867 :
868 : #define ANALYTICAL_STATISTICS_BRANCHES(IMP, SAMPLE, OP) \
869 : do { \
870 : switch (tpe) { \
871 : case TYPE_bte: \
872 : ANALYTICAL_STATISTICS_PARTITIONS(bte, SAMPLE, OP, ANALYTICAL_##IMP); \
873 : break; \
874 : case TYPE_sht: \
875 : ANALYTICAL_STATISTICS_PARTITIONS(sht, SAMPLE, OP, ANALYTICAL_##IMP); \
876 : break; \
877 : case TYPE_int: \
878 : ANALYTICAL_STATISTICS_PARTITIONS(int, SAMPLE, OP, ANALYTICAL_##IMP); \
879 : break; \
880 : case TYPE_lng: \
881 : ANALYTICAL_STATISTICS_PARTITIONS(lng, SAMPLE, OP, ANALYTICAL_##IMP); \
882 : break; \
883 : case TYPE_flt: \
884 : ANALYTICAL_STATISTICS_PARTITIONS(flt, SAMPLE, OP, ANALYTICAL_##IMP); \
885 : break; \
886 : case TYPE_dbl: \
887 : ANALYTICAL_STATISTICS_PARTITIONS(dbl, SAMPLE, OP, ANALYTICAL_##IMP); \
888 : break; \
889 : ANALYTICAL_STATISTICS_LIMIT(IMP, SAMPLE, OP) \
890 : default: \
891 : goto nosupport; \
892 : } \
893 : } while (0)
894 :
895 : #define GDK_ANALYTICAL_STDEV_VARIANCE(NAME, SAMPLE, OP, DESC) \
896 : gdk_return \
897 : GDKanalytical_##NAME(BAT *r, BAT *p, BAT *o, BAT *b, BAT *s, BAT *e, int tpe, int frame_type) \
898 : { \
899 : BATiter pi = bat_iterator(p); \
900 : BATiter oi = bat_iterator(o); \
901 : BATiter bi = bat_iterator(b); \
902 : BATiter si = bat_iterator(s); \
903 : BATiter ei = bat_iterator(e); \
904 : bool has_nils = false, last = false; \
905 : oid i = 0, j = 0, k = 0, l = 0, cnt = BATcount(b), *restrict start = si.base, *restrict end = ei.base, \
906 : *levels_offset = NULL, nlevels = 0; \
907 : lng n = 0; \
908 : bit *np = pi.base, *op = oi.base; \
909 : dbl *rb = (dbl *) Tloc(r, 0), mean = 0, m2 = 0, delta; \
910 : void *segment_tree = NULL; \
911 : gdk_return res = GDK_SUCCEED; \
912 : BAT *st = NULL; \
913 : \
914 : if (cnt > 0) { \
915 : switch (frame_type) { \
916 : case 3: /* unbounded until current row */ \
917 : ANALYTICAL_STATISTICS_BRANCHES(STDEV_VARIANCE_UNBOUNDED_TILL_CURRENT_ROW, SAMPLE, OP); \
918 : break; \
919 : case 4: /* current row until unbounded */ \
920 : ANALYTICAL_STATISTICS_BRANCHES(STDEV_VARIANCE_CURRENT_ROW_TILL_UNBOUNDED, SAMPLE, OP); \
921 : break; \
922 : case 5: /* all rows */ \
923 : ANALYTICAL_STATISTICS_BRANCHES(STDEV_VARIANCE_ALL_ROWS, SAMPLE, OP); \
924 : break; \
925 : case 6: /* current row */ \
926 : ANALYTICAL_STATISTICS_BRANCHES(STDEV_VARIANCE_CURRENT_ROW, SAMPLE, OP); \
927 : break; \
928 : default: \
929 : if ((st = GDKinitialize_segment_tree()) == NULL) { \
930 : res = GDK_FAIL; \
931 : goto cleanup; \
932 : } \
933 : ANALYTICAL_STATISTICS_BRANCHES(STDEV_VARIANCE_OTHERS, SAMPLE, OP); \
934 : break; \
935 : } \
936 : } \
937 : \
938 : BATsetcount(r, cnt); \
939 : r->tnonil = !has_nils; \
940 : r->tnil = has_nils; \
941 : goto cleanup; /* all these gotos seem confusing but it cleans up the ending of the operator */ \
942 : overflow: \
943 : GDKerror("22003!overflow in calculation.\n"); \
944 : res = GDK_FAIL; \
945 : cleanup: \
946 : bat_iterator_end(&pi); \
947 : bat_iterator_end(&oi); \
948 : bat_iterator_end(&bi); \
949 : bat_iterator_end(&si); \
950 : bat_iterator_end(&ei); \
951 : BBPreclaim(st); \
952 : return res; \
953 : nosupport: \
954 : GDKerror("42000!%s of type %s unsupported.\n", DESC, ATOMname(tpe)); \
955 : res = GDK_FAIL; \
956 : goto cleanup; \
957 : }
958 :
959 820 : GDK_ANALYTICAL_STDEV_VARIANCE(stddev_samp, 1, sqrt(m2 / (n - 1)), "standard deviation")
960 419009 : GDK_ANALYTICAL_STDEV_VARIANCE(stddev_pop, 0, sqrt(m2 / n), "standard deviation")
961 332 : GDK_ANALYTICAL_STDEV_VARIANCE(variance_samp, 1, m2 / (n - 1), "variance")
962 952 : GDK_ANALYTICAL_STDEV_VARIANCE(variance_pop, 0, m2 / n, "variance")
963 :
964 : #define ANALYTICAL_COVARIANCE_UNBOUNDED_TILL_CURRENT_ROW(TPE, SAMPLE, OP) \
965 : do { \
966 : TPE *bp1 = (TPE*)b1i.base, *bp2 = (TPE*)b2i.base; \
967 : for (; k < i;) { \
968 : j = k; \
969 : do { \
970 : TPE v1 = bp1[k], v2 = bp2[k]; \
971 : if (!is_##TPE##_nil(v1) && !is_##TPE##_nil(v2)) { \
972 : n++; \
973 : delta1 = (dbl) v1 - mean1; \
974 : mean1 += delta1 / n; \
975 : delta2 = (dbl) v2 - mean2; \
976 : mean2 += delta2 / n; \
977 : m2 += delta1 * ((dbl) v2 - mean2); \
978 : } \
979 : k++; \
980 : } while (k < i && !op[k]); \
981 : if (isinf(m2)) \
982 : goto overflow; \
983 : if (n > SAMPLE) { \
984 : for (; j < k; j++) \
985 : rb[j] = OP; \
986 : } else { \
987 : for (; j < k; j++) \
988 : rb[j] = dbl_nil; \
989 : has_nils = true; \
990 : } \
991 : } \
992 : n = 0; \
993 : mean1 = 0; \
994 : mean2 = 0; \
995 : m2 = 0; \
996 : } while (0)
997 :
998 : #define ANALYTICAL_COVARIANCE_CURRENT_ROW_TILL_UNBOUNDED(TPE, SAMPLE, OP) \
999 : do { \
1000 : TPE *bp1 = (TPE*)b1i.base, *bp2 = (TPE*)b2i.base; \
1001 : l = i - 1; \
1002 : for (j = l; ; j--) { \
1003 : TPE v1 = bp1[j], v2 = bp2[j]; \
1004 : if (!is_##TPE##_nil(v1) && !is_##TPE##_nil(v2)) { \
1005 : n++; \
1006 : delta1 = (dbl) v1 - mean1; \
1007 : mean1 += delta1 / n; \
1008 : delta2 = (dbl) v2 - mean2; \
1009 : mean2 += delta2 / n; \
1010 : m2 += delta1 * ((dbl) v2 - mean2); \
1011 : } \
1012 : if (op[j] || j == k) { \
1013 : if (isinf(m2)) \
1014 : goto overflow; \
1015 : if (n > SAMPLE) { \
1016 : for (; ; l--) { \
1017 : rb[l] = OP; \
1018 : if (l == j) \
1019 : break; \
1020 : } \
1021 : } else { \
1022 : for (; ; l--) { \
1023 : rb[l] = dbl_nil; \
1024 : if (l == j) \
1025 : break; \
1026 : } \
1027 : has_nils = true; \
1028 : } \
1029 : if (j == k) \
1030 : break; \
1031 : l = j - 1; \
1032 : } \
1033 : } \
1034 : n = 0; \
1035 : mean1 = 0; \
1036 : mean2 = 0; \
1037 : m2 = 0; \
1038 : k = i; \
1039 : } while (0)
1040 :
1041 : #define ANALYTICAL_COVARIANCE_ALL_ROWS(TPE, SAMPLE, OP) \
1042 : do { \
1043 : TPE *bp1 = (TPE*)b1i.base, *bp2 = (TPE*)b2i.base; \
1044 : for (; j < i; j++) { \
1045 : TPE v1 = bp1[j], v2 = bp2[j]; \
1046 : if (!is_##TPE##_nil(v1) && !is_##TPE##_nil(v2)) { \
1047 : n++; \
1048 : delta1 = (dbl) v1 - mean1; \
1049 : mean1 += delta1 / n; \
1050 : delta2 = (dbl) v2 - mean2; \
1051 : mean2 += delta2 / n; \
1052 : m2 += delta1 * ((dbl) v2 - mean2); \
1053 : } \
1054 : } \
1055 : if (isinf(m2)) \
1056 : goto overflow; \
1057 : if (n > SAMPLE) { \
1058 : for (; k < i; k++) \
1059 : rb[k] = OP; \
1060 : } else { \
1061 : for (; k < i; k++) \
1062 : rb[k] = dbl_nil; \
1063 : has_nils = true; \
1064 : } \
1065 : n = 0; \
1066 : mean1 = 0; \
1067 : mean2 = 0; \
1068 : m2 = 0; \
1069 : } while (0)
1070 :
1071 : #define ANALYTICAL_COVARIANCE_CURRENT_ROW(TPE, SAMPLE, OP) \
1072 : do { \
1073 : for (; k < i; k++) \
1074 : rb[k] = SAMPLE == 1 ? dbl_nil : 0; \
1075 : has_nils = is_dbl_nil(rb[k - 1]); \
1076 : } while (0)
1077 :
1078 : typedef struct covariance_deltas {
1079 : BUN n;
1080 : dbl mean1, mean2, delta1, delta2, m2;
1081 : } covariance_deltas;
1082 :
1083 : #define INIT_AGGREGATE_COVARIANCE(TPE, SAMPLE, OP) \
1084 : do { \
1085 : computed = (covariance_deltas) {0}; \
1086 : } while (0)
1087 : #define COMPUTE_LEVEL0_COVARIANCE(X, TPE, SAMPLE, OP) \
1088 : do { \
1089 : TPE v1 = bp1[j + X], v2 = bp2[j + X]; \
1090 : computed = is_##TPE##_nil(v1) || is_##TPE##_nil(v2) ? (covariance_deltas) {0} \
1091 : : (covariance_deltas) {.n = 1, .mean1 = (dbl)v1, .mean2 = (dbl)v2, .delta1 = (dbl)v1, .delta2 = (dbl)v2}; \
1092 : } while (0)
1093 : #define COMPUTE_LEVELN_COVARIANCE(VAL, TPE, SAMPLE, OP) \
1094 : do { \
1095 : if (VAL.n) { \
1096 : computed.n++; \
1097 : computed.delta1 = VAL.delta1 - computed.mean1; \
1098 : computed.mean1 += computed.delta1 / computed.n; \
1099 : computed.delta2 = VAL.delta2 - computed.mean2; \
1100 : computed.mean2 += computed.delta2 / computed.n; \
1101 : computed.m2 += computed.delta1 * (VAL.delta2 - computed.mean2); \
1102 : } \
1103 : } while (0)
1104 : #define FINALIZE_AGGREGATE_COVARIANCE(TPE, SAMPLE, OP) FINALIZE_AGGREGATE_STDEV_VARIANCE(TPE, SAMPLE, OP)
1105 : #define ANALYTICAL_COVARIANCE_OTHERS(TPE, SAMPLE, OP) \
1106 : do { \
1107 : TPE *bp1 = (TPE*)b1i.base, *bp2 = (TPE*)b2i.base; \
1108 : oid ncount = i - k; \
1109 : if ((res = GDKrebuild_segment_tree(ncount, sizeof(covariance_deltas), st, &segment_tree, &levels_offset, &nlevels)) != GDK_SUCCEED) \
1110 : goto cleanup; \
1111 : populate_segment_tree(covariance_deltas, ncount, INIT_AGGREGATE_COVARIANCE, COMPUTE_LEVEL0_COVARIANCE, COMPUTE_LEVELN_COVARIANCE, TPE, SAMPLE, OP); \
1112 : for (; k < i; k++) \
1113 : compute_on_segment_tree(covariance_deltas, start[k] - j, end[k] - j, INIT_AGGREGATE_COVARIANCE, COMPUTE_LEVELN_COVARIANCE, FINALIZE_AGGREGATE_COVARIANCE, TPE, SAMPLE, OP); \
1114 : j = k; \
1115 : } while (0)
1116 :
1117 : #define GDK_ANALYTICAL_COVARIANCE(NAME, SAMPLE, OP) \
1118 : gdk_return \
1119 : GDKanalytical_##NAME(BAT *r, BAT *p, BAT *o, BAT *b1, BAT *b2, BAT *s, BAT *e, int tpe, int frame_type) \
1120 : { \
1121 : BATiter pi = bat_iterator(p); \
1122 : BATiter oi = bat_iterator(o); \
1123 : BATiter b1i = bat_iterator(b1); \
1124 : BATiter b2i = bat_iterator(b2); \
1125 : BATiter si = bat_iterator(s); \
1126 : BATiter ei = bat_iterator(e); \
1127 : bool has_nils = false, last = false; \
1128 : oid i = 0, j = 0, k = 0, l = 0, cnt = BATcount(b1), *restrict start = si.base, *restrict end = ei.base, \
1129 : *levels_offset = NULL, nlevels = 0; \
1130 : lng n = 0; \
1131 : bit *np = pi.base, *op = oi.base; \
1132 : dbl *rb = (dbl *) Tloc(r, 0), mean1 = 0, mean2 = 0, m2 = 0, delta1, delta2; \
1133 : void *segment_tree = NULL; \
1134 : gdk_return res = GDK_SUCCEED; \
1135 : BAT *st = NULL; \
1136 : \
1137 : if (cnt > 0) { \
1138 : switch (frame_type) { \
1139 : case 3: /* unbounded until current row */ \
1140 : ANALYTICAL_STATISTICS_BRANCHES(COVARIANCE_UNBOUNDED_TILL_CURRENT_ROW, SAMPLE, OP); \
1141 : break; \
1142 : case 4: /* current row until unbounded */ \
1143 : ANALYTICAL_STATISTICS_BRANCHES(COVARIANCE_CURRENT_ROW_TILL_UNBOUNDED, SAMPLE, OP); \
1144 : break; \
1145 : case 5: /* all rows */ \
1146 : ANALYTICAL_STATISTICS_BRANCHES(COVARIANCE_ALL_ROWS, SAMPLE, OP); \
1147 : break; \
1148 : case 6: /* current row */ \
1149 : ANALYTICAL_STATISTICS_BRANCHES(COVARIANCE_CURRENT_ROW, SAMPLE, OP); \
1150 : break; \
1151 : default: \
1152 : if ((st = GDKinitialize_segment_tree()) == NULL) { \
1153 : res = GDK_FAIL; \
1154 : goto cleanup; \
1155 : } \
1156 : ANALYTICAL_STATISTICS_BRANCHES(COVARIANCE_OTHERS, SAMPLE, OP); \
1157 : break; \
1158 : } \
1159 : } \
1160 : \
1161 : BATsetcount(r, cnt); \
1162 : r->tnonil = !has_nils; \
1163 : r->tnil = has_nils; \
1164 : goto cleanup; /* all these gotos seem confusing but it cleans up the ending of the operator */ \
1165 : overflow: \
1166 : GDKerror("22003!overflow in calculation.\n"); \
1167 : res = GDK_FAIL; \
1168 : cleanup: \
1169 : bat_iterator_end(&pi); \
1170 : bat_iterator_end(&oi); \
1171 : bat_iterator_end(&b1i); \
1172 : bat_iterator_end(&b2i); \
1173 : bat_iterator_end(&si); \
1174 : bat_iterator_end(&ei); \
1175 : BBPreclaim(st); \
1176 : return res; \
1177 : nosupport: \
1178 : GDKerror("42000!covariance of type %s unsupported.\n", ATOMname(tpe)); \
1179 : res = GDK_FAIL; \
1180 : goto cleanup; \
1181 : }
1182 :
1183 1340 : GDK_ANALYTICAL_COVARIANCE(covariance_samp, 1, m2 / (n - 1))
1184 1421 : GDK_ANALYTICAL_COVARIANCE(covariance_pop, 0, m2 / n)
1185 :
1186 : #define ANALYTICAL_CORRELATION_UNBOUNDED_TILL_CURRENT_ROW(TPE, SAMPLE, OP) /* SAMPLE and OP not used */ \
1187 : do { \
1188 : TPE *bp1 = (TPE*)b1i.base, *bp2 = (TPE*)b2i.base; \
1189 : for (; k < i;) { \
1190 : j = k; \
1191 : do { \
1192 : TPE v1 = bp1[k], v2 = bp2[k]; \
1193 : if (!is_##TPE##_nil(v1) && !is_##TPE##_nil(v2)) { \
1194 : n++; \
1195 : delta1 = (dbl) v1 - mean1; \
1196 : mean1 += delta1 / n; \
1197 : delta2 = (dbl) v2 - mean2; \
1198 : mean2 += delta2 / n; \
1199 : aux = (dbl) v2 - mean2; \
1200 : up += delta1 * aux; \
1201 : down1 += delta1 * ((dbl) v1 - mean1); \
1202 : down2 += delta2 * aux; \
1203 : } \
1204 : k++; \
1205 : } while (k < i && !op[k]); \
1206 : if (isinf(up) || isinf(down1) || isinf(down2)) \
1207 : goto overflow; \
1208 : if (n != 0 && down1 != 0 && down2 != 0) { \
1209 : rr = (up / n) / (sqrt(down1 / n) * sqrt(down2 / n)); \
1210 : assert(!is_dbl_nil(rr)); \
1211 : } else { \
1212 : rr = dbl_nil; \
1213 : has_nils = true; \
1214 : } \
1215 : for (; j < k; j++) \
1216 : rb[j] = rr; \
1217 : } \
1218 : n = 0; \
1219 : mean1 = 0; \
1220 : mean2 = 0; \
1221 : up = 0; \
1222 : down1 = 0; \
1223 : down2 = 0; \
1224 : } while (0)
1225 :
1226 : #define ANALYTICAL_CORRELATION_CURRENT_ROW_TILL_UNBOUNDED(TPE, SAMPLE, OP) /* SAMPLE and OP not used */ \
1227 : do { \
1228 : TPE *bp1 = (TPE*)b1i.base, *bp2 = (TPE*)b2i.base; \
1229 : l = i - 1; \
1230 : for (j = l; ; j--) { \
1231 : TPE v1 = bp1[j], v2 = bp2[j]; \
1232 : if (!is_##TPE##_nil(v1) && !is_##TPE##_nil(v2)) { \
1233 : n++; \
1234 : delta1 = (dbl) v1 - mean1; \
1235 : mean1 += delta1 / n; \
1236 : delta2 = (dbl) v2 - mean2; \
1237 : mean2 += delta2 / n; \
1238 : aux = (dbl) v2 - mean2; \
1239 : up += delta1 * aux; \
1240 : down1 += delta1 * ((dbl) v1 - mean1); \
1241 : down2 += delta2 * aux; \
1242 : } \
1243 : if (op[j] || j == k) { \
1244 : if (isinf(up) || isinf(down1) || isinf(down2)) \
1245 : goto overflow; \
1246 : if (n != 0 && down1 != 0 && down2 != 0) { \
1247 : rr = (up / n) / (sqrt(down1 / n) * sqrt(down2 / n)); \
1248 : assert(!is_dbl_nil(rr)); \
1249 : } else { \
1250 : rr = dbl_nil; \
1251 : has_nils = true; \
1252 : } \
1253 : for (; ; l--) { \
1254 : rb[l] = rr; \
1255 : if (l == j) \
1256 : break; \
1257 : } \
1258 : if (j == k) \
1259 : break; \
1260 : l = j - 1; \
1261 : } \
1262 : } \
1263 : n = 0; \
1264 : mean1 = 0; \
1265 : mean2 = 0; \
1266 : up = 0; \
1267 : down1 = 0; \
1268 : down2 = 0; \
1269 : k = i; \
1270 : } while (0)
1271 :
1272 : #define ANALYTICAL_CORRELATION_ALL_ROWS(TPE, SAMPLE, OP) /* SAMPLE and OP not used */ \
1273 : do { \
1274 : TPE *bp1 = (TPE*)b1i.base, *bp2 = (TPE*)b2i.base; \
1275 : for (; j < i; j++) { \
1276 : TPE v1 = bp1[j], v2 = bp2[j]; \
1277 : if (!is_##TPE##_nil(v1) && !is_##TPE##_nil(v2)) { \
1278 : n++; \
1279 : delta1 = (dbl) v1 - mean1; \
1280 : mean1 += delta1 / n; \
1281 : delta2 = (dbl) v2 - mean2; \
1282 : mean2 += delta2 / n; \
1283 : aux = (dbl) v2 - mean2; \
1284 : up += delta1 * aux; \
1285 : down1 += delta1 * ((dbl) v1 - mean1); \
1286 : down2 += delta2 * aux; \
1287 : } \
1288 : } \
1289 : if (isinf(up) || isinf(down1) || isinf(down2)) \
1290 : goto overflow; \
1291 : if (n != 0 && down1 != 0 && down2 != 0) { \
1292 : rr = (up / n) / (sqrt(down1 / n) * sqrt(down2 / n)); \
1293 : assert(!is_dbl_nil(rr)); \
1294 : } else { \
1295 : rr = dbl_nil; \
1296 : has_nils = true; \
1297 : } \
1298 : for (; k < i ; k++) \
1299 : rb[k] = rr; \
1300 : n = 0; \
1301 : mean1 = 0; \
1302 : mean2 = 0; \
1303 : up = 0; \
1304 : down1 = 0; \
1305 : down2 = 0; \
1306 : } while (0)
1307 :
1308 : #define ANALYTICAL_CORRELATION_CURRENT_ROW(TPE, SAMPLE, OP) /* SAMPLE and OP not used */ \
1309 : do { \
1310 : for (; k < i; k++) \
1311 : rb[k] = dbl_nil; \
1312 : has_nils = true; \
1313 : } while (0)
1314 :
1315 :
1316 : typedef struct correlation_deltas {
1317 : BUN n;
1318 : dbl mean1, mean2, delta1, delta2, up, down1, down2;
1319 : } correlation_deltas;
1320 :
1321 : #define INIT_AGGREGATE_CORRELATION(TPE, SAMPLE, OP) \
1322 : do { \
1323 : computed = (correlation_deltas) {0}; \
1324 : } while (0)
1325 : #define COMPUTE_LEVEL0_CORRELATION(X, TPE, SAMPLE, OP) \
1326 : do { \
1327 : TPE v1 = bp1[j + X], v2 = bp2[j + X]; \
1328 : computed = is_##TPE##_nil(v1) || is_##TPE##_nil(v2) ? (correlation_deltas) {0} \
1329 : : (correlation_deltas) {.n = 1, .mean1 = (dbl)v1, .mean2 = (dbl)v2, .delta1 = (dbl)v1, .delta2 = (dbl)v2}; \
1330 : } while (0)
1331 : #define COMPUTE_LEVELN_CORRELATION(VAL, TPE, SAMPLE, OP) \
1332 : do { \
1333 : if (VAL.n) { \
1334 : computed.n++; \
1335 : computed.delta1 = VAL.delta1 - computed.mean1; \
1336 : computed.mean1 += computed.delta1 / computed.n; \
1337 : computed.delta2 = VAL.delta2 - computed.mean2; \
1338 : computed.mean2 += computed.delta2 / computed.n; \
1339 : dbl aux = VAL.delta2 - computed.mean2; \
1340 : computed.up += computed.delta1 * aux; \
1341 : computed.down1 += computed.delta1 * (VAL.delta1 - computed.mean1); \
1342 : computed.down2 += computed.delta2 * aux; \
1343 : } \
1344 : } while (0)
1345 : #define FINALIZE_AGGREGATE_CORRELATION(TPE, SAMPLE, OP) \
1346 : do { \
1347 : n = computed.n; \
1348 : up = computed.up; \
1349 : down1 = computed.down1; \
1350 : down2 = computed.down2; \
1351 : if (isinf(up) || isinf(down1) || isinf(down2)) \
1352 : goto overflow; \
1353 : if (n != 0 && down1 != 0 && down2 != 0) { \
1354 : rr = (up / n) / (sqrt(down1 / n) * sqrt(down2 / n)); \
1355 : assert(!is_dbl_nil(rr)); \
1356 : } else { \
1357 : rr = dbl_nil; \
1358 : has_nils = true; \
1359 : } \
1360 : rb[k] = rr; \
1361 : } while (0)
1362 : #define ANALYTICAL_CORRELATION_OTHERS(TPE, SAMPLE, OP) /* SAMPLE and OP not used */ \
1363 : do { \
1364 : TPE *bp1 = (TPE*)b1i.base, *bp2 = (TPE*)b2i.base; \
1365 : oid ncount = i - k; \
1366 : if ((res = GDKrebuild_segment_tree(ncount, sizeof(correlation_deltas), st, &segment_tree, &levels_offset, &nlevels)) != GDK_SUCCEED) \
1367 : goto cleanup; \
1368 : populate_segment_tree(correlation_deltas, ncount, INIT_AGGREGATE_CORRELATION, COMPUTE_LEVEL0_CORRELATION, COMPUTE_LEVELN_CORRELATION, TPE, SAMPLE, OP); \
1369 : for (; k < i; k++) \
1370 : compute_on_segment_tree(correlation_deltas, start[k] - j, end[k] - j, INIT_AGGREGATE_CORRELATION, COMPUTE_LEVELN_CORRELATION, FINALIZE_AGGREGATE_CORRELATION, TPE, SAMPLE, OP); \
1371 : j = k; \
1372 : } while (0)
1373 :
1374 : gdk_return
1375 51 : GDKanalytical_correlation(BAT *r, BAT *p, BAT *o, BAT *b1, BAT *b2, BAT *s, BAT *e, int tpe, int frame_type)
1376 : {
1377 51 : bool has_nils = false, last = false;
1378 51 : BATiter pi = bat_iterator(p);
1379 51 : BATiter oi = bat_iterator(o);
1380 51 : BATiter b1i = bat_iterator(b1);
1381 51 : BATiter b2i = bat_iterator(b2);
1382 51 : BATiter si = bat_iterator(s);
1383 51 : BATiter ei = bat_iterator(e);
1384 51 : oid i = 0, j = 0, k = 0, l = 0, cnt = BATcount(b1),
1385 51 : *levels_offset = NULL, nlevels = 0;
1386 51 : const oid *restrict start = si.base, *restrict end = ei.base;
1387 51 : lng n = 0;
1388 51 : const bit *np = pi.base, *op = oi.base;
1389 51 : dbl *rb = (dbl *) Tloc(r, 0), mean1 = 0, mean2 = 0, up = 0, down1 = 0, down2 = 0, delta1, delta2, aux, rr;
1390 51 : void *segment_tree = NULL;
1391 51 : gdk_return res = GDK_SUCCEED;
1392 51 : BAT *st = NULL;
1393 :
1394 51 : if (cnt > 0) {
1395 51 : switch (frame_type) {
1396 18 : case 3: /* unbounded until current row */
1397 660 : ANALYTICAL_STATISTICS_BRANCHES(CORRELATION_UNBOUNDED_TILL_CURRENT_ROW, ;, ;);
1398 : break;
1399 0 : case 4: /* current row until unbounded */
1400 0 : ANALYTICAL_STATISTICS_BRANCHES(CORRELATION_CURRENT_ROW_TILL_UNBOUNDED, ;, ;);
1401 : break;
1402 18 : case 5: /* all rows */
1403 449 : ANALYTICAL_STATISTICS_BRANCHES(CORRELATION_ALL_ROWS, ;, ;);
1404 : break;
1405 0 : case 6: /* current row */
1406 0 : ANALYTICAL_STATISTICS_BRANCHES(CORRELATION_CURRENT_ROW, ;, ;);
1407 : break;
1408 15 : default:
1409 15 : if ((st = GDKinitialize_segment_tree()) == NULL) {
1410 0 : res = GDK_FAIL;
1411 0 : goto cleanup;
1412 : }
1413 19053 : ANALYTICAL_STATISTICS_BRANCHES(CORRELATION_OTHERS, ;, ;);
1414 : break;
1415 : }
1416 : }
1417 :
1418 50 : BATsetcount(r, cnt);
1419 50 : r->tnonil = !has_nils;
1420 50 : r->tnil = has_nils;
1421 50 : goto cleanup; /* all these gotos seem confusing but it cleans up the ending of the operator */
1422 1 : overflow:
1423 1 : GDKerror("22003!overflow in calculation.\n");
1424 1 : res = GDK_FAIL;
1425 51 : cleanup:
1426 51 : bat_iterator_end(&pi);
1427 51 : bat_iterator_end(&oi);
1428 51 : bat_iterator_end(&b1i);
1429 51 : bat_iterator_end(&b2i);
1430 51 : bat_iterator_end(&si);
1431 51 : bat_iterator_end(&ei);
1432 51 : BBPreclaim(st);
1433 51 : return res;
1434 0 : nosupport:
1435 0 : GDKerror("42000!correlation of type %s unsupported.\n", ATOMname(tpe));
1436 0 : res = GDK_FAIL;
1437 0 : goto cleanup;
1438 : }
|