Mercurial > hg > MonetDB-extend
view gmean/gmean.c @ 46:3b9611f1b048
Some updates.
author | Sjoerd Mullender <sjoerd@acm.org> |
---|---|
date | Fri, 11 Jun 2021 10:02:09 +0200 (2021-06-11) |
parents | 1cfe9d8a07e8 |
children | c8140b1fabf5 |
line wrap: on
line source
/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * Copyright 2021 MonetDB B.V. */ /* monetdb_config.h must be included as the first include file */ #include <monetdb_config.h> /* mal_exception.h actually contains almost everything we need */ #include <mal_exception.h> /* for the candidate iterator */ #include <gdk_cand.h> /* for log() and exp() */ #include <math.h> static char * gmean_int(dbl *res, const bat *bid) { BAT *b; double val = 0; BUN nneg = 0; BUN nval = 0; if ((b = BATdescriptor(*bid)) == NULL) throw(MAL, "gmean.gmean", RUNTIME_OBJECT_MISSING); const int *vals = (const int *) Tloc(b, 0); for (BUN i = 0, n = BATcount(b); i < n; i++) { if (is_int_nil(vals[i])) continue; /* nils are ignored */ if (vals[i] == 0) { /* any value zero is easy: result is zero */ BBPunfix(b->batCacheid); *res = 0; return MAL_SUCCEED; } if (vals[i] < 0) { nneg++; /* one more negative value */ val += log((double) -vals[i]); } else { val += log((double) vals[i]); } nval++; /* count non-nil values */ } BBPunfix(b->batCacheid); if (nval == 0) { /* if there are no non-nil values, the result is nil */ *res = dbl_nil; } else { *res = exp(val / nval); /* if the number of negative values in the input is odd, * we return (incorrectly, but on purpose) a negative * value (it should really be an imaginary number) */ if (nneg & (BUN) 1) *res = - *res; } return MAL_SUCCEED; } static char * subgroupedgmeancand_int(bat *retval, const bat *bid, const bat *gid, const bat *eid, const bat *sid, const bit *skip_nils, const bit *abort_on_error) { BAT *b, *bn; /* these two are always assigned */ BAT *g = NULL; /* these three are optional and may not ... */ BAT *e = NULL; /* ... be assigned to below, ... */ BAT *s = NULL; /* ... so we initialize them here */ /* we ignore these two inputs */ (void) skip_nils; (void) abort_on_error; /* the bat we're supposed to be working on (bid) is not * optional, but the others are, so we test whether the bat id * is not nil, and if it isn't, whether we can find the BAT * descriptor */ if ((b = BATdescriptor(*bid)) == NULL || (gid && !is_bat_nil(*gid) && (g = BATdescriptor(*gid)) == NULL) || (eid && !is_bat_nil(*eid) && (e = BATdescriptor(*eid)) == NULL) || (sid && !is_bat_nil(*sid) && (s = BATdescriptor(*sid)) == NULL)) { if (b) BBPunfix(b->batCacheid); if (g) BBPunfix(g->batCacheid); if (e) BBPunfix(e->batCacheid); if (s) BBPunfix(s->batCacheid); throw(MAL, "gmean.gmean", RUNTIME_OBJECT_MISSING); } oid min, max; /* min and max group id */ BUN ngrp, ncand; /* number of groups, number of candidates */ struct canditer ci; /* candidate list iterator */ const char *err; /* error message */ err = BATgroupaggrinit(b, g, e, s, &min, &max, &ngrp, &ci, &ncand); /* note that ncand == ci.ncand */ if (err != NULL) { BBPunfix(b->batCacheid); if (g) BBPunfix(g->batCacheid); if (e) BBPunfix(e->batCacheid); if (s) BBPunfix(s->batCacheid); throw(MAL, "gmean.gmean", "%s\n", err); } /* create a result BAT and initialize it with all zeros */ bn = BATconstant(min, TYPE_dbl, &(dbl){0}, ngrp, TRANSIENT); if (bn == NULL) { BBPunfix(b->batCacheid); if (g) BBPunfix(g->batCacheid); if (e) BBPunfix(e->batCacheid); if (s) BBPunfix(s->batCacheid); throw(MAL, "gmean.gmean", "%s\n", GDK_EXCEPTION); } /* In general we need to distinguish two cases per group: there * are no non-nil values, then the result should be nil; there * are non-nil values, then the result should be the aggregate. * If there is an error (e.g. overflow), the abort_on_error flag * determines what should happen: if set, there should not be * any result (error return), if not set, the group value should * be set to nil. We shouldn't get any overflow errors, so this * doesn't apply here. */ const int *vals = (const int *) Tloc(b, 0); const oid *gids = NULL; if (g && !BATtdense(g)) gids= (const oid *) Tloc(g, 0); dbl *sums = (dbl *) Tloc(bn, 0); /* results */ /* For this aggregate, we need to maintain three states per * group: no non-nil values encountered yet; we encountered a * zero value; we encountered values, but no zeros. We maintain whether we have seen any values with a bit in the temporary array 'seen'. When we encoun */ uint32_t *seen = GDKzalloc(((ngrp + 31) / 32) * sizeof(uint32_t)); /* We need to maintain whether we have seen an odd or an even * number of negative values in each group, so we use a bitmap * for that (true -> odd number of negatives) */ uint32_t *odds = GDKzalloc(((ngrp + 31) / 32) * sizeof(uint32_t)); if (seen == NULL || odds == NULL) { BBPunfix(b->batCacheid); if (g) BBPunfix(g->batCacheid); if (e) BBPunfix(e->batCacheid); if (s) BBPunfix(s->batCacheid); /* one of these is NULL, but GDKfree can deal with that */ GDKfree(seen); GDKfree(odds); throw(MAL, "gmean.gmean", MAL_MALLOC_FAIL); } for (BUN i = 0; i < ncand; i++) { oid o = canditer_next(&ci); /* id of candidate */ BUN p = o - b->hseqbase; /* BUN (index) of candidate */ int v = vals[p]; /* the actual value */ oid grp = gids ? gids[p] : g ? (oid) p : 0; /* group id */ if (grp >= min && grp <= max) { /* extra check */ if (is_int_nil(v)) continue; /* skip nils (no matter skip_nils) */ else if (v == 0) sums[grp] = dbl_nil; oid grp32 = grp >> 5; oid mod32 = grp & 31; if (!(seen[grp32] & (1U << mod32))) { seen[grp32] |= 1U << mod32; } else if (is_dbl_nil(sums[grp])) { /* if sums[grp] is nil we've encountered * a zero */ continue; } if (v < 0) { /* flip the bit for this group */ odds[grp32] ^= 1U << mod32; sums[grp] += log((double) -v); } else sums[grp] += log((double) v); } } /* we may change these two below */ bn->tnil = false; bn->tnonil = true; for (oid grp = 0; grp < ngrp; grp++) { if (seen[grp >> 5] & (1U << (grp & 31))) { if (is_dbl_nil(sums[grp])) { /* we encountered a zero, so the result * is zero */ sums[grp] = 0; } else { /* convert the sum of the logs into the * geometric mean */ sums[grp] = exp(sums[grp]); /* if we saw an odd number of negatives * in the group, return a negative * value */ if (odds[grp >> 5] & (1U << (grp & 31))) sums[grp] = -sums[grp]; } } else { /* no values found in the group, so result is nil */ sums[grp] = dbl_nil; bn->tnil = true; bn->tnonil = false; } } /* free resources */ GDKfree(seen); GDKfree(odds); BBPunfix(b->batCacheid); if (g) BBPunfix(g->batCacheid); if (e) BBPunfix(e->batCacheid); if (s) BBPunfix(s->batCacheid); /* set the properties and count */ BATsetcount(bn, ngrp); /* we don't know whether the result is sorted or whether there * are any duplicate values, so we must set these properties to * false */ bn->tsorted = false; bn->trevsorted = false; bn->tkey = false; *retval = bn->batCacheid; BBPkeepref(bn->batCacheid); return MAL_SUCCEED; } static char * subgroupedgmean_int(bat *retval, const bat *bid, const bat *gid, const bat *eid, const bit *skip_nils, const bit *abort_on_error) { return subgroupedgmeancand_int(retval, bid, gid, eid, NULL, skip_nils, abort_on_error); } static char * groupedgmean_int(bat *retval, const bat *bid, const bat *gid, const bat *eid) { return subgroupedgmeancand_int(retval, bid, gid, eid, NULL, &(bit){1}, &(bit){1}); } #include "mel.h" static char gmean_sql[] = "CREATE AGGREGATE FUNCTION gmean(val INTEGER)" " returns DOUBLE external name gmean.gmean;"; static mel_func gmean_init_funcs[] = { command("gmean", "gmean", gmean_int, false, "geometric mean", args(1,2, arg("",dbl),batarg("b",int))), command("gmean", "gmean", groupedgmean_int, false, "geometric mean", args(1,4, batarg("",dbl),batarg("b",int),batarg("g",oid),batargany("e",1))), command("gmean", "subgmean", subgroupedgmean_int, false, "geometric mean", args(1,6, batarg("",dbl),batarg("b",int),batarg("g",oid),batargany("e",1),arg("skip_nils",bit),arg("abort_on_error",bit))), command("gmean", "subgmean", subgroupedgmeancand_int, false, "geometric mean", args(1,7, batarg("",dbl),batarg("b",int),batarg("g",oid),batargany("e",1),batarg("s",oid),arg("skip_nils",bit),arg("abort_on_error",bit))), { .imp=NULL } /* sentinel */ }; #include "mal_import.h" #include "sql_import.h" #ifdef _MSC_VER #undef read #pragma section(".CRT$XCU",read) #endif LIB_STARTUP_FUNC(init_gmean) { mal_module("gmean", NULL, gmean_init_funcs); sql_register("gmean", gmean_sql); }