Mercurial > hg > MonetDB-extend
view gmean/gmean.c @ 61:b42fe5c9bd34 default tip
update to new versioned monetdb directories
author | Niels Nes <niels@cwi.nl> |
---|---|
date | Mon, 23 Sep 2024 13:34:22 +0200 (10 months ago) |
parents | 02895996506d |
children |
line wrap: on
line source
/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * Copyright 2021-2022 MonetDB B.V. */ /* monetdb_config.h must be included as the first include file */ #include <monetdb_config.h> /* mal_exception.h actually contains almost everything we need */ #include <mal_exception.h> /* for the candidate iterator */ #include <gdk_cand.h> /* for log() and exp() */ #include <math.h> static char * gmean_int(dbl *res, const bat *bid) { BAT *b; double val = 0; BUN nneg = 0; BUN nval = 0; if ((b = BATdescriptor(*bid)) == NULL) throw(MAL, "gmean.gmean", RUNTIME_OBJECT_MISSING); const int *vals = (const int *) Tloc(b, 0); for (BUN i = 0, n = BATcount(b); i < n; i++) { if (is_int_nil(vals[i])) continue; /* nils are ignored */ if (vals[i] == 0) { /* any value zero is easy: result is zero */ BBPunfix(b->batCacheid); *res = 0; return MAL_SUCCEED; } if (vals[i] < 0) { nneg++; /* one more negative value */ val += log((double) -vals[i]); } else { val += log((double) vals[i]); } nval++; /* count non-nil values */ } BBPunfix(b->batCacheid); if (nval == 0) { /* if there are no non-nil values, the result is nil */ *res = dbl_nil; } else { *res = exp(val / nval); /* if the number of negative values in the input is odd, * we return (incorrectly, but on purpose) a negative * value (it should really be an imaginary number) */ if (nneg & (BUN) 1) *res = - *res; } return MAL_SUCCEED; } static char * subgroupedgmeancand_int(bat *retval, const bat *bid, const bat *gid, const bat *eid, const bat *sid, const bit *skip_nils) { BAT *b, *bn; /* these two are always assigned */ BAT *g = NULL; /* these three are optional and may not ... */ BAT *e = NULL; /* ... be assigned to below, ... */ BAT *s = NULL; /* ... so we initialize them here */ /* we ignore these two inputs */ (void) skip_nils; /* the bat we're supposed to be working on (bid) is not * optional, but the others are, so we test whether the bat id * is not nil, and if it isn't, whether we can find the BAT * descriptor */ if ((b = BATdescriptor(*bid)) == NULL || (gid && !is_bat_nil(*gid) && (g = BATdescriptor(*gid)) == NULL) || (eid && !is_bat_nil(*eid) && (e = BATdescriptor(*eid)) == NULL) || (sid && !is_bat_nil(*sid) && (s = BATdescriptor(*sid)) == NULL)) { if (b) BBPunfix(b->batCacheid); if (g) BBPunfix(g->batCacheid); if (e) BBPunfix(e->batCacheid); if (s) BBPunfix(s->batCacheid); throw(MAL, "gmean.gmean", RUNTIME_OBJECT_MISSING); } oid min, max; /* min and max group id */ BUN ngrp, ncand; /* number of groups, number of candidates */ struct canditer ci; /* candidate list iterator */ const char *err; /* error message */ err = BATgroupaggrinit(b, g, e, s, &min, &max, &ngrp, &ci, &ncand); /* note that ncand == ci.ncand */ if (err != NULL) { BBPunfix(b->batCacheid); if (g) BBPunfix(g->batCacheid); if (e) BBPunfix(e->batCacheid); if (s) BBPunfix(s->batCacheid); throw(MAL, "gmean.gmean", "%s\n", err); } /* create a result BAT and initialize it with all zeros */ bn = BATconstant(min, TYPE_dbl, &(dbl){0}, ngrp, TRANSIENT); if (bn == NULL) { BBPunfix(b->batCacheid); if (g) BBPunfix(g->batCacheid); if (e) BBPunfix(e->batCacheid); if (s) BBPunfix(s->batCacheid); throw(MAL, "gmean.gmean", "%s\n", GDK_EXCEPTION); } /* We are going to change the contents of the newly created BAT. * We are allowed to do that because we are the "owner" and no * other thread can have a reference to this BAT yet. First we * set the properties. We don't know whether the result is * sorted or whether there are any duplicate values, so we must * set these properties to false. */ bn->tsorted = false; bn->trevsorted = false; bn->tkey = false; /* we may change these two below */ bn->tnil = false; bn->tnonil = true; /* In general we need to distinguish two cases per group: there * are no non-nil values, then the result should be nil; there * are non-nil values, then the result should be the aggregate. * If there is an error (e.g. overflow), there should not be any * result (error return). We shouldn't get any overflow errors, * so this doesn't apply here. */ const int *vals = (const int *) Tloc(b, 0); const oid *gids = NULL; if (g && !BATtdense(g)) gids= (const oid *) Tloc(g, 0); dbl *sums = (dbl *) Tloc(bn, 0); /* results */ /* For this aggregate, we need to maintain the number of values * we have added up per group. We can also keep track of * whether we have seen any zero values. GDKzalloc initializes * the allocated memory with zero bits. */ BUN *cnts = GDKzalloc(ngrp * sizeof(BUN)); /* We need to maintain whether we have seen an odd or an even * number of negative values in each group, so we use a bitmap * for that (true -> odd number of negatives) */ uint32_t *odds = GDKzalloc(((ngrp + 31) / 32) * sizeof(uint32_t)); if (cnts == NULL || odds == NULL) { BBPunfix(b->batCacheid); if (g) BBPunfix(g->batCacheid); if (e) BBPunfix(e->batCacheid); if (s) BBPunfix(s->batCacheid); /* one of these is NULL, but GDKfree can deal with that */ GDKfree(cnts); GDKfree(odds); throw(MAL, "gmean.gmean", MAL_MALLOC_FAIL); } for (BUN i = 0; i < ncand; i++) { oid o = canditer_next(&ci); /* id of candidate */ BUN p = o - b->hseqbase; /* BUN (index) of candidate */ int v = vals[p]; /* the actual value */ oid grp = gids ? gids[p] : g ? min + (oid) p : 0; /* group id */ if (grp >= min && grp <= max) { /* extra check */ grp -= min; /* zero based access */ if (is_int_nil(v)) continue; /* skip nils (no matter skip_nils) */ else if (v == 0) { cnts[grp] = BUN_NONE; continue; } else if (cnts[grp] == BUN_NONE) { /* we already encountered a zero value * in this group */ continue; } /* non zero and non NULL value */ cnts[grp]++; if (v < 0) { /* flip the bit for this group */ sums[grp] += log((double) -v); /* flip the bit for this group */ odds[grp >> 5] ^= 1U << (grp & 31); } else sums[grp] += log((double) v); } } for (oid grp = 0; grp < ngrp; grp++) { if (cnts[grp] == 0) { /* no values found in the group, so result is nil */ sums[grp] = dbl_nil; bn->tnil = true; bn->tnonil = false; } else if (cnts[grp] == BUN_NONE) { /* we encountered a zero, so the result is * zero */ sums[grp] = 0; } else { /* convert the sum of the logs into the * geometric mean, note there should be no * overflow here: the geometric mean is at most * equal to the largest value */ sums[grp] = exp(sums[grp] / cnts[grp]); /* if we saw an odd number of negatives * in the group, return a negative * value */ if (odds[grp >> 5] & (1U << (grp & 31))) sums[grp] = -sums[grp]; } } /* free resources */ GDKfree(cnts); GDKfree(odds); BBPunfix(b->batCacheid); if (g) BBPunfix(g->batCacheid); if (e) BBPunfix(e->batCacheid); if (s) BBPunfix(s->batCacheid); *retval = bn->batCacheid; BBPkeepref(bn); return MAL_SUCCEED; } static char * subgroupedgmean_int(bat *retval, const bat *bid, const bat *gid, const bat *eid, const bit *skip_nils) { return subgroupedgmeancand_int(retval, bid, gid, eid, NULL, skip_nils); } static char * groupedgmean_int(bat *retval, const bat *bid, const bat *gid, const bat *eid) { return subgroupedgmeancand_int(retval, bid, gid, eid, NULL, &(bit){1}); } #include "mel.h" static char gmean_sql[] = "CREATE AGGREGATE FUNCTION gmean(val INTEGER)" " returns DOUBLE external name gmean.gmean;"; static mel_func gmean_init_funcs[] = { command("gmean", "gmean", gmean_int, false, "geometric mean", args(1,2, arg("",dbl),batarg("b",int))), command("gmean", "gmean", groupedgmean_int, false, "geometric mean", args(1,4, batarg("",dbl),batarg("b",int),batarg("g",oid),batargany("e",1))), command("gmean", "subgmean", subgroupedgmean_int, false, "geometric mean", args(1,5, batarg("",dbl),batarg("b",int),batarg("g",oid),batargany("e",1),arg("skip_nils",bit))), command("gmean", "subgmean", subgroupedgmeancand_int, false, "geometric mean", args(1,6, batarg("",dbl),batarg("b",int),batarg("g",oid),batargany("e",1),batarg("s",oid),arg("skip_nils",bit))), { .imp=NULL } /* sentinel */ }; #include "mal_import.h" #include "sql_import.h" #ifdef _MSC_VER #undef read #pragma section(".CRT$XCU",read) #endif LIB_STARTUP_FUNC(init_gmean) { mal_module("gmean", NULL, gmean_init_funcs); sql_register("gmean", gmean_sql); }