Re: MonetDB: msk-type - Initial support for the msk type.
wil je dat ik het doortrek naar MAL niveau?
On 24 Apr 2020, at 17:30, Sjoerd Mullender
wrote: Changeset: abf00b42a524 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=abf00b42a524 Modified Files: clients/Tests/exports.stable.out gdk/gdk.h gdk/gdk_atoms.c gdk/gdk_bat.c gdk/gdk_batop.c gdk/gdk_calc.c gdk/gdk_hash.c gdk/gdk_value.c monetdb5/mal/mal_type.c monetdb5/modules/atoms/inet.c monetdb5/modules/kernel/bat5.c sql/backends/monet5/UDF/capi/capi.c sql/backends/monet5/sql_result.c Branch: msk-type Log Message:
Initial support for the msk type.
diffs (truncated from 1402 to 300 lines):
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -1273,7 +1273,7 @@ str INETnetmask(inet *retval, const inet str INETnetwork(inet *retval, const inet *val); str INETnew(inet *retval, str *in); const inet *INETnull(void); -str INETsetmasklen(inet *retval, const inet *val, const int *msk); +str INETsetmasklen(inet *retval, const inet *val, const int *mask); str INETtext(str *retval, const inet *val); ssize_t INETtoString(str *retval, size_t *len, const inet *handle, bool external); str INSPECTatom_names(bat *ret); diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -425,7 +425,8 @@
enum { TYPE_void = 0, - TYPE_bit, + TYPE_msk, /* bit mask */ + TYPE_bit, /* TRUE, FALSE, or nil */ TYPE_bte, TYPE_sht, TYPE_bat, /* BAT id: index in BBPcache */ @@ -445,6 +446,7 @@ enum { TYPE_any = 255, /* limit types to <255! */ };
+typedef bool msk; typedef int8_t bit; typedef int8_t bte; typedef int16_t sht; @@ -602,6 +604,7 @@ typedef struct { oid oval; sht shval; bte btval; + msk mval; flt fval; ptr pval; bat bval; @@ -626,7 +629,7 @@ gdk_export void VALclear(ValPtr v); gdk_export ValPtr VALset(ValPtr v, int t, void *p); gdk_export void *VALget(ValPtr v); gdk_export int VALcmp(const ValRecord *p, const ValRecord *q); -gdk_export int VALisnil(const ValRecord *v); +gdk_export bool VALisnil(const ValRecord *v);
/* * @- The BAT record @@ -746,7 +749,10 @@ typedef struct BAT {
typedef struct BATiter { BAT *b; - oid tvid; + union { + oid tvid; + bool tmsk; + }; } BATiter;
/* macros to hide complexity of the BAT structure */ @@ -772,6 +778,33 @@ typedef struct BATiter { #define tprops T.props
+/* some access functions for the bitmask type */ +static inline void +mskSet(BAT *b, BUN p) +{ + ((uint32_t *) b->theap.base)[p / 32] |= 1U << (p % 32); +} + +static inline void +mskClr(BAT *b, BUN p) +{ + ((uint32_t *) b->theap.base)[p / 32] &= ~(1U << (p % 32)); +} + +static inline void +mskSetVal(BAT *b, BUN p, msk v) +{ + if (v) + mskSet(b, p); + else + mskClr(b, p); +} + +static inline msk +mskGet(BAT *b, BUN p) +{ + return ((uint32_t *) b->theap.base)[p / 32] & (1U << (p % 32)); +}
/* * @- Heap Management @@ -925,7 +958,11 @@ gdk_export BUN BUNfnd(BAT *b, const void
#define Tsize(b) ((b)->twidth)
-#define tailsize(b,p) ((b)->ttype?((size_t)(p))<<(b)->tshift:0) +#define tailsize(b,p) ((b)->ttype ? \ + (ATOMstorage((b)->ttype) == TYPE_msk ? \ + (((size_t) (p) + 31) / 32) * 4 : \ + ((size_t) (p)) << (b)->tshift) : \ + 0)
#define Tloc(b,p) ((void *)((b)->theap.base+(((size_t)(p))<<(b)->tshift)))
@@ -935,7 +972,7 @@ typedef var_t stridx_t;
#define BUNtvaroff(bi,p) VarHeapVal((bi).b->theap.base, (p), (bi).b->twidth)
-#define BUNtloc(bi,p) Tloc((bi).b,p) +#define BUNtloc(bi,p) (ATOMstorage((bi).b->ttype) == TYPE_msk ? Tmsk(&(bi), p) : Tloc((bi).b,p)) #define BUNtpos(bi,p) Tpos(&(bi),p) #define BUNtvar(bi,p) (assert((bi).b->ttype && (bi).b->tvarsized), (void *) (Tbase((bi).b)+BUNtvaroff(bi,p))) #define BUNtail(bi,p) ((bi).b->ttype?(bi).b->tvarsized?BUNtvar(bi,p):BUNtloc(bi,p):BUNtpos(bi,p)) @@ -1519,6 +1556,8 @@ Tputvalue(BAT *b, BUN p, const void *v, break; #endif } + } else if (b->ttype == TYPE_msk) { + mskSetVal(b, p, * (msk *) v); } else { return ATOMputFIX(b->ttype, Tloc(b, p), v); } @@ -1528,7 +1567,11 @@ Tputvalue(BAT *b, BUN p, const void *v, static inline gdk_return __attribute__((__warn_unused_result__)) tfastins_nocheck(BAT *b, BUN p, const void *v, int s) { - b->theap.free += s; + if (ATOMstorage(b->ttype) == TYPE_msk) { + if (p % 32 == 0) + b->theap.free += 4; + } else + b->theap.free += s; return Tputvalue(b, p, v, false); }
@@ -1676,6 +1719,7 @@ VALptr(const ValRecord *v) { switch (ATOMstorage(v->vtype)) { case TYPE_void: return (const void *) &v->val.oval; + case TYPE_msk: return (const void *) &v->val.mval; case TYPE_bte: return (const void *) &v->val.btval; case TYPE_sht: return (const void *) &v->val.shval; case TYPE_int: return (const void *) &v->val.ival; @@ -1734,8 +1778,6 @@ gdk_export void *THRdata[THREADDATA]; #define THRget_errbuf(t) ((char*)t->data[2]) #define THRset_errbuf(t,b) (t->data[2] = b)
-#ifndef GDK_NOLINK - static inline bat BBPcheck(bat x, const char *y) { @@ -1773,7 +1815,12 @@ Tpos(BATiter *bi, BUN p) return (void*)&bi->tvid; }
-#endif +static inline void * +Tmsk(BATiter *bi, BUN p) +{ + bi->tmsk = mskGet(bi->b, p); + return &bi->tmsk; +}
/* * @+ Transaction Management diff --git a/gdk/gdk_atoms.c b/gdk/gdk_atoms.c --- a/gdk/gdk_atoms.c +++ b/gdk/gdk_atoms.c @@ -36,6 +36,12 @@ * NaN). */
static int +mskCmp(const msk *l, const msk *r) +{ + return (*l > *r) - (*l < *r); +} + +static int bteCmp(const bte *l, const bte *r) { return (*l > *r) - (*l < *r); @@ -450,6 +456,42 @@ voidWrite(const void *a, stream *s, size * experiments showed that library function is even slightly faster and we * now also support True/False (and trUe/FAlSE should this become a thing). */ +static ssize_t +mskFromStr(const char *src, size_t *len, msk **dst, bool external) +{ + const char *p = src; + + (void) external; + atommem(sizeof(msk)); + + if (strNil(src)) + return -1; + + while (GDKisspace(*p)) + p++; + if (*p == '0') { + **dst = 0; + p++; + } else if (*p == '1') { + **dst = 1; + p++; + } else { + return -1; + } + while (GDKisspace(*p)) + p++; + return (ssize_t) (p - src); +} + +static ssize_t +mskToStr(char **dst, size_t *len, const msk *src, bool external) +{ + (void) external; + atommem(2); + strcpy(*dst, *src ? "1" : "0"); + return 1; +} + ssize_t bitFromStr(const char *src, size_t *len, bit **dst, bool external) { @@ -1202,6 +1244,17 @@ atomDesc BATatoms[MAXATOMS] = { .atomCmp = (int (*)(const void *, const void *)) bteCmp, .atomHash = (BUN (*)(const void *)) bteHash, }, + [TYPE_msk] = { + .name = "msk", + .storage = TYPE_msk, + .linear = false, + .size = 1, /* really 1/8 */ + .atomFromStr = (ssize_t (*)(const char *, size_t *, void **, bool)) mskFromStr, + .atomToStr = (ssize_t (*)(char **, size_t *, const void *, bool)) mskToStr, +// .atomRead = (void *(*)(void *, stream *, size_t)) mskRead, +// .atomWrite = (gdk_return (*)(const void *, stream *, size_t)) mskWrite, + .atomCmp = (int (*)(const void *, const void *)) mskCmp, + }, [TYPE_bte] = { .name = "bte", .storage = TYPE_bte, diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c --- a/gdk/gdk_bat.c +++ b/gdk/gdk_bat.c @@ -184,7 +184,9 @@ COLnew(oid hseq, int tt, BUN cap, role_t /* round up to multiple of BATTINY */ if (cap < BUN_MAX - BATTINY) cap = (cap + BATTINY - 1) & ~(BATTINY - 1); - if (cap < BATTINY) + if 
(ATOMstorage(tt) == TYPE_msk && cap < 8*BATTINY) + cap = 8*BATTINY; + else if (cap < BATTINY) cap = BATTINY; /* limit the size */ if (cap > BUN_MAX) @@ -197,6 +199,9 @@ COLnew(oid hseq, int tt, BUN cap, role_t BATsetdims(bn); bn->batCapacity = cap;
+ if (ATOMstorage(tt) == TYPE_msk) + cap /= 8; /* 8 values per byte */ + /* alloc the main heaps */ if (tt && HEAPalloc(&bn->theap, cap, bn->twidth) != GDK_SUCCEED) { goto bailout; @@ -339,12 +344,16 @@ BATattach(int tt, const char *heapfile, GDKerror("heapfile size not integral number of atoms\n"); return NULL; } - if ((size_t) (st.st_size / atomsize) > (size_t) BUN_MAX) { + if (ATOMstorage(tt) == TYPE_msk ? + (st.st_size > (off_t) (BUN_MAX / 8)) : + ((size_t) (st.st_size / atomsize) > (size_t) BUN_MAX)) { fclose(f); GDKerror("heapfile too large\n"); return NULL; } - cap = (BUN) (st.st_size / atomsize); + cap = (BUN) (ATOMstorage(tt) == TYPE_msk ? + st.st_size * 8 : + st.st_size / atomsize); bn = COLnew(0, tt, cap, role); if (bn == NULL) { fclose(f); @@ -371,8 +380,8 @@ BATattach(int tt, const char *heapfile, bn->trevsorted = false; bn->tkey = false; } else { - bn->tsorted = true; - bn->trevsorted = true; + bn->tsorted = ATOMlinear(tt); + bn->trevsorted = ATOMlinear(tt); bn->tkey = true; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list
Voorlopig nog niet. Ik wil eerst nog wat verder eraan werken, en we moeten ook eerst duidelijk hebben hoe dit gebruikt moet worden. Bijvoorbeeld hoeveel van de GDK functies (denk aan calc, join, select) dit type moeten begrijpen. On 24/04/2020 18.54, Martin Kersten wrote:
wil je dat ik het doortrek naar MAL niveau?
On 24 Apr 2020, at 17:30, Sjoerd Mullender
wrote: Changeset: abf00b42a524 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=abf00b42a524 Modified Files: clients/Tests/exports.stable.out gdk/gdk.h gdk/gdk_atoms.c gdk/gdk_bat.c gdk/gdk_batop.c gdk/gdk_calc.c gdk/gdk_hash.c gdk/gdk_value.c monetdb5/mal/mal_type.c monetdb5/modules/atoms/inet.c monetdb5/modules/kernel/bat5.c sql/backends/monet5/UDF/capi/capi.c sql/backends/monet5/sql_result.c Branch: msk-type Log Message:
Initial support for the msk type.
diffs (truncated from 1402 to 300 lines):
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -1273,7 +1273,7 @@ str INETnetmask(inet *retval, const inet str INETnetwork(inet *retval, const inet *val); str INETnew(inet *retval, str *in); const inet *INETnull(void); -str INETsetmasklen(inet *retval, const inet *val, const int *msk); +str INETsetmasklen(inet *retval, const inet *val, const int *mask); str INETtext(str *retval, const inet *val); ssize_t INETtoString(str *retval, size_t *len, const inet *handle, bool external); str INSPECTatom_names(bat *ret); diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -425,7 +425,8 @@
enum { TYPE_void = 0, - TYPE_bit, + TYPE_msk, /* bit mask */ + TYPE_bit, /* TRUE, FALSE, or nil */ TYPE_bte, TYPE_sht, TYPE_bat, /* BAT id: index in BBPcache */ @@ -445,6 +446,7 @@ enum { TYPE_any = 255, /* limit types to <255! */ };
+typedef bool msk; typedef int8_t bit; typedef int8_t bte; typedef int16_t sht; @@ -602,6 +604,7 @@ typedef struct { oid oval; sht shval; bte btval; + msk mval; flt fval; ptr pval; bat bval; @@ -626,7 +629,7 @@ gdk_export void VALclear(ValPtr v); gdk_export ValPtr VALset(ValPtr v, int t, void *p); gdk_export void *VALget(ValPtr v); gdk_export int VALcmp(const ValRecord *p, const ValRecord *q); -gdk_export int VALisnil(const ValRecord *v); +gdk_export bool VALisnil(const ValRecord *v);
/* * @- The BAT record @@ -746,7 +749,10 @@ typedef struct BAT {
typedef struct BATiter { BAT *b; - oid tvid; + union { + oid tvid; + bool tmsk; + }; } BATiter;
/* macros to hide complexity of the BAT structure */ @@ -772,6 +778,33 @@ typedef struct BATiter { #define tprops T.props
+/* some access functions for the bitmask type */ +static inline void +mskSet(BAT *b, BUN p) +{ + ((uint32_t *) b->theap.base)[p / 32] |= 1U << (p % 32); +} + +static inline void +mskClr(BAT *b, BUN p) +{ + ((uint32_t *) b->theap.base)[p / 32] &= ~(1U << (p % 32)); +} + +static inline void +mskSetVal(BAT *b, BUN p, msk v) +{ + if (v) + mskSet(b, p); + else + mskClr(b, p); +} + +static inline msk +mskGet(BAT *b, BUN p) +{ + return ((uint32_t *) b->theap.base)[p / 32] & (1U << (p % 32)); +}
/* * @- Heap Management @@ -925,7 +958,11 @@ gdk_export BUN BUNfnd(BAT *b, const void
#define Tsize(b) ((b)->twidth)
-#define tailsize(b,p) ((b)->ttype?((size_t)(p))<<(b)->tshift:0) +#define tailsize(b,p) ((b)->ttype ? \ + (ATOMstorage((b)->ttype) == TYPE_msk ? \ + (((size_t) (p) + 31) / 32) * 4 : \ + ((size_t) (p)) << (b)->tshift) : \ + 0)
#define Tloc(b,p) ((void *)((b)->theap.base+(((size_t)(p))<<(b)->tshift)))
@@ -935,7 +972,7 @@ typedef var_t stridx_t;
#define BUNtvaroff(bi,p) VarHeapVal((bi).b->theap.base, (p), (bi).b->twidth)
-#define BUNtloc(bi,p) Tloc((bi).b,p) +#define BUNtloc(bi,p) (ATOMstorage((bi).b->ttype) == TYPE_msk ? Tmsk(&(bi), p) : Tloc((bi).b,p)) #define BUNtpos(bi,p) Tpos(&(bi),p) #define BUNtvar(bi,p) (assert((bi).b->ttype && (bi).b->tvarsized), (void *) (Tbase((bi).b)+BUNtvaroff(bi,p))) #define BUNtail(bi,p) ((bi).b->ttype?(bi).b->tvarsized?BUNtvar(bi,p):BUNtloc(bi,p):BUNtpos(bi,p)) @@ -1519,6 +1556,8 @@ Tputvalue(BAT *b, BUN p, const void *v, break; #endif } + } else if (b->ttype == TYPE_msk) { + mskSetVal(b, p, * (msk *) v); } else { return ATOMputFIX(b->ttype, Tloc(b, p), v); } @@ -1528,7 +1567,11 @@ Tputvalue(BAT *b, BUN p, const void *v, static inline gdk_return __attribute__((__warn_unused_result__)) tfastins_nocheck(BAT *b, BUN p, const void *v, int s) { - b->theap.free += s; + if (ATOMstorage(b->ttype) == TYPE_msk) { + if (p % 32 == 0) + b->theap.free += 4; + } else + b->theap.free += s; return Tputvalue(b, p, v, false); }
@@ -1676,6 +1719,7 @@ VALptr(const ValRecord *v) { switch (ATOMstorage(v->vtype)) { case TYPE_void: return (const void *) &v->val.oval; + case TYPE_msk: return (const void *) &v->val.mval; case TYPE_bte: return (const void *) &v->val.btval; case TYPE_sht: return (const void *) &v->val.shval; case TYPE_int: return (const void *) &v->val.ival; @@ -1734,8 +1778,6 @@ gdk_export void *THRdata[THREADDATA]; #define THRget_errbuf(t) ((char*)t->data[2]) #define THRset_errbuf(t,b) (t->data[2] = b)
-#ifndef GDK_NOLINK - static inline bat BBPcheck(bat x, const char *y) { @@ -1773,7 +1815,12 @@ Tpos(BATiter *bi, BUN p) return (void*)&bi->tvid; }
-#endif +static inline void * +Tmsk(BATiter *bi, BUN p) +{ + bi->tmsk = mskGet(bi->b, p); + return &bi->tmsk; +}
/* * @+ Transaction Management diff --git a/gdk/gdk_atoms.c b/gdk/gdk_atoms.c --- a/gdk/gdk_atoms.c +++ b/gdk/gdk_atoms.c @@ -36,6 +36,12 @@ * NaN). */
static int +mskCmp(const msk *l, const msk *r) +{ + return (*l > *r) - (*l < *r); +} + +static int bteCmp(const bte *l, const bte *r) { return (*l > *r) - (*l < *r); @@ -450,6 +456,42 @@ voidWrite(const void *a, stream *s, size * experiments showed that library function is even slightly faster and we * now also support True/False (and trUe/FAlSE should this become a thing). */ +static ssize_t +mskFromStr(const char *src, size_t *len, msk **dst, bool external) +{ + const char *p = src; + + (void) external; + atommem(sizeof(msk)); + + if (strNil(src)) + return -1; + + while (GDKisspace(*p)) + p++; + if (*p == '0') { + **dst = 0; + p++; + } else if (*p == '1') { + **dst = 1; + p++; + } else { + return -1; + } + while (GDKisspace(*p)) + p++; + return (ssize_t) (p - src); +} + +static ssize_t +mskToStr(char **dst, size_t *len, const msk *src, bool external) +{ + (void) external; + atommem(2); + strcpy(*dst, *src ? "1" : "0"); + return 1; +} + ssize_t bitFromStr(const char *src, size_t *len, bit **dst, bool external) { @@ -1202,6 +1244,17 @@ atomDesc BATatoms[MAXATOMS] = { .atomCmp = (int (*)(const void *, const void *)) bteCmp, .atomHash = (BUN (*)(const void *)) bteHash, }, + [TYPE_msk] = { + .name = "msk", + .storage = TYPE_msk, + .linear = false, + .size = 1, /* really 1/8 */ + .atomFromStr = (ssize_t (*)(const char *, size_t *, void **, bool)) mskFromStr, + .atomToStr = (ssize_t (*)(char **, size_t *, const void *, bool)) mskToStr, +// .atomRead = (void *(*)(void *, stream *, size_t)) mskRead, +// .atomWrite = (gdk_return (*)(const void *, stream *, size_t)) mskWrite, + .atomCmp = (int (*)(const void *, const void *)) mskCmp, + }, [TYPE_bte] = { .name = "bte", .storage = TYPE_bte, diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c --- a/gdk/gdk_bat.c +++ b/gdk/gdk_bat.c @@ -184,7 +184,9 @@ COLnew(oid hseq, int tt, BUN cap, role_t /* round up to multiple of BATTINY */ if (cap < BUN_MAX - BATTINY) cap = (cap + BATTINY - 1) & ~(BATTINY - 1); - if (cap < BATTINY) + if 
(ATOMstorage(tt) == TYPE_msk && cap < 8*BATTINY) + cap = 8*BATTINY; + else if (cap < BATTINY) cap = BATTINY; /* limit the size */ if (cap > BUN_MAX) @@ -197,6 +199,9 @@ COLnew(oid hseq, int tt, BUN cap, role_t BATsetdims(bn); bn->batCapacity = cap;
+ if (ATOMstorage(tt) == TYPE_msk) + cap /= 8; /* 8 values per byte */ + /* alloc the main heaps */ if (tt && HEAPalloc(&bn->theap, cap, bn->twidth) != GDK_SUCCEED) { goto bailout; @@ -339,12 +344,16 @@ BATattach(int tt, const char *heapfile, GDKerror("heapfile size not integral number of atoms\n"); return NULL; } - if ((size_t) (st.st_size / atomsize) > (size_t) BUN_MAX) { + if (ATOMstorage(tt) == TYPE_msk ? + (st.st_size > (off_t) (BUN_MAX / 8)) : + ((size_t) (st.st_size / atomsize) > (size_t) BUN_MAX)) { fclose(f); GDKerror("heapfile too large\n"); return NULL; } - cap = (BUN) (st.st_size / atomsize); + cap = (BUN) (ATOMstorage(tt) == TYPE_msk ? + st.st_size * 8 : + st.st_size / atomsize); bn = COLnew(0, tt, cap, role); if (bn == NULL) { fclose(f); @@ -371,8 +380,8 @@ BATattach(int tt, const char *heapfile, bn->trevsorted = false; bn->tkey = false; } else { - bn->tsorted = true; - bn->trevsorted = true; + bn->tsorted = ATOMlinear(tt); + bn->trevsorted = ATOMlinear(tt); bn->tkey = true; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list
_______________________________________________ developers-list mailing list developers-list@monetdb.org https://www.monetdb.org/mailman/listinfo/developers-list
-- Sjoerd Mullender
participants (2)
-
Martin Kersten
-
Sjoerd Mullender