Re: MonetDB: default - Defense against non-existing MAL primitives
FYI, test clients/R/Tests/survey.R still fails with "Server says '!TypeException:user.s94_2[40]:'batcalc.*' undefined in: X_168:bat[:oid,:hge] := batcalc.*(X_146,X_166);'." i.e., the check whether the requested new signature exists does not seem to work, yet. cf., http://monetdb.cwi.nl/testweb/web/testgrid.php?serial=55075:ca15cab457b8&order=platform,arch,compiler&targets=Cla-Darwin-x86_64,GNU-Fedora-x86_64-oid32-dbfarm,GNU-FreeBSD-x86_64,GNU-OpenIndiana-x86_64&module=clients Stefan ----- Original Message -----
Changeset: 4165dc34b2bb for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4165dc34b2bb Modified Files: monetdb5/mal/mal_resolve.c monetdb5/mal/mal_resolve.h monetdb5/optimizer/opt_coercion.c Branch: default Log Message:
Defense against non-existing MAL primitives and further limit the rewrite to the simpliest and most expensive case encountered.
diffs (244 lines):
diff --git a/monetdb5/mal/mal_resolve.c b/monetdb5/mal/mal_resolve.c --- a/monetdb5/mal/mal_resolve.c +++ b/monetdb5/mal/mal_resolve.c @@ -115,7 +115,6 @@ findFunctionType(stream *out, Module sco Symbol s; InstrPtr sig; int i, k, unmatched = 0, s1; - /* int foundbutwrong=0; */ int polytype[MAXTYPEVAR]; int returns[256]; int *returntype = NULL; @@ -396,7 +395,6 @@ findFunctionType(stream *out, Module sco } } if (s1 < 0) { - /* if(getSignature(s)->token !=PATTERNsymbol) foundbutwrong++; */ s = s->peer; continue; } @@ -445,7 +443,6 @@ findFunctionType(stream *out, Module sco * typed. This should be reflected in the symbol table. */ s1 = returntype[0]; /* for those interested */ - /* foundbutwrong = 0; */ /* * If the call refers to a polymorphic function, we clone it * to arrive at a bounded instance. Polymorphic patterns and @@ -485,12 +482,6 @@ findFunctionType(stream *out, Module sco * arguments, but that clashes with one of the target variables. */ wrapup: - /* foundbutwrong has not been changed, commented out code above - if (foundbutwrong && !silent) { - showScriptException(out, mb, getPC(mb, p), TYPE, - "type conflict in assignment"); - } - */ if (returntype && returntype != returns) GDKfree(returntype); return -3; @@ -819,13 +810,17 @@ chkTypes(stream *out, Module s, MalBlkPt
/* * Type checking an individual instruction is dangerous, - * because it ignores data flow and declarations issues. - * It is only to be used in isolated cases. + * because it ignores data flow and variable declarations. */ -void +int chkInstruction(stream *out, Module s, MalBlkPtr mb, InstrPtr p) { - typeChecker(out, s, mb, p, FALSE); + int olderrors= mb->errors; + int error; + typeChecker(out, s, mb, p, TRUE); + error = mb->errors; + mb->errors = olderrors; + return error; }
void diff --git a/monetdb5/mal/mal_resolve.h b/monetdb5/mal/mal_resolve.h --- a/monetdb5/mal/mal_resolve.h +++ b/monetdb5/mal/mal_resolve.h @@ -20,7 +20,7 @@ #define MAXTYPEVAR 10
mal_export void chkProgram(stream *out, Module s, MalBlkPtr mb); -mal_export void chkInstruction(stream *out, Module s, MalBlkPtr mb, InstrPtr p); +mal_export int chkInstruction(stream *out, Module s, MalBlkPtr mb, InstrPtr p); mal_export void chkTypes(stream *out, Module s, MalBlkPtr mb, int silent); mal_export void typeChecker(stream *out, Module scope, MalBlkPtr mb, InstrPtr p, int silent); mal_export int fcnBinder(stream *out, Module scope, MalBlkPtr mb, InstrPtr p); diff --git a/monetdb5/optimizer/opt_coercion.c b/monetdb5/optimizer/opt_coercion.c --- a/monetdb5/optimizer/opt_coercion.c +++ b/monetdb5/optimizer/opt_coercion.c @@ -18,8 +18,9 @@ typedef struct{ int fromtype; int totype; int src; -/* not used, yet !?? +/* not used, yet !?? Indeed int digits; + int fromscale; int scale; */ } Coercion; @@ -47,58 +48,60 @@ coercionOptimizerStep(MalBlkPtr mb, int return 0; }
-/* Check coercions for numeric types that can be handled with smaller ones. +/* Check coercions for numeric types towards :hge that can be handled with smaller ones. * For now, limit to +,-,/,*,% hge expressions - * To be extended to deal with math calls as well. + * Not every combination may be available in the MAL layer, which calls + * for a separate type check before fixing it. + * Superflous coercion statements will be garbagecollected later on in the pipeline */ static void -coercionOptimizerCalcStep(MalBlkPtr mb, int i, Coercion *coerce) +coercionOptimizerCalcStep(Client cntxt, MalBlkPtr mb, int i, Coercion *coerce) { InstrPtr p = getInstrPtr(mb,i); - int r, a, b; + int r, a, b, varid;
+ r = getColumnType(getVarType(mb, getArg(p,0))); +#ifdef HAVE_HGE + if ( r != TYPE_hge) + return; +#endif if( getModuleId(p) != batcalcRef || getFunctionId(p) == 0) return; if( ! (getFunctionId(p) == plusRef || getFunctionId(p) == minusRef || getFunctionId(p) == mulRef || getFunctionId(p) == divRef || *getFunctionId(p) =='%') || p->argc !=3) return;
- r = getColumnType(getVarType(mb, getArg(p,0))); - switch(r){ - case TYPE_bte: - case TYPE_sht: - case TYPE_int: - case TYPE_lng: -#ifdef HAVE_HGE - case TYPE_hge: -#endif - break; - case TYPE_dbl: - case TYPE_flt: - /* to be determined */ - default: - return; - } a = getColumnType(getVarType(mb, getArg(p,1))); b = getColumnType(getVarType(mb, getArg(p,2))); - if ( a == r && coerce[getArg(p,1)].src && coerce[getArg(p,1)].fromtype < r ) /*digit/scale test as well*/ + if ( a == r && coerce[getArg(p,1)].src && coerce[getArg(p,1)].fromtype < r ) + { + varid = getArg(p,1); getArg(p,1) = coerce[getArg(p,1)].src; - if ( b == r && coerce[getArg(p,2)].src && coerce[getArg(p,2)].fromtype < r ) /*digit/scale test as well*/ + if ( chkInstruction(NULL, cntxt->nspace, mb, p)) + p->argv[1] = varid; + } + if ( b == r && coerce[getArg(p,2)].src && coerce[getArg(p,2)].fromtype < r ) + { + varid = getArg(p,2); getArg(p,2) = coerce[getArg(p,2)].src; + if ( chkInstruction(NULL, cntxt->nspace, mb, p)) + p->argv[2] = varid; + } return; }
static void -coercionOptimizerAggrStep(MalBlkPtr mb, int i, Coercion *coerce) +coercionOptimizerAggrStep(Client cntxt, MalBlkPtr mb, int i, Coercion *coerce) { InstrPtr p = getInstrPtr(mb,i); int r, k;
+ (void) cntxt; + if( getModuleId(p) != aggrRef || getFunctionId(p) == 0) return; if( ! (getFunctionId(p) == subavgRef ) || p->argc !=6) return;
r = getColumnType(getVarType(mb, getArg(p,0))); k = getArg(p,1); - // check the digits/scale if( r == TYPE_dbl && coerce[k].src ) getArg(p,1) = coerce[getArg(p,1)].src; return; @@ -124,35 +127,33 @@ OPTcoercionImplementation(Client cntxt,M if (getModuleId(p) == NULL) continue; /* Downscale the type, avoiding hge storage when lng would be sufficient. - * The code template can be extended to handle other downscale options as well */ #ifdef HAVE_HGE if ( getModuleId(p) == batcalcRef && getFunctionId(p) == hgeRef && p->retc == 1 - && ( p->argc == 2 - || ( p->argc == 5 - && isVarConstant(mb,getArg(p,1)) - && getArgType(mb,p,1) == TYPE_int - && isVarConstant(mb,getArg(p,3)) - && getArgType(mb,p,3) == TYPE_int - && isVarConstant(mb,getArg(p,4)) - && getArgType(mb,p,4) == TYPE_int - /* from-scale == to-scale, i.e., no scale change */ - && *(int*) getVarValue(mb, getArg(p,1)) == *(int*) getVarValue(mb, getArg(p,4)) ) ) ){ + && ( p->argc == 5 + && isVarConstant(mb,getArg(p,1)) + && getArgType(mb,p,1) == TYPE_int + && isVarConstant(mb,getArg(p,3)) + && getArgType(mb,p,3) == TYPE_int + && isVarConstant(mb,getArg(p,4)) + && getArgType(mb,p,4) == TYPE_int + /* from-scale == to-scale, i.e., no scale change */ + && *(int*) getVarValue(mb, getArg(p,1)) == *(int*) getVarValue(mb, getArg(p,4)) ) ){ k = getArg(p,0); coerce[k].pc= i; coerce[k].totype= TYPE_hge; coerce[k].src= getArg(p,2); coerce[k].fromtype= getColumnType(getArgType(mb,p,2)); -/* not used, yet !?? - if (p->argc == 5) { - coerce[k].digits= getVarConstant(mb,getArg(p,3)).val.ival; - coerce[k].scale= getVarConstant(mb,getArg(p,4)).val.ival; - } +/* not used, yet !?? indeed + coerce[k].fromscale= getVarConstant(mb,getArg(p,1)).val.ival; + coerce[k].digits= getVarConstant(mb,getArg(p,3)).val.ival; + coerce[k].scale= getVarConstant(mb,getArg(p,4)).val.ival; */ } #endif +/* if ( getModuleId(p) == batcalcRef && getFunctionId(p) == dblRef && p->retc == 1 @@ -160,7 +161,7 @@ OPTcoercionImplementation(Client cntxt,M || ( p->argc == 3 && isVarConstant(mb,getArg(p,1)) && getArgType(mb,p,1) == TYPE_int - /* to-scale == 0, i.e., no scale change */ + //to-scale == 0, i.e., no scale change && *(int*) getVarValue(mb, getArg(p,1)) == 0 ) ) ) { k = getArg(p,0); coerce[k].pc= i; @@ -168,8 +169,9 @@ OPTcoercionImplementation(Client cntxt,M coerce[k].src= getArg(p,1 + (p->argc ==3)); coerce[k].fromtype= getColumnType(getArgType(mb,p,1 + (p->argc ==3))); } - coercionOptimizerAggrStep(mb, i, coerce); - coercionOptimizerCalcStep(mb, i, coerce); +*/ + coercionOptimizerAggrStep(cntxt,mb, i, coerce); + coercionOptimizerCalcStep(cntxt,mb, i, coerce); if (getModuleId(p)==calcRef && p->argc == 2) { k= coercionOptimizerStep(mb, i, p); actions += k; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list
-- | Stefan.Manegold@CWI.nl | DB Architectures (DA) | | www.CWI.nl/~manegold/ | Science Park 123 (L321) | | +31 (0)20 592-4212 | 1098 XG Amsterdam (NL) |
Martin, thanks for extending the error message info! It now says "Server says '!TypeException:user.s94_2[40]:'batcalc.*' undefined in: X_168:bat[:oid,:hge] := batcalc.*(X_146:bat[:oid,:int],X_166:bat[:oid,:sht]);'." Martin, Hannes, Niels, Question is, why is why is there an upcast from int to hge in the original plan (now removed by the coercion optimizer), and is the batcalc.* to hge on (originally) int & sht generated because of that (now removed) upcast, or for some other reason, e.g., does the query request the result to by of type bigint, or of type decimal with more that 18 digits? Stefan ----- Original Message -----
FYI,
test clients/R/Tests/survey.R still fails with "Server says '!TypeException:user.s94_2[40]:'batcalc.*' undefined in: X_168:bat[:oid,:hge] := batcalc.*(X_146,X_166);'." i.e., the check whether the requested new signature exists does not seem to work, yet.
Stefan
----- Original Message -----
Changeset: 4165dc34b2bb for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4165dc34b2bb Modified Files: monetdb5/mal/mal_resolve.c monetdb5/mal/mal_resolve.h monetdb5/optimizer/opt_coercion.c Branch: default Log Message:
Defense against non-existing MAL primitives and further limit the rewrite to the simpliest and most expensive case encountered.
diffs (244 lines):
diff --git a/monetdb5/mal/mal_resolve.c b/monetdb5/mal/mal_resolve.c --- a/monetdb5/mal/mal_resolve.c +++ b/monetdb5/mal/mal_resolve.c @@ -115,7 +115,6 @@ findFunctionType(stream *out, Module sco Symbol s; InstrPtr sig; int i, k, unmatched = 0, s1; - /* int foundbutwrong=0; */ int polytype[MAXTYPEVAR]; int returns[256]; int *returntype = NULL; @@ -396,7 +395,6 @@ findFunctionType(stream *out, Module sco } } if (s1 < 0) { - /* if(getSignature(s)->token !=PATTERNsymbol) foundbutwrong++; */ s = s->peer; continue; } @@ -445,7 +443,6 @@ findFunctionType(stream *out, Module sco * typed. This should be reflected in the symbol table. */ s1 = returntype[0]; /* for those interested */ - /* foundbutwrong = 0; */ /* * If the call refers to a polymorphic function, we clone it * to arrive at a bounded instance. Polymorphic patterns and @@ -485,12 +482,6 @@ findFunctionType(stream *out, Module sco * arguments, but that clashes with one of the target variables. */ wrapup: - /* foundbutwrong has not been changed, commented out code above - if (foundbutwrong && !silent) { - showScriptException(out, mb, getPC(mb, p), TYPE, - "type conflict in assignment"); - } - */ if (returntype && returntype != returns) GDKfree(returntype); return -3; @@ -819,13 +810,17 @@ chkTypes(stream *out, Module s, MalBlkPt
/* * Type checking an individual instruction is dangerous, - * because it ignores data flow and declarations issues. - * It is only to be used in isolated cases. + * because it ignores data flow and variable declarations. */ -void +int chkInstruction(stream *out, Module s, MalBlkPtr mb, InstrPtr p) { - typeChecker(out, s, mb, p, FALSE); + int olderrors= mb->errors; + int error; + typeChecker(out, s, mb, p, TRUE); + error = mb->errors; + mb->errors = olderrors; + return error; }
void diff --git a/monetdb5/mal/mal_resolve.h b/monetdb5/mal/mal_resolve.h --- a/monetdb5/mal/mal_resolve.h +++ b/monetdb5/mal/mal_resolve.h @@ -20,7 +20,7 @@ #define MAXTYPEVAR 10
mal_export void chkProgram(stream *out, Module s, MalBlkPtr mb); -mal_export void chkInstruction(stream *out, Module s, MalBlkPtr mb, InstrPtr p); +mal_export int chkInstruction(stream *out, Module s, MalBlkPtr mb, InstrPtr p); mal_export void chkTypes(stream *out, Module s, MalBlkPtr mb, int silent); mal_export void typeChecker(stream *out, Module scope, MalBlkPtr mb, InstrPtr p, int silent); mal_export int fcnBinder(stream *out, Module scope, MalBlkPtr mb, InstrPtr p); diff --git a/monetdb5/optimizer/opt_coercion.c b/monetdb5/optimizer/opt_coercion.c --- a/monetdb5/optimizer/opt_coercion.c +++ b/monetdb5/optimizer/opt_coercion.c @@ -18,8 +18,9 @@ typedef struct{ int fromtype; int totype; int src; -/* not used, yet !?? +/* not used, yet !?? Indeed int digits; + int fromscale; int scale; */ } Coercion; @@ -47,58 +48,60 @@ coercionOptimizerStep(MalBlkPtr mb, int return 0; }
-/* Check coercions for numeric types that can be handled with smaller ones. +/* Check coercions for numeric types towards :hge that can be handled with smaller ones. * For now, limit to +,-,/,*,% hge expressions - * To be extended to deal with math calls as well. + * Not every combination may be available in the MAL layer, which calls + * for a separate type check before fixing it. + * Superflous coercion statements will be garbagecollected later on in the pipeline */ static void -coercionOptimizerCalcStep(MalBlkPtr mb, int i, Coercion *coerce) +coercionOptimizerCalcStep(Client cntxt, MalBlkPtr mb, int i, Coercion *coerce) { InstrPtr p = getInstrPtr(mb,i); - int r, a, b; + int r, a, b, varid;
+ r = getColumnType(getVarType(mb, getArg(p,0))); +#ifdef HAVE_HGE + if ( r != TYPE_hge) + return; +#endif if( getModuleId(p) != batcalcRef || getFunctionId(p) == 0) return; if( ! (getFunctionId(p) == plusRef || getFunctionId(p) == minusRef || getFunctionId(p) == mulRef || getFunctionId(p) == divRef || *getFunctionId(p) =='%') || p->argc !=3) return;
- r = getColumnType(getVarType(mb, getArg(p,0))); - switch(r){ - case TYPE_bte: - case TYPE_sht: - case TYPE_int: - case TYPE_lng: -#ifdef HAVE_HGE - case TYPE_hge: -#endif - break; - case TYPE_dbl: - case TYPE_flt: - /* to be determined */ - default: - return; - } a = getColumnType(getVarType(mb, getArg(p,1))); b = getColumnType(getVarType(mb, getArg(p,2))); - if ( a == r && coerce[getArg(p,1)].src && coerce[getArg(p,1)].fromtype < r ) /*digit/scale test as well*/ + if ( a == r && coerce[getArg(p,1)].src && coerce[getArg(p,1)].fromtype < r ) + { + varid = getArg(p,1); getArg(p,1) = coerce[getArg(p,1)].src; - if ( b == r && coerce[getArg(p,2)].src && coerce[getArg(p,2)].fromtype < r ) /*digit/scale test as well*/ + if ( chkInstruction(NULL, cntxt->nspace, mb, p)) + p->argv[1] = varid; + } + if ( b == r && coerce[getArg(p,2)].src && coerce[getArg(p,2)].fromtype < r ) + { + varid = getArg(p,2); getArg(p,2) = coerce[getArg(p,2)].src; + if ( chkInstruction(NULL, cntxt->nspace, mb, p)) + p->argv[2] = varid; + } return; }
static void -coercionOptimizerAggrStep(MalBlkPtr mb, int i, Coercion *coerce) +coercionOptimizerAggrStep(Client cntxt, MalBlkPtr mb, int i, Coercion *coerce) { InstrPtr p = getInstrPtr(mb,i); int r, k;
+ (void) cntxt; + if( getModuleId(p) != aggrRef || getFunctionId(p) == 0) return; if( ! (getFunctionId(p) == subavgRef ) || p->argc !=6) return;
r = getColumnType(getVarType(mb, getArg(p,0))); k = getArg(p,1); - // check the digits/scale if( r == TYPE_dbl && coerce[k].src ) getArg(p,1) = coerce[getArg(p,1)].src; return; @@ -124,35 +127,33 @@ OPTcoercionImplementation(Client cntxt,M if (getModuleId(p) == NULL) continue; /* Downscale the type, avoiding hge storage when lng would be sufficient. - * The code template can be extended to handle other downscale options as well */ #ifdef HAVE_HGE if ( getModuleId(p) == batcalcRef && getFunctionId(p) == hgeRef && p->retc == 1 - && ( p->argc == 2 - || ( p->argc == 5 - && isVarConstant(mb,getArg(p,1)) - && getArgType(mb,p,1) == TYPE_int - && isVarConstant(mb,getArg(p,3)) - && getArgType(mb,p,3) == TYPE_int - && isVarConstant(mb,getArg(p,4)) - && getArgType(mb,p,4) == TYPE_int - /* from-scale == to-scale, i.e., no scale change */ - && *(int*) getVarValue(mb, getArg(p,1)) == *(int*) getVarValue(mb, getArg(p,4)) ) ) ){ + && ( p->argc == 5 + && isVarConstant(mb,getArg(p,1)) + && getArgType(mb,p,1) == TYPE_int + && isVarConstant(mb,getArg(p,3)) + && getArgType(mb,p,3) == TYPE_int + && isVarConstant(mb,getArg(p,4)) + && getArgType(mb,p,4) == TYPE_int + /* from-scale == to-scale, i.e., no scale change */ + && *(int*) getVarValue(mb, getArg(p,1)) == *(int*) getVarValue(mb, getArg(p,4)) ) ){ k = getArg(p,0); coerce[k].pc= i; coerce[k].totype= TYPE_hge; coerce[k].src= getArg(p,2); coerce[k].fromtype= getColumnType(getArgType(mb,p,2)); -/* not used, yet !?? - if (p->argc == 5) { - coerce[k].digits= getVarConstant(mb,getArg(p,3)).val.ival; - coerce[k].scale= getVarConstant(mb,getArg(p,4)).val.ival; - } +/* not used, yet !?? indeed + coerce[k].fromscale= getVarConstant(mb,getArg(p,1)).val.ival; + coerce[k].digits= getVarConstant(mb,getArg(p,3)).val.ival; + coerce[k].scale= getVarConstant(mb,getArg(p,4)).val.ival; */ } #endif +/* if ( getModuleId(p) == batcalcRef && getFunctionId(p) == dblRef && p->retc == 1 @@ -160,7 +161,7 @@ OPTcoercionImplementation(Client cntxt,M || ( p->argc == 3 && isVarConstant(mb,getArg(p,1)) && getArgType(mb,p,1) == TYPE_int - /* to-scale == 0, i.e., no scale change */ + //to-scale == 0, i.e., no scale change && *(int*) getVarValue(mb, getArg(p,1)) == 0 ) ) ) { k = getArg(p,0); coerce[k].pc= i; @@ -168,8 +169,9 @@ OPTcoercionImplementation(Client cntxt,M coerce[k].src= getArg(p,1 + (p->argc ==3)); coerce[k].fromtype= getColumnType(getArgType(mb,p,1 + (p->argc ==3))); } - coercionOptimizerAggrStep(mb, i, coerce); - coercionOptimizerCalcStep(mb, i, coerce); +*/ + coercionOptimizerAggrStep(cntxt,mb, i, coerce); + coercionOptimizerCalcStep(cntxt,mb, i, coerce); if (getModuleId(p)==calcRef && p->argc == 2) { k= coercionOptimizerStep(mb, i, p); actions += k; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list
-- | Stefan.Manegold@CWI.nl | DB Architectures (DA) | | www.CWI.nl/~manegold/ | Science Park 123 (L321) | | +31 (0)20 592-4212 | 1098 XG Amsterdam (NL) |
_______________________________________________ developers-list mailing list developers-list@monetdb.org https://www.monetdb.org/mailman/listinfo/developers-list
-- | Stefan.Manegold@CWI.nl | DB Architectures (DA) | | www.CWI.nl/~manegold/ | Science Park 123 (L321) | | +31 (0)20 592-4212 | 1098 XG Amsterdam (NL) |
----- Original Message -----
Martin,
thanks for extending the error message info!
It now says "Server says '!TypeException:user.s94_2[40]:'batcalc.*' undefined in: X_168:bat[:oid,:hge] := batcalc.*(X_146:bat[:oid,:int],X_166:bat[:oid,:sht]);'."
Martin, Hannes, Niels,
Question is, why is why is there an upcast from int to hge in the original plan (now removed by the coercion optimizer),
or was the upcast from sht to hge removed? or both?
and is the batcalc.* to hge on (originally) int & sht generated because of that (now removed) upcast, or for some other reason, e.g., does the query request the result to by of type bigint, or of type decimal with more that 18 digits?
Stefan
----- Original Message -----
FYI,
test clients/R/Tests/survey.R still fails with "Server says '!TypeException:user.s94_2[40]:'batcalc.*' undefined in: X_168:bat[:oid,:hge] := batcalc.*(X_146,X_166);'." i.e., the check whether the requested new signature exists does not seem to work, yet.
Stefan
----- Original Message -----
Changeset: 4165dc34b2bb for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4165dc34b2bb Modified Files: monetdb5/mal/mal_resolve.c monetdb5/mal/mal_resolve.h monetdb5/optimizer/opt_coercion.c Branch: default Log Message:
Defense against non-existing MAL primitives and further limit the rewrite to the simpliest and most expensive case encountered.
diffs (244 lines):
diff --git a/monetdb5/mal/mal_resolve.c b/monetdb5/mal/mal_resolve.c --- a/monetdb5/mal/mal_resolve.c +++ b/monetdb5/mal/mal_resolve.c @@ -115,7 +115,6 @@ findFunctionType(stream *out, Module sco Symbol s; InstrPtr sig; int i, k, unmatched = 0, s1; - /* int foundbutwrong=0; */ int polytype[MAXTYPEVAR]; int returns[256]; int *returntype = NULL; @@ -396,7 +395,6 @@ findFunctionType(stream *out, Module sco } } if (s1 < 0) { - /* if(getSignature(s)->token !=PATTERNsymbol) foundbutwrong++; */ s = s->peer; continue; } @@ -445,7 +443,6 @@ findFunctionType(stream *out, Module sco * typed. This should be reflected in the symbol table. */ s1 = returntype[0]; /* for those interested */ - /* foundbutwrong = 0; */ /* * If the call refers to a polymorphic function, we clone it * to arrive at a bounded instance. Polymorphic patterns and @@ -485,12 +482,6 @@ findFunctionType(stream *out, Module sco * arguments, but that clashes with one of the target variables. */ wrapup: - /* foundbutwrong has not been changed, commented out code above - if (foundbutwrong && !silent) { - showScriptException(out, mb, getPC(mb, p), TYPE, - "type conflict in assignment"); - } - */ if (returntype && returntype != returns) GDKfree(returntype); return -3; @@ -819,13 +810,17 @@ chkTypes(stream *out, Module s, MalBlkPt
/* * Type checking an individual instruction is dangerous, - * because it ignores data flow and declarations issues. - * It is only to be used in isolated cases. + * because it ignores data flow and variable declarations. */ -void +int chkInstruction(stream *out, Module s, MalBlkPtr mb, InstrPtr p) { - typeChecker(out, s, mb, p, FALSE); + int olderrors= mb->errors; + int error; + typeChecker(out, s, mb, p, TRUE); + error = mb->errors; + mb->errors = olderrors; + return error; }
void diff --git a/monetdb5/mal/mal_resolve.h b/monetdb5/mal/mal_resolve.h --- a/monetdb5/mal/mal_resolve.h +++ b/monetdb5/mal/mal_resolve.h @@ -20,7 +20,7 @@ #define MAXTYPEVAR 10
mal_export void chkProgram(stream *out, Module s, MalBlkPtr mb); -mal_export void chkInstruction(stream *out, Module s, MalBlkPtr mb, InstrPtr p); +mal_export int chkInstruction(stream *out, Module s, MalBlkPtr mb, InstrPtr p); mal_export void chkTypes(stream *out, Module s, MalBlkPtr mb, int silent); mal_export void typeChecker(stream *out, Module scope, MalBlkPtr mb, InstrPtr p, int silent); mal_export int fcnBinder(stream *out, Module scope, MalBlkPtr mb, InstrPtr p); diff --git a/monetdb5/optimizer/opt_coercion.c b/monetdb5/optimizer/opt_coercion.c --- a/monetdb5/optimizer/opt_coercion.c +++ b/monetdb5/optimizer/opt_coercion.c @@ -18,8 +18,9 @@ typedef struct{ int fromtype; int totype; int src; -/* not used, yet !?? +/* not used, yet !?? Indeed int digits; + int fromscale; int scale; */ } Coercion; @@ -47,58 +48,60 @@ coercionOptimizerStep(MalBlkPtr mb, int return 0; }
-/* Check coercions for numeric types that can be handled with smaller ones. +/* Check coercions for numeric types towards :hge that can be handled with smaller ones. * For now, limit to +,-,/,*,% hge expressions - * To be extended to deal with math calls as well. + * Not every combination may be available in the MAL layer, which calls + * for a separate type check before fixing it. + * Superflous coercion statements will be garbagecollected later on in the pipeline */ static void -coercionOptimizerCalcStep(MalBlkPtr mb, int i, Coercion *coerce) +coercionOptimizerCalcStep(Client cntxt, MalBlkPtr mb, int i, Coercion *coerce) { InstrPtr p = getInstrPtr(mb,i); - int r, a, b; + int r, a, b, varid;
+ r = getColumnType(getVarType(mb, getArg(p,0))); +#ifdef HAVE_HGE + if ( r != TYPE_hge) + return; +#endif if( getModuleId(p) != batcalcRef || getFunctionId(p) == 0) return; if( ! (getFunctionId(p) == plusRef || getFunctionId(p) == minusRef || getFunctionId(p) == mulRef || getFunctionId(p) == divRef || *getFunctionId(p) =='%') || p->argc !=3) return;
- r = getColumnType(getVarType(mb, getArg(p,0))); - switch(r){ - case TYPE_bte: - case TYPE_sht: - case TYPE_int: - case TYPE_lng: -#ifdef HAVE_HGE - case TYPE_hge: -#endif - break; - case TYPE_dbl: - case TYPE_flt: - /* to be determined */ - default: - return; - } a = getColumnType(getVarType(mb, getArg(p,1))); b = getColumnType(getVarType(mb, getArg(p,2))); - if ( a == r && coerce[getArg(p,1)].src && coerce[getArg(p,1)].fromtype < r ) /*digit/scale test as well*/ + if ( a == r && coerce[getArg(p,1)].src && coerce[getArg(p,1)].fromtype < r ) + { + varid = getArg(p,1); getArg(p,1) = coerce[getArg(p,1)].src; - if ( b == r && coerce[getArg(p,2)].src && coerce[getArg(p,2)].fromtype < r ) /*digit/scale test as well*/ + if ( chkInstruction(NULL, cntxt->nspace, mb, p)) + p->argv[1] = varid; + } + if ( b == r && coerce[getArg(p,2)].src && coerce[getArg(p,2)].fromtype < r ) + { + varid = getArg(p,2); getArg(p,2) = coerce[getArg(p,2)].src; + if ( chkInstruction(NULL, cntxt->nspace, mb, p)) + p->argv[2] = varid; + } return; }
static void -coercionOptimizerAggrStep(MalBlkPtr mb, int i, Coercion *coerce) +coercionOptimizerAggrStep(Client cntxt, MalBlkPtr mb, int i, Coercion *coerce) { InstrPtr p = getInstrPtr(mb,i); int r, k;
+ (void) cntxt; + if( getModuleId(p) != aggrRef || getFunctionId(p) == 0) return; if( ! (getFunctionId(p) == subavgRef ) || p->argc !=6) return;
r = getColumnType(getVarType(mb, getArg(p,0))); k = getArg(p,1); - // check the digits/scale if( r == TYPE_dbl && coerce[k].src ) getArg(p,1) = coerce[getArg(p,1)].src; return; @@ -124,35 +127,33 @@ OPTcoercionImplementation(Client cntxt,M if (getModuleId(p) == NULL) continue; /* Downscale the type, avoiding hge storage when lng would be sufficient. - * The code template can be extended to handle other downscale options as well */ #ifdef HAVE_HGE if ( getModuleId(p) == batcalcRef && getFunctionId(p) == hgeRef && p->retc == 1 - && ( p->argc == 2 - || ( p->argc == 5 - && isVarConstant(mb,getArg(p,1)) - && getArgType(mb,p,1) == TYPE_int - && isVarConstant(mb,getArg(p,3)) - && getArgType(mb,p,3) == TYPE_int - && isVarConstant(mb,getArg(p,4)) - && getArgType(mb,p,4) == TYPE_int - /* from-scale == to-scale, i.e., no scale change */ - && *(int*) getVarValue(mb, getArg(p,1)) == *(int*) getVarValue(mb, getArg(p,4)) ) ) ){ + && ( p->argc == 5 + && isVarConstant(mb,getArg(p,1)) + && getArgType(mb,p,1) == TYPE_int + && isVarConstant(mb,getArg(p,3)) + && getArgType(mb,p,3) == TYPE_int + && isVarConstant(mb,getArg(p,4)) + && getArgType(mb,p,4) == TYPE_int + /* from-scale == to-scale, i.e., no scale change */ + && *(int*) getVarValue(mb, getArg(p,1)) == *(int*) getVarValue(mb, getArg(p,4)) ) ){ k = getArg(p,0); coerce[k].pc= i; coerce[k].totype= TYPE_hge; coerce[k].src= getArg(p,2); coerce[k].fromtype= getColumnType(getArgType(mb,p,2)); -/* not used, yet !?? - if (p->argc == 5) { - coerce[k].digits= getVarConstant(mb,getArg(p,3)).val.ival; - coerce[k].scale= getVarConstant(mb,getArg(p,4)).val.ival; - } +/* not used, yet !?? indeed + coerce[k].fromscale= getVarConstant(mb,getArg(p,1)).val.ival; + coerce[k].digits= getVarConstant(mb,getArg(p,3)).val.ival; + coerce[k].scale= getVarConstant(mb,getArg(p,4)).val.ival; */ } #endif +/* if ( getModuleId(p) == batcalcRef && getFunctionId(p) == dblRef && p->retc == 1 @@ -160,7 +161,7 @@ OPTcoercionImplementation(Client cntxt,M || ( p->argc == 3 && isVarConstant(mb,getArg(p,1)) && getArgType(mb,p,1) == TYPE_int - /* to-scale == 0, i.e., no scale change */ + //to-scale == 0, i.e., no scale change && *(int*) getVarValue(mb, getArg(p,1)) == 0 ) ) ) { k = getArg(p,0); coerce[k].pc= i; @@ -168,8 +169,9 @@ OPTcoercionImplementation(Client cntxt,M coerce[k].src= getArg(p,1 + (p->argc ==3)); coerce[k].fromtype= getColumnType(getArgType(mb,p,1 + (p->argc ==3))); } - coercionOptimizerAggrStep(mb, i, coerce); - coercionOptimizerCalcStep(mb, i, coerce); +*/ + coercionOptimizerAggrStep(cntxt,mb, i, coerce); + coercionOptimizerCalcStep(cntxt,mb, i, coerce); if (getModuleId(p)==calcRef && p->argc == 2) { k= coercionOptimizerStep(mb, i, p); actions += k; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list
-- | Stefan.Manegold@CWI.nl | DB Architectures (DA) | | www.CWI.nl/~manegold/ | Science Park 123 (L321) | | +31 (0)20 592-4212 | 1098 XG Amsterdam (NL) |
_______________________________________________ developers-list mailing list developers-list@monetdb.org https://www.monetdb.org/mailman/listinfo/developers-list
-- | Stefan.Manegold@CWI.nl | DB Architectures (DA) | | www.CWI.nl/~manegold/ | Science Park 123 (L321) | | +31 (0)20 592-4212 | 1098 XG Amsterdam (NL) |
_______________________________________________ developers-list mailing list developers-list@monetdb.org https://www.monetdb.org/mailman/listinfo/developers-list
-- | Stefan.Manegold@CWI.nl | DB Architectures (DA) | | www.CWI.nl/~manegold/ | Science Park 123 (L321) | | +31 (0)20 592-4212 | 1098 XG Amsterdam (NL) |
In case you were wondering, this is the query that fails: create table _U_7fd87635044b as (select idkey, (dnameSan_Leandro_Unified-0.04) *(1*( comp_imp = 'No' )) as _dnameSan_Leandro_Unified_No, (dnameMendota_Unified-0) *(1*( comp_imp = 'No' )) as _dnameMendota_Unified_No, (dnameVineland_Elementary-0.02) *(1*( comp_imp = 'No' )) as _dnameVineland_Elementary_No, (dnameEast_Whittier_City_Elem-0.0799999999999999) *(1*( comp_imp = 'No' )) as _dnameEast_Whittier_City_Elem_No, (dnameLos_Nietos_Elementary-0) *(1*( comp_imp = 'No' )) as _dnameLos_Nietos_Elementary_No, (dnameWillits_Unified-0.0799999999999999) *(1*( comp_imp = 'No' )) as _dnameWillits_Unified_No, (dnameDelhi_Unified-0.02) *(1*( comp_imp = 'No' )) as _dnameDelhi_Unified_No, (dnameFullerton_Elementary-0.0599999999999999) *(1*( comp_imp = 'No' )) as _dnameFullerton_Elementary_No, (dnamePlumas_Unified-0.0999999999999999) *(1*( comp_imp = 'No' )) as _dnamePlumas_Unified_No, (dnameChula_Vista_Elementary-0.16) *(1*( comp_imp = 'No' )) as _dnameChula_Vista_Elementary_No, (dnameOceanside_Unified-0.14) *(1*( comp_imp = 'No' )) as _dnameOceanside_Unified_No, (dnameStockton_City_Unified-0.22) *(1*( comp_imp = 'No' )) as _dnameStockton_City_Unified_No, (dnameBerryessa_Union_Elem-0.0599999999999999) *(1*( comp_imp = 'No' )) as _dnameBerryessa_Union_Elem_No, (dnameLuther_Burbank_Elem-0) *(1*( comp_imp = 'No' )) as _dnameLuther_Burbank_Elem_No, (dnameMilpitas_Unified-0.02) *(1*( comp_imp = 'No' )) as _dnameMilpitas_Unified_No, (dnameSan_Leandro_Unified-0.0676691729323307) *(1*( comp_imp = 'Yes' )) as _dnameSan_Leandro_Unified_Yes, (dnameMendota_Unified-0.0300751879699248) *(1*( comp_imp = 'Yes' )) as _dnameMendota_Unified_Yes, (dnameVineland_Elementary-0.00751879699248119) *(1*( comp_imp = 'Yes' )) as _dnameVineland_Elementary_Yes, (dnameEast_Whittier_City_Elem-0.0676691729323307) *(1*( comp_imp = 'Yes' )) as _dnameEast_Whittier_City_Elem_Yes, (dnameLos_Nietos_Elementary-0.0150375939849624) *(1*( comp_imp = 'Yes' )) as _dnameLos_Nietos_Elementary_Yes, (dnameWillits_Unified-0) *(1*( comp_imp = 'Yes' )) as _dnameWillits_Unified_Yes, (dnameDelhi_Unified-0.0225563909774436) *(1*( comp_imp = 'Yes' )) as _dnameDelhi_Unified_Yes, (dnameFullerton_Elementary-0.0977443609022555) *(1*( comp_imp = 'Yes' )) as _dnameFullerton_Elementary_Yes, (dnamePlumas_Unified-0.0300751879699248) *(1*( comp_imp = 'Yes' )) as _dnamePlumas_Unified_Yes, (dnameChula_Vista_Elementary-0.195488721804511) *(1*( comp_imp = 'Yes' )) as _dnameChula_Vista_Elementary_Yes, (dnameOceanside_Unified-0.105263157894737) *(1*( comp_imp = 'Yes' )) as _dnameOceanside_Unified_Yes, (dnameStockton_City_Unified-0.195488721804511) *(1*( comp_imp = 'Yes' )) as _dnameStockton_City_Unified_Yes, (dnameBerryessa_Union_Elem-0.0751879699248119) *(1*( comp_imp = 'Yes' )) as _dnameBerryessa_Union_Elem_Yes, (dnameLuther_Burbank_Elem-0.00751879699248119) *(1*( comp_imp = 'Yes' )) as _dnameLuther_Burbank_Elem_Yes, (dnameMilpitas_Unified-0.0827067669172931) *(1*( comp_imp = 'Yes' )) as _dnameMilpitas_Unified_Yes from apiclus1 inner join _mm_7fd8287ef1cd using(idkey)) with data I have created a script that triggers the issue: http://homepages.cwi.nl/~hannes/surveyfail.sql Best, Hannes
On 20 Mar 2015, at 13:08, Stefan Manegold
wrote: ----- Original Message -----
Martin,
thanks for extending the error message info!
It now says "Server says '!TypeException:user.s94_2[40]:'batcalc.*' undefined in: X_168:bat[:oid,:hge] := batcalc.*(X_146:bat[:oid,:int],X_166:bat[:oid,:sht]);'."
Martin, Hannes, Niels,
Question is, why is why is there an upcast from int to hge in the original plan (now removed by the coercion optimizer),
or was the upcast from sht to hge removed? or both?
and is the batcalc.* to hge on (originally) int & sht generated because of that (now removed) upcast, or for some other reason, e.g., does the query request the result to by of type bigint, or of type decimal with more that 18 digits?
Stefan
----- Original Message -----
FYI,
test clients/R/Tests/survey.R still fails with "Server says '!TypeException:user.s94_2[40]:'batcalc.*' undefined in: X_168:bat[:oid,:hge] := batcalc.*(X_146,X_166);'." i.e., the check whether the requested new signature exists does not seem to work, yet.
Stefan
----- Original Message -----
Changeset: 4165dc34b2bb for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4165dc34b2bb Modified Files: monetdb5/mal/mal_resolve.c monetdb5/mal/mal_resolve.h monetdb5/optimizer/opt_coercion.c Branch: default Log Message:
Defense against non-existing MAL primitives and further limit the rewrite to the simpliest and most expensive case encountered.
diffs (244 lines):
diff --git a/monetdb5/mal/mal_resolve.c b/monetdb5/mal/mal_resolve.c --- a/monetdb5/mal/mal_resolve.c +++ b/monetdb5/mal/mal_resolve.c @@ -115,7 +115,6 @@ findFunctionType(stream *out, Module sco Symbol s; InstrPtr sig; int i, k, unmatched = 0, s1; - /* int foundbutwrong=0; */ int polytype[MAXTYPEVAR]; int returns[256]; int *returntype = NULL; @@ -396,7 +395,6 @@ findFunctionType(stream *out, Module sco } } if (s1 < 0) { - /* if(getSignature(s)->token !=PATTERNsymbol) foundbutwrong++; */ s = s->peer; continue; } @@ -445,7 +443,6 @@ findFunctionType(stream *out, Module sco * typed. This should be reflected in the symbol table. */ s1 = returntype[0]; /* for those interested */ - /* foundbutwrong = 0; */ /* * If the call refers to a polymorphic function, we clone it * to arrive at a bounded instance. Polymorphic patterns and @@ -485,12 +482,6 @@ findFunctionType(stream *out, Module sco * arguments, but that clashes with one of the target variables. */ wrapup: - /* foundbutwrong has not been changed, commented out code above - if (foundbutwrong && !silent) { - showScriptException(out, mb, getPC(mb, p), TYPE, - "type conflict in assignment"); - } - */ if (returntype && returntype != returns) GDKfree(returntype); return -3; @@ -819,13 +810,17 @@ chkTypes(stream *out, Module s, MalBlkPt
/* * Type checking an individual instruction is dangerous, - * because it ignores data flow and declarations issues. - * It is only to be used in isolated cases. + * because it ignores data flow and variable declarations. */ -void +int chkInstruction(stream *out, Module s, MalBlkPtr mb, InstrPtr p) { - typeChecker(out, s, mb, p, FALSE); + int olderrors= mb->errors; + int error; + typeChecker(out, s, mb, p, TRUE); + error = mb->errors; + mb->errors = olderrors; + return error; }
void diff --git a/monetdb5/mal/mal_resolve.h b/monetdb5/mal/mal_resolve.h --- a/monetdb5/mal/mal_resolve.h +++ b/monetdb5/mal/mal_resolve.h @@ -20,7 +20,7 @@ #define MAXTYPEVAR 10
mal_export void chkProgram(stream *out, Module s, MalBlkPtr mb); -mal_export void chkInstruction(stream *out, Module s, MalBlkPtr mb, InstrPtr p); +mal_export int chkInstruction(stream *out, Module s, MalBlkPtr mb, InstrPtr p); mal_export void chkTypes(stream *out, Module s, MalBlkPtr mb, int silent); mal_export void typeChecker(stream *out, Module scope, MalBlkPtr mb, InstrPtr p, int silent); mal_export int fcnBinder(stream *out, Module scope, MalBlkPtr mb, InstrPtr p); diff --git a/monetdb5/optimizer/opt_coercion.c b/monetdb5/optimizer/opt_coercion.c --- a/monetdb5/optimizer/opt_coercion.c +++ b/monetdb5/optimizer/opt_coercion.c @@ -18,8 +18,9 @@ typedef struct{ int fromtype; int totype; int src; -/* not used, yet !?? +/* not used, yet !?? Indeed int digits; + int fromscale; int scale; */ } Coercion; @@ -47,58 +48,60 @@ coercionOptimizerStep(MalBlkPtr mb, int return 0; }
-/* Check coercions for numeric types that can be handled with smaller ones. +/* Check coercions for numeric types towards :hge that can be handled with smaller ones. * For now, limit to +,-,/,*,% hge expressions - * To be extended to deal with math calls as well. + * Not every combination may be available in the MAL layer, which calls + * for a separate type check before fixing it. + * Superflous coercion statements will be garbagecollected later on in the pipeline */ static void -coercionOptimizerCalcStep(MalBlkPtr mb, int i, Coercion *coerce) +coercionOptimizerCalcStep(Client cntxt, MalBlkPtr mb, int i, Coercion *coerce) { InstrPtr p = getInstrPtr(mb,i); - int r, a, b; + int r, a, b, varid;
+ r = getColumnType(getVarType(mb, getArg(p,0))); +#ifdef HAVE_HGE + if ( r != TYPE_hge) + return; +#endif if( getModuleId(p) != batcalcRef || getFunctionId(p) == 0) return; if( ! (getFunctionId(p) == plusRef || getFunctionId(p) == minusRef || getFunctionId(p) == mulRef || getFunctionId(p) == divRef || *getFunctionId(p) =='%') || p->argc !=3) return;
- r = getColumnType(getVarType(mb, getArg(p,0))); - switch(r){ - case TYPE_bte: - case TYPE_sht: - case TYPE_int: - case TYPE_lng: -#ifdef HAVE_HGE - case TYPE_hge: -#endif - break; - case TYPE_dbl: - case TYPE_flt: - /* to be determined */ - default: - return; - } a = getColumnType(getVarType(mb, getArg(p,1))); b = getColumnType(getVarType(mb, getArg(p,2))); - if ( a == r && coerce[getArg(p,1)].src && coerce[getArg(p,1)].fromtype < r ) /*digit/scale test as well*/ + if ( a == r && coerce[getArg(p,1)].src && coerce[getArg(p,1)].fromtype < r ) + { + varid = getArg(p,1); getArg(p,1) = coerce[getArg(p,1)].src; - if ( b == r && coerce[getArg(p,2)].src && coerce[getArg(p,2)].fromtype < r ) /*digit/scale test as well*/ + if ( chkInstruction(NULL, cntxt->nspace, mb, p)) + p->argv[1] = varid; + } + if ( b == r && coerce[getArg(p,2)].src && coerce[getArg(p,2)].fromtype < r ) + { + varid = getArg(p,2); getArg(p,2) = coerce[getArg(p,2)].src; + if ( chkInstruction(NULL, cntxt->nspace, mb, p)) + p->argv[2] = varid; + } return; }
static void -coercionOptimizerAggrStep(MalBlkPtr mb, int i, Coercion *coerce) +coercionOptimizerAggrStep(Client cntxt, MalBlkPtr mb, int i, Coercion *coerce) { InstrPtr p = getInstrPtr(mb,i); int r, k;
+ (void) cntxt; + if( getModuleId(p) != aggrRef || getFunctionId(p) == 0) return; if( ! (getFunctionId(p) == subavgRef ) || p->argc !=6) return;
r = getColumnType(getVarType(mb, getArg(p,0))); k = getArg(p,1); - // check the digits/scale if( r == TYPE_dbl && coerce[k].src ) getArg(p,1) = coerce[getArg(p,1)].src; return; @@ -124,35 +127,33 @@ OPTcoercionImplementation(Client cntxt,M if (getModuleId(p) == NULL) continue; /* Downscale the type, avoiding hge storage when lng would be sufficient. - * The code template can be extended to handle other downscale options as well */ #ifdef HAVE_HGE if ( getModuleId(p) == batcalcRef && getFunctionId(p) == hgeRef && p->retc == 1 - && ( p->argc == 2 - || ( p->argc == 5 - && isVarConstant(mb,getArg(p,1)) - && getArgType(mb,p,1) == TYPE_int - && isVarConstant(mb,getArg(p,3)) - && getArgType(mb,p,3) == TYPE_int - && isVarConstant(mb,getArg(p,4)) - && getArgType(mb,p,4) == TYPE_int - /* from-scale == to-scale, i.e., no scale change */ - && *(int*) getVarValue(mb, getArg(p,1)) == *(int*) getVarValue(mb, getArg(p,4)) ) ) ){ + && ( p->argc == 5 + && isVarConstant(mb,getArg(p,1)) + && getArgType(mb,p,1) == TYPE_int + && isVarConstant(mb,getArg(p,3)) + && getArgType(mb,p,3) == TYPE_int + && isVarConstant(mb,getArg(p,4)) + && getArgType(mb,p,4) == TYPE_int + /* from-scale == to-scale, i.e., no scale change */ + && *(int*) getVarValue(mb, getArg(p,1)) == *(int*) getVarValue(mb, getArg(p,4)) ) ){ k = getArg(p,0); coerce[k].pc= i; coerce[k].totype= TYPE_hge; coerce[k].src= getArg(p,2); coerce[k].fromtype= getColumnType(getArgType(mb,p,2)); -/* not used, yet !?? - if (p->argc == 5) { - coerce[k].digits= getVarConstant(mb,getArg(p,3)).val.ival; - coerce[k].scale= getVarConstant(mb,getArg(p,4)).val.ival; - } +/* not used, yet !?? indeed + coerce[k].fromscale= getVarConstant(mb,getArg(p,1)).val.ival; + coerce[k].digits= getVarConstant(mb,getArg(p,3)).val.ival; + coerce[k].scale= getVarConstant(mb,getArg(p,4)).val.ival; */ } #endif +/* if ( getModuleId(p) == batcalcRef && getFunctionId(p) == dblRef && p->retc == 1 @@ -160,7 +161,7 @@ OPTcoercionImplementation(Client cntxt,M || ( p->argc == 3 && isVarConstant(mb,getArg(p,1)) && getArgType(mb,p,1) == TYPE_int - /* to-scale == 0, i.e., no scale change */ + //to-scale == 0, i.e., no scale change && *(int*) getVarValue(mb, getArg(p,1)) == 0 ) ) ) { k = getArg(p,0); coerce[k].pc= i; @@ -168,8 +169,9 @@ OPTcoercionImplementation(Client cntxt,M coerce[k].src= getArg(p,1 + (p->argc ==3)); coerce[k].fromtype= getColumnType(getArgType(mb,p,1 + (p->argc ==3))); } - coercionOptimizerAggrStep(mb, i, coerce); - coercionOptimizerCalcStep(mb, i, coerce); +*/ + coercionOptimizerAggrStep(cntxt,mb, i, coerce); + coercionOptimizerCalcStep(cntxt,mb, i, coerce); if (getModuleId(p)==calcRef && p->argc == 2) { k= coercionOptimizerStep(mb, i, p); actions += k; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list
-- | Stefan.Manegold@CWI.nl | DB Architectures (DA) | | www.CWI.nl/~manegold/ | Science Park 123 (L321) | | +31 (0)20 592-4212 | 1098 XG Amsterdam (NL) |
_______________________________________________ developers-list mailing list developers-list@monetdb.org https://www.monetdb.org/mailman/listinfo/developers-list
-- | Stefan.Manegold@CWI.nl | DB Architectures (DA) | | www.CWI.nl/~manegold/ | Science Park 123 (L321) | | +31 (0)20 592-4212 | 1098 XG Amsterdam (NL) |
_______________________________________________ developers-list mailing list developers-list@monetdb.org https://www.monetdb.org/mailman/listinfo/developers-list
-- | Stefan.Manegold@CWI.nl | DB Architectures (DA) | | www.CWI.nl/~manegold/ | Science Park 123 (L321) | | +31 (0)20 592-4212 | 1098 XG Amsterdam (NL) |
_______________________________________________ developers-list mailing list developers-list@monetdb.org https://www.monetdb.org/mailman/listinfo/developers-list
For what it's worth, a brief look at the code reveals that chkInstruction() always returns 0, also in case of an error, mainly because it calls typeChecker() with silent==TRUE, and then typeChecker() does not increase the error count. Attached patch over changeset 497ecabb77d7 reveals the following output with Hannes' script: # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) Stefan ----- Original Message -----
In case you were wondering, this is the query that fails:
create table _U_7fd87635044b as (select idkey, (dnameSan_Leandro_Unified-0.04) *(1*( comp_imp = 'No' )) as _dnameSan_Leandro_Unified_No, (dnameMendota_Unified-0) *(1*( comp_imp = 'No' )) as _dnameMendota_Unified_No, (dnameVineland_Elementary-0.02) *(1*( comp_imp = 'No' )) as _dnameVineland_Elementary_No, (dnameEast_Whittier_City_Elem-0.0799999999999999) *(1*( comp_imp = 'No' )) as _dnameEast_Whittier_City_Elem_No, (dnameLos_Nietos_Elementary-0) *(1*( comp_imp = 'No' )) as _dnameLos_Nietos_Elementary_No, (dnameWillits_Unified-0.0799999999999999) *(1*( comp_imp = 'No' )) as _dnameWillits_Unified_No, (dnameDelhi_Unified-0.02) *(1*( comp_imp = 'No' )) as _dnameDelhi_Unified_No, (dnameFullerton_Elementary-0.0599999999999999) *(1*( comp_imp = 'No' )) as _dnameFullerton_Elementary_No, (dnamePlumas_Unified-0.0999999999999999) *(1*( comp_imp = 'No' )) as _dnamePlumas_Unified_No, (dnameChula_Vista_Elementary-0.16) *(1*( comp_imp = 'No' )) as _dnameChula_Vista_Elementary_No, (dnameOceanside_Unified-0.14) *(1*( comp_imp = 'No' )) as _dnameOceanside_Unified_No, (dnameStockton_City_Unified-0.22) *(1*( comp_imp = 'No' )) as _dnameStockton_City_Unified_No, (dnameBerryessa_Union_Elem-0.0599999999999999) *(1*( comp_imp = 'No' )) as _dnameBerryessa_Union_Elem_No, (dnameLuther_Burbank_Elem-0) *(1*( comp_imp = 'No' )) as _dnameLuther_Burbank_Elem_No, (dnameMilpitas_Unified-0.02) *(1*( comp_imp = 'No' )) as _dnameMilpitas_Unified_No, (dnameSan_Leandro_Unified-0.0676691729323307) *(1*( comp_imp = 'Yes' )) as _dnameSan_Leandro_Unified_Yes, (dnameMendota_Unified-0.0300751879699248) *(1*( comp_imp = 'Yes' )) as _dnameMendota_Unified_Yes, (dnameVineland_Elementary-0.00751879699248119) *(1*( comp_imp = 'Yes' )) as _dnameVineland_Elementary_Yes, (dnameEast_Whittier_City_Elem-0.0676691729323307) *(1*( comp_imp = 'Yes' )) as _dnameEast_Whittier_City_Elem_Yes, (dnameLos_Nietos_Elementary-0.0150375939849624) *(1*( comp_imp = 'Yes' )) as _dnameLos_Nietos_Elementary_Yes, (dnameWillits_Unified-0) *(1*( comp_imp = 'Yes' )) as _dnameWillits_Unified_Yes, (dnameDelhi_Unified-0.0225563909774436) *(1*( comp_imp = 'Yes' )) as _dnameDelhi_Unified_Yes, (dnameFullerton_Elementary-0.0977443609022555) *(1*( comp_imp = 'Yes' )) as _dnameFullerton_Elementary_Yes, (dnamePlumas_Unified-0.0300751879699248) *(1*( comp_imp = 'Yes' )) as _dnamePlumas_Unified_Yes, (dnameChula_Vista_Elementary-0.195488721804511) *(1*( comp_imp = 'Yes' )) as _dnameChula_Vista_Elementary_Yes, (dnameOceanside_Unified-0.105263157894737) *(1*( comp_imp = 'Yes' )) as _dnameOceanside_Unified_Yes, (dnameStockton_City_Unified-0.195488721804511) *(1*( comp_imp = 'Yes' )) as _dnameStockton_City_Unified_Yes, (dnameBerryessa_Union_Elem-0.0751879699248119) *(1*( comp_imp = 'Yes' )) as _dnameBerryessa_Union_Elem_Yes, (dnameLuther_Burbank_Elem-0.00751879699248119) *(1*( comp_imp = 'Yes' )) as _dnameLuther_Burbank_Elem_Yes, (dnameMilpitas_Unified-0.0827067669172931) *(1*( comp_imp = 'Yes' )) as _dnameMilpitas_Unified_Yes from apiclus1 inner join _mm_7fd8287ef1cd using(idkey)) with data
I have created a script that triggers the issue: http://homepages.cwi.nl/~hannes/surveyfail.sql
Best,
Hannes
On 20 Mar 2015, at 13:08, Stefan Manegold
wrote: ----- Original Message -----
Martin,
thanks for extending the error message info!
It now says "Server says '!TypeException:user.s94_2[40]:'batcalc.*' undefined in: X_168:bat[:oid,:hge] := batcalc.*(X_146:bat[:oid,:int],X_166:bat[:oid,:sht]);'."
Martin, Hannes, Niels,
Question is, why is why is there an upcast from int to hge in the original plan (now removed by the coercion optimizer),
or was the upcast from sht to hge removed? or both?
and is the batcalc.* to hge on (originally) int & sht generated because of that (now removed) upcast, or for some other reason, e.g., does the query request the result to by of type bigint, or of type decimal with more that 18 digits?
Stefan
----- Original Message -----
FYI,
test clients/R/Tests/survey.R still fails with "Server says '!TypeException:user.s94_2[40]:'batcalc.*' undefined in: X_168:bat[:oid,:hge] := batcalc.*(X_146,X_166);'." i.e., the check whether the requested new signature exists does not seem to work, yet.
Stefan
----- Original Message -----
Changeset: 4165dc34b2bb for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4165dc34b2bb Modified Files: monetdb5/mal/mal_resolve.c monetdb5/mal/mal_resolve.h monetdb5/optimizer/opt_coercion.c Branch: default Log Message:
Defense against non-existing MAL primitives and further limit the rewrite to the simpliest and most expensive case encountered.
diffs (244 lines):
diff --git a/monetdb5/mal/mal_resolve.c b/monetdb5/mal/mal_resolve.c --- a/monetdb5/mal/mal_resolve.c +++ b/monetdb5/mal/mal_resolve.c @@ -115,7 +115,6 @@ findFunctionType(stream *out, Module sco Symbol s; InstrPtr sig; int i, k, unmatched = 0, s1; - /* int foundbutwrong=0; */ int polytype[MAXTYPEVAR]; int returns[256]; int *returntype = NULL; @@ -396,7 +395,6 @@ findFunctionType(stream *out, Module sco } } if (s1 < 0) { - /* if(getSignature(s)->token !=PATTERNsymbol) foundbutwrong++; */ s = s->peer; continue; } @@ -445,7 +443,6 @@ findFunctionType(stream *out, Module sco * typed. This should be reflected in the symbol table. */ s1 = returntype[0]; /* for those interested */ - /* foundbutwrong = 0; */ /* * If the call refers to a polymorphic function, we clone it * to arrive at a bounded instance. Polymorphic patterns and @@ -485,12 +482,6 @@ findFunctionType(stream *out, Module sco * arguments, but that clashes with one of the target variables. */ wrapup: - /* foundbutwrong has not been changed, commented out code above - if (foundbutwrong && !silent) { - showScriptException(out, mb, getPC(mb, p), TYPE, - "type conflict in assignment"); - } - */ if (returntype && returntype != returns) GDKfree(returntype); return -3; @@ -819,13 +810,17 @@ chkTypes(stream *out, Module s, MalBlkPt
/* * Type checking an individual instruction is dangerous, - * because it ignores data flow and declarations issues. - * It is only to be used in isolated cases. + * because it ignores data flow and variable declarations. */ -void +int chkInstruction(stream *out, Module s, MalBlkPtr mb, InstrPtr p) { - typeChecker(out, s, mb, p, FALSE); + int olderrors= mb->errors; + int error; + typeChecker(out, s, mb, p, TRUE); + error = mb->errors; + mb->errors = olderrors; + return error; }
void diff --git a/monetdb5/mal/mal_resolve.h b/monetdb5/mal/mal_resolve.h --- a/monetdb5/mal/mal_resolve.h +++ b/monetdb5/mal/mal_resolve.h @@ -20,7 +20,7 @@ #define MAXTYPEVAR 10
mal_export void chkProgram(stream *out, Module s, MalBlkPtr mb); -mal_export void chkInstruction(stream *out, Module s, MalBlkPtr mb, InstrPtr p); +mal_export int chkInstruction(stream *out, Module s, MalBlkPtr mb, InstrPtr p); mal_export void chkTypes(stream *out, Module s, MalBlkPtr mb, int silent); mal_export void typeChecker(stream *out, Module scope, MalBlkPtr mb, InstrPtr p, int silent); mal_export int fcnBinder(stream *out, Module scope, MalBlkPtr mb, InstrPtr p); diff --git a/monetdb5/optimizer/opt_coercion.c b/monetdb5/optimizer/opt_coercion.c --- a/monetdb5/optimizer/opt_coercion.c +++ b/monetdb5/optimizer/opt_coercion.c @@ -18,8 +18,9 @@ typedef struct{ int fromtype; int totype; int src; -/* not used, yet !?? +/* not used, yet !?? Indeed int digits; + int fromscale; int scale; */ } Coercion; @@ -47,58 +48,60 @@ coercionOptimizerStep(MalBlkPtr mb, int return 0; }
-/* Check coercions for numeric types that can be handled with smaller ones. +/* Check coercions for numeric types towards :hge that can be handled with smaller ones. * For now, limit to +,-,/,*,% hge expressions - * To be extended to deal with math calls as well. + * Not every combination may be available in the MAL layer, which calls + * for a separate type check before fixing it. + * Superflous coercion statements will be garbagecollected later on in the pipeline */ static void -coercionOptimizerCalcStep(MalBlkPtr mb, int i, Coercion *coerce) +coercionOptimizerCalcStep(Client cntxt, MalBlkPtr mb, int i, Coercion *coerce) { InstrPtr p = getInstrPtr(mb,i); - int r, a, b; + int r, a, b, varid;
+ r = getColumnType(getVarType(mb, getArg(p,0))); +#ifdef HAVE_HGE + if ( r != TYPE_hge) + return; +#endif if( getModuleId(p) != batcalcRef || getFunctionId(p) == 0) return; if( ! (getFunctionId(p) == plusRef || getFunctionId(p) == minusRef || getFunctionId(p) == mulRef || getFunctionId(p) == divRef || *getFunctionId(p) =='%') || p->argc !=3) return;
- r = getColumnType(getVarType(mb, getArg(p,0))); - switch(r){ - case TYPE_bte: - case TYPE_sht: - case TYPE_int: - case TYPE_lng: -#ifdef HAVE_HGE - case TYPE_hge: -#endif - break; - case TYPE_dbl: - case TYPE_flt: - /* to be determined */ - default: - return; - } a = getColumnType(getVarType(mb, getArg(p,1))); b = getColumnType(getVarType(mb, getArg(p,2))); - if ( a == r && coerce[getArg(p,1)].src && coerce[getArg(p,1)].fromtype < r ) /*digit/scale test as well*/ + if ( a == r && coerce[getArg(p,1)].src && coerce[getArg(p,1)].fromtype < r ) + { + varid = getArg(p,1); getArg(p,1) = coerce[getArg(p,1)].src; - if ( b == r && coerce[getArg(p,2)].src && coerce[getArg(p,2)].fromtype < r ) /*digit/scale test as well*/ + if ( chkInstruction(NULL, cntxt->nspace, mb, p)) + p->argv[1] = varid; + } + if ( b == r && coerce[getArg(p,2)].src && coerce[getArg(p,2)].fromtype < r ) + { + varid = getArg(p,2); getArg(p,2) = coerce[getArg(p,2)].src; + if ( chkInstruction(NULL, cntxt->nspace, mb, p)) + p->argv[2] = varid; + } return; }
static void -coercionOptimizerAggrStep(MalBlkPtr mb, int i, Coercion *coerce) +coercionOptimizerAggrStep(Client cntxt, MalBlkPtr mb, int i, Coercion *coerce) { InstrPtr p = getInstrPtr(mb,i); int r, k;
+ (void) cntxt; + if( getModuleId(p) != aggrRef || getFunctionId(p) == 0) return; if( ! (getFunctionId(p) == subavgRef ) || p->argc !=6) return;
r = getColumnType(getVarType(mb, getArg(p,0))); k = getArg(p,1); - // check the digits/scale if( r == TYPE_dbl && coerce[k].src ) getArg(p,1) = coerce[getArg(p,1)].src; return; @@ -124,35 +127,33 @@ OPTcoercionImplementation(Client cntxt,M if (getModuleId(p) == NULL) continue; /* Downscale the type, avoiding hge storage when lng would be sufficient. - * The code template can be extended to handle other downscale options as well */ #ifdef HAVE_HGE if ( getModuleId(p) == batcalcRef && getFunctionId(p) == hgeRef && p->retc == 1 - && ( p->argc == 2 - || ( p->argc == 5 - && isVarConstant(mb,getArg(p,1)) - && getArgType(mb,p,1) == TYPE_int - && isVarConstant(mb,getArg(p,3)) - && getArgType(mb,p,3) == TYPE_int - && isVarConstant(mb,getArg(p,4)) - && getArgType(mb,p,4) == TYPE_int - /* from-scale == to-scale, i.e., no scale change */ - && *(int*) getVarValue(mb, getArg(p,1)) == *(int*) getVarValue(mb, getArg(p,4)) ) ) ){ + && ( p->argc == 5 + && isVarConstant(mb,getArg(p,1)) + && getArgType(mb,p,1) == TYPE_int + && isVarConstant(mb,getArg(p,3)) + && getArgType(mb,p,3) == TYPE_int + && isVarConstant(mb,getArg(p,4)) + && getArgType(mb,p,4) == TYPE_int + /* from-scale == to-scale, i.e., no scale change */ + && *(int*) getVarValue(mb, getArg(p,1)) == *(int*) getVarValue(mb, getArg(p,4)) ) ){ k = getArg(p,0); coerce[k].pc= i; coerce[k].totype= TYPE_hge; coerce[k].src= getArg(p,2); coerce[k].fromtype= getColumnType(getArgType(mb,p,2)); -/* not used, yet !?? - if (p->argc == 5) { - coerce[k].digits= getVarConstant(mb,getArg(p,3)).val.ival; - coerce[k].scale= getVarConstant(mb,getArg(p,4)).val.ival; - } +/* not used, yet !?? indeed + coerce[k].fromscale= getVarConstant(mb,getArg(p,1)).val.ival; + coerce[k].digits= getVarConstant(mb,getArg(p,3)).val.ival; + coerce[k].scale= getVarConstant(mb,getArg(p,4)).val.ival; */ } #endif +/* if ( getModuleId(p) == batcalcRef && getFunctionId(p) == dblRef && p->retc == 1 @@ -160,7 +161,7 @@ OPTcoercionImplementation(Client cntxt,M || ( p->argc == 3 && isVarConstant(mb,getArg(p,1)) && getArgType(mb,p,1) == TYPE_int - /* to-scale == 0, i.e., no scale change */ + //to-scale == 0, i.e., no scale change && *(int*) getVarValue(mb, getArg(p,1)) == 0 ) ) ) { k = getArg(p,0); coerce[k].pc= i; @@ -168,8 +169,9 @@ OPTcoercionImplementation(Client cntxt,M coerce[k].src= getArg(p,1 + (p->argc ==3)); coerce[k].fromtype= getColumnType(getArgType(mb,p,1 + (p->argc ==3))); } - coercionOptimizerAggrStep(mb, i, coerce); - coercionOptimizerCalcStep(mb, i, coerce); +*/ + coercionOptimizerAggrStep(cntxt,mb, i, coerce); + coercionOptimizerCalcStep(cntxt,mb, i, coerce); if (getModuleId(p)==calcRef && p->argc == 2) { k= coercionOptimizerStep(mb, i, p); actions += k; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list
-- | Stefan.Manegold@CWI.nl | DB Architectures (DA) | | www.CWI.nl/~manegold/ | Science Park 123 (L321) | | +31 (0)20 592-4212 | 1098 XG Amsterdam (NL) |
_______________________________________________ developers-list mailing list developers-list@monetdb.org https://www.monetdb.org/mailman/listinfo/developers-list
-- | Stefan.Manegold@CWI.nl | DB Architectures (DA) | | www.CWI.nl/~manegold/ | Science Park 123 (L321) | | +31 (0)20 592-4212 | 1098 XG Amsterdam (NL) |
_______________________________________________ developers-list mailing list developers-list@monetdb.org https://www.monetdb.org/mailman/listinfo/developers-list
-- | Stefan.Manegold@CWI.nl | DB Architectures (DA) | | www.CWI.nl/~manegold/ | Science Park 123 (L321) | | +31 (0)20 592-4212 | 1098 XG Amsterdam (NL) |
_______________________________________________ developers-list mailing list developers-list@monetdb.org https://www.monetdb.org/mailman/listinfo/developers-list
_______________________________________________ developers-list mailing list developers-list@monetdb.org https://www.monetdb.org/mailman/listinfo/developers-list
-- | Stefan.Manegold@CWI.nl | DB Architectures (DA) | | www.CWI.nl/~manegold/ | Science Park 123 (L321) | | +31 (0)20 592-4212 | 1098 XG Amsterdam (NL) |
The patch also contains a potential solution to the problem. ----- Original Message -----
For what it's worth, a brief look at the code reveals that chkInstruction() always returns 0, also in case of an error, mainly because it calls typeChecker() with silent==TRUE, and then typeChecker() does not increase the error count.
Attached patch over changeset 497ecabb77d7 reveals the following output with Hannes' script:
# coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht)
Stefan
----- Original Message -----
In case you were wondering, this is the query that fails:
create table _U_7fd87635044b as (select idkey, (dnameSan_Leandro_Unified-0.04) *(1*( comp_imp = 'No' )) as _dnameSan_Leandro_Unified_No, (dnameMendota_Unified-0) *(1*( comp_imp = 'No' )) as _dnameMendota_Unified_No, (dnameVineland_Elementary-0.02) *(1*( comp_imp = 'No' )) as _dnameVineland_Elementary_No, (dnameEast_Whittier_City_Elem-0.0799999999999999) *(1*( comp_imp = 'No' )) as _dnameEast_Whittier_City_Elem_No, (dnameLos_Nietos_Elementary-0) *(1*( comp_imp = 'No' )) as _dnameLos_Nietos_Elementary_No, (dnameWillits_Unified-0.0799999999999999) *(1*( comp_imp = 'No' )) as _dnameWillits_Unified_No, (dnameDelhi_Unified-0.02) *(1*( comp_imp = 'No' )) as _dnameDelhi_Unified_No, (dnameFullerton_Elementary-0.0599999999999999) *(1*( comp_imp = 'No' )) as _dnameFullerton_Elementary_No, (dnamePlumas_Unified-0.0999999999999999) *(1*( comp_imp = 'No' )) as _dnamePlumas_Unified_No, (dnameChula_Vista_Elementary-0.16) *(1*( comp_imp = 'No' )) as _dnameChula_Vista_Elementary_No, (dnameOceanside_Unified-0.14) *(1*( comp_imp = 'No' )) as _dnameOceanside_Unified_No, (dnameStockton_City_Unified-0.22) *(1*( comp_imp = 'No' )) as _dnameStockton_City_Unified_No, (dnameBerryessa_Union_Elem-0.0599999999999999) *(1*( comp_imp = 'No' )) as _dnameBerryessa_Union_Elem_No, (dnameLuther_Burbank_Elem-0) *(1*( comp_imp = 'No' )) as _dnameLuther_Burbank_Elem_No, (dnameMilpitas_Unified-0.02) *(1*( comp_imp = 'No' )) as _dnameMilpitas_Unified_No, (dnameSan_Leandro_Unified-0.0676691729323307) *(1*( comp_imp = 'Yes' )) as _dnameSan_Leandro_Unified_Yes, (dnameMendota_Unified-0.0300751879699248) *(1*( comp_imp = 'Yes' )) as _dnameMendota_Unified_Yes, (dnameVineland_Elementary-0.00751879699248119) *(1*( comp_imp = 'Yes' )) as _dnameVineland_Elementary_Yes, (dnameEast_Whittier_City_Elem-0.0676691729323307) *(1*( comp_imp = 'Yes' )) as _dnameEast_Whittier_City_Elem_Yes, (dnameLos_Nietos_Elementary-0.0150375939849624) *(1*( comp_imp = 'Yes' )) as _dnameLos_Nietos_Elementary_Yes, (dnameWillits_Unified-0) *(1*( comp_imp = 'Yes' )) as _dnameWillits_Unified_Yes, (dnameDelhi_Unified-0.0225563909774436) *(1*( comp_imp = 'Yes' )) as _dnameDelhi_Unified_Yes, (dnameFullerton_Elementary-0.0977443609022555) *(1*( comp_imp = 'Yes' )) as _dnameFullerton_Elementary_Yes, (dnamePlumas_Unified-0.0300751879699248) *(1*( comp_imp = 'Yes' )) as _dnamePlumas_Unified_Yes, (dnameChula_Vista_Elementary-0.195488721804511) *(1*( comp_imp = 'Yes' )) as _dnameChula_Vista_Elementary_Yes, (dnameOceanside_Unified-0.105263157894737) *(1*( comp_imp = 'Yes' )) as _dnameOceanside_Unified_Yes, (dnameStockton_City_Unified-0.195488721804511) *(1*( comp_imp = 'Yes' )) as _dnameStockton_City_Unified_Yes, (dnameBerryessa_Union_Elem-0.0751879699248119) *(1*( comp_imp = 'Yes' )) as _dnameBerryessa_Union_Elem_Yes, (dnameLuther_Burbank_Elem-0.00751879699248119) *(1*( comp_imp = 'Yes' )) as _dnameLuther_Burbank_Elem_Yes, (dnameMilpitas_Unified-0.0827067669172931) *(1*( comp_imp = 'Yes' )) as _dnameMilpitas_Unified_Yes from apiclus1 inner join _mm_7fd8287ef1cd using(idkey)) with data
I have created a script that triggers the issue: http://homepages.cwi.nl/~hannes/surveyfail.sql
Best,
Hannes
On 20 Mar 2015, at 13:08, Stefan Manegold
wrote: ----- Original Message -----
Martin,
thanks for extending the error message info!
It now says "Server says '!TypeException:user.s94_2[40]:'batcalc.*' undefined in: X_168:bat[:oid,:hge] := batcalc.*(X_146:bat[:oid,:int],X_166:bat[:oid,:sht]);'."
Martin, Hannes, Niels,
Question is, why is why is there an upcast from int to hge in the original plan (now removed by the coercion optimizer),
or was the upcast from sht to hge removed? or both?
and is the batcalc.* to hge on (originally) int & sht generated because of that (now removed) upcast, or for some other reason, e.g., does the query request the result to by of type bigint, or of type decimal with more that 18 digits?
Stefan
----- Original Message -----
FYI,
test clients/R/Tests/survey.R still fails with "Server says '!TypeException:user.s94_2[40]:'batcalc.*' undefined in: X_168:bat[:oid,:hge] := batcalc.*(X_146,X_166);'." i.e., the check whether the requested new signature exists does not seem to work, yet.
Stefan
----- Original Message -----
Changeset: 4165dc34b2bb for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4165dc34b2bb Modified Files: monetdb5/mal/mal_resolve.c monetdb5/mal/mal_resolve.h monetdb5/optimizer/opt_coercion.c Branch: default Log Message:
Defense against non-existing MAL primitives and further limit the rewrite to the simpliest and most expensive case encountered.
diffs (244 lines):
diff --git a/monetdb5/mal/mal_resolve.c b/monetdb5/mal/mal_resolve.c --- a/monetdb5/mal/mal_resolve.c +++ b/monetdb5/mal/mal_resolve.c @@ -115,7 +115,6 @@ findFunctionType(stream *out, Module sco Symbol s; InstrPtr sig; int i, k, unmatched = 0, s1; - /* int foundbutwrong=0; */ int polytype[MAXTYPEVAR]; int returns[256]; int *returntype = NULL; @@ -396,7 +395,6 @@ findFunctionType(stream *out, Module sco } } if (s1 < 0) { - /* if(getSignature(s)->token !=PATTERNsymbol) foundbutwrong++; */ s = s->peer; continue; } @@ -445,7 +443,6 @@ findFunctionType(stream *out, Module sco * typed. This should be reflected in the symbol table. */ s1 = returntype[0]; /* for those interested */ - /* foundbutwrong = 0; */ /* * If the call refers to a polymorphic function, we clone it * to arrive at a bounded instance. Polymorphic patterns and @@ -485,12 +482,6 @@ findFunctionType(stream *out, Module sco * arguments, but that clashes with one of the target variables. */ wrapup: - /* foundbutwrong has not been changed, commented out code above - if (foundbutwrong && !silent) { - showScriptException(out, mb, getPC(mb, p), TYPE, - "type conflict in assignment"); - } - */ if (returntype && returntype != returns) GDKfree(returntype); return -3; @@ -819,13 +810,17 @@ chkTypes(stream *out, Module s, MalBlkPt
/* * Type checking an individual instruction is dangerous, - * because it ignores data flow and declarations issues. - * It is only to be used in isolated cases. + * because it ignores data flow and variable declarations. */ -void +int chkInstruction(stream *out, Module s, MalBlkPtr mb, InstrPtr p) { - typeChecker(out, s, mb, p, FALSE); + int olderrors= mb->errors; + int error; + typeChecker(out, s, mb, p, TRUE); + error = mb->errors; + mb->errors = olderrors; + return error; }
void diff --git a/monetdb5/mal/mal_resolve.h b/monetdb5/mal/mal_resolve.h --- a/monetdb5/mal/mal_resolve.h +++ b/monetdb5/mal/mal_resolve.h @@ -20,7 +20,7 @@ #define MAXTYPEVAR 10
mal_export void chkProgram(stream *out, Module s, MalBlkPtr mb); -mal_export void chkInstruction(stream *out, Module s, MalBlkPtr mb, InstrPtr p); +mal_export int chkInstruction(stream *out, Module s, MalBlkPtr mb, InstrPtr p); mal_export void chkTypes(stream *out, Module s, MalBlkPtr mb, int silent); mal_export void typeChecker(stream *out, Module scope, MalBlkPtr mb, InstrPtr p, int silent); mal_export int fcnBinder(stream *out, Module scope, MalBlkPtr mb, InstrPtr p); diff --git a/monetdb5/optimizer/opt_coercion.c b/monetdb5/optimizer/opt_coercion.c --- a/monetdb5/optimizer/opt_coercion.c +++ b/monetdb5/optimizer/opt_coercion.c @@ -18,8 +18,9 @@ typedef struct{ int fromtype; int totype; int src; -/* not used, yet !?? +/* not used, yet !?? Indeed int digits; + int fromscale; int scale; */ } Coercion; @@ -47,58 +48,60 @@ coercionOptimizerStep(MalBlkPtr mb, int return 0; }
-/* Check coercions for numeric types that can be handled with smaller ones. +/* Check coercions for numeric types towards :hge that can be handled with smaller ones. * For now, limit to +,-,/,*,% hge expressions - * To be extended to deal with math calls as well. + * Not every combination may be available in the MAL layer, which calls + * for a separate type check before fixing it. + * Superflous coercion statements will be garbagecollected later on in the pipeline */ static void -coercionOptimizerCalcStep(MalBlkPtr mb, int i, Coercion *coerce) +coercionOptimizerCalcStep(Client cntxt, MalBlkPtr mb, int i, Coercion *coerce) { InstrPtr p = getInstrPtr(mb,i); - int r, a, b; + int r, a, b, varid;
+ r = getColumnType(getVarType(mb, getArg(p,0))); +#ifdef HAVE_HGE + if ( r != TYPE_hge) + return; +#endif if( getModuleId(p) != batcalcRef || getFunctionId(p) == 0) return; if( ! (getFunctionId(p) == plusRef || getFunctionId(p) == minusRef || getFunctionId(p) == mulRef || getFunctionId(p) == divRef || *getFunctionId(p) =='%') || p->argc !=3) return;
- r = getColumnType(getVarType(mb, getArg(p,0))); - switch(r){ - case TYPE_bte: - case TYPE_sht: - case TYPE_int: - case TYPE_lng: -#ifdef HAVE_HGE - case TYPE_hge: -#endif - break; - case TYPE_dbl: - case TYPE_flt: - /* to be determined */ - default: - return; - } a = getColumnType(getVarType(mb, getArg(p,1))); b = getColumnType(getVarType(mb, getArg(p,2))); - if ( a == r && coerce[getArg(p,1)].src && coerce[getArg(p,1)].fromtype < r ) /*digit/scale test as well*/ + if ( a == r && coerce[getArg(p,1)].src && coerce[getArg(p,1)].fromtype < r ) + { + varid = getArg(p,1); getArg(p,1) = coerce[getArg(p,1)].src; - if ( b == r && coerce[getArg(p,2)].src && coerce[getArg(p,2)].fromtype < r ) /*digit/scale test as well*/ + if ( chkInstruction(NULL, cntxt->nspace, mb, p)) + p->argv[1] = varid; + } + if ( b == r && coerce[getArg(p,2)].src && coerce[getArg(p,2)].fromtype < r ) + { + varid = getArg(p,2); getArg(p,2) = coerce[getArg(p,2)].src; + if ( chkInstruction(NULL, cntxt->nspace, mb, p)) + p->argv[2] = varid; + } return; }
static void -coercionOptimizerAggrStep(MalBlkPtr mb, int i, Coercion *coerce) +coercionOptimizerAggrStep(Client cntxt, MalBlkPtr mb, int i, Coercion *coerce) { InstrPtr p = getInstrPtr(mb,i); int r, k;
+ (void) cntxt; + if( getModuleId(p) != aggrRef || getFunctionId(p) == 0) return; if( ! (getFunctionId(p) == subavgRef ) || p->argc !=6) return;
r = getColumnType(getVarType(mb, getArg(p,0))); k = getArg(p,1); - // check the digits/scale if( r == TYPE_dbl && coerce[k].src ) getArg(p,1) = coerce[getArg(p,1)].src; return; @@ -124,35 +127,33 @@ OPTcoercionImplementation(Client cntxt,M if (getModuleId(p) == NULL) continue; /* Downscale the type, avoiding hge storage when lng would be sufficient. - * The code template can be extended to handle other downscale options as well */ #ifdef HAVE_HGE if ( getModuleId(p) == batcalcRef && getFunctionId(p) == hgeRef && p->retc == 1 - && ( p->argc == 2 - || ( p->argc == 5 - && isVarConstant(mb,getArg(p,1)) - && getArgType(mb,p,1) == TYPE_int - && isVarConstant(mb,getArg(p,3)) - && getArgType(mb,p,3) == TYPE_int - && isVarConstant(mb,getArg(p,4)) - && getArgType(mb,p,4) == TYPE_int - /* from-scale == to-scale, i.e., no scale change */ - && *(int*) getVarValue(mb, getArg(p,1)) == *(int*) getVarValue(mb, getArg(p,4)) ) ) ){ + && ( p->argc == 5 + && isVarConstant(mb,getArg(p,1)) + && getArgType(mb,p,1) == TYPE_int + && isVarConstant(mb,getArg(p,3)) + && getArgType(mb,p,3) == TYPE_int + && isVarConstant(mb,getArg(p,4)) + && getArgType(mb,p,4) == TYPE_int + /* from-scale == to-scale, i.e., no scale change */ + && *(int*) getVarValue(mb, getArg(p,1)) == *(int*) getVarValue(mb, getArg(p,4)) ) ){ k = getArg(p,0); coerce[k].pc= i; coerce[k].totype= TYPE_hge; coerce[k].src= getArg(p,2); coerce[k].fromtype= getColumnType(getArgType(mb,p,2)); -/* not used, yet !?? - if (p->argc == 5) { - coerce[k].digits= getVarConstant(mb,getArg(p,3)).val.ival; - coerce[k].scale= getVarConstant(mb,getArg(p,4)).val.ival; - } +/* not used, yet !?? indeed + coerce[k].fromscale= getVarConstant(mb,getArg(p,1)).val.ival; + coerce[k].digits= getVarConstant(mb,getArg(p,3)).val.ival; + coerce[k].scale= getVarConstant(mb,getArg(p,4)).val.ival; */ } #endif +/* if ( getModuleId(p) == batcalcRef && getFunctionId(p) == dblRef && p->retc == 1 @@ -160,7 +161,7 @@ OPTcoercionImplementation(Client cntxt,M || ( p->argc == 3 && isVarConstant(mb,getArg(p,1)) && getArgType(mb,p,1) == TYPE_int - /* to-scale == 0, i.e., no scale change */ + //to-scale == 0, i.e., no scale change && *(int*) getVarValue(mb, getArg(p,1)) == 0 ) ) ) { k = getArg(p,0); coerce[k].pc= i; @@ -168,8 +169,9 @@ OPTcoercionImplementation(Client cntxt,M coerce[k].src= getArg(p,1 + (p->argc ==3)); coerce[k].fromtype= getColumnType(getArgType(mb,p,1 + (p->argc ==3))); } - coercionOptimizerAggrStep(mb, i, coerce); - coercionOptimizerCalcStep(mb, i, coerce); +*/ + coercionOptimizerAggrStep(cntxt,mb, i, coerce); + coercionOptimizerCalcStep(cntxt,mb, i, coerce); if (getModuleId(p)==calcRef && p->argc == 2) { k= coercionOptimizerStep(mb, i, p); actions += k; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list
-- | Stefan.Manegold@CWI.nl | DB Architectures (DA) | | www.CWI.nl/~manegold/ | Science Park 123 (L321) | | +31 (0)20 592-4212 | 1098 XG Amsterdam (NL) |
_______________________________________________ developers-list mailing list developers-list@monetdb.org https://www.monetdb.org/mailman/listinfo/developers-list
-- | Stefan.Manegold@CWI.nl | DB Architectures (DA) | | www.CWI.nl/~manegold/ | Science Park 123 (L321) | | +31 (0)20 592-4212 | 1098 XG Amsterdam (NL) |
_______________________________________________ developers-list mailing list developers-list@monetdb.org https://www.monetdb.org/mailman/listinfo/developers-list
-- | Stefan.Manegold@CWI.nl | DB Architectures (DA) | | www.CWI.nl/~manegold/ | Science Park 123 (L321) | | +31 (0)20 592-4212 | 1098 XG Amsterdam (NL) |
_______________________________________________ developers-list mailing list developers-list@monetdb.org https://www.monetdb.org/mailman/listinfo/developers-list
_______________________________________________ developers-list mailing list developers-list@monetdb.org https://www.monetdb.org/mailman/listinfo/developers-list
-- | Stefan.Manegold@CWI.nl | DB Architectures (DA) | | www.CWI.nl/~manegold/ | Science Park 123 (L321) | | +31 (0)20 592-4212 | 1098 XG Amsterdam (NL) |
_______________________________________________ developers-list mailing list developers-list@monetdb.org https://www.monetdb.org/mailman/listinfo/developers-list
-- | Stefan.Manegold@CWI.nl | DB Architectures (DA) | | www.CWI.nl/~manegold/ | Science Park 123 (L321) | | +31 (0)20 592-4212 | 1098 XG Amsterdam (NL) |
Attached reduced (i.e., without debugging output) patch provides a potential solutions, i.e., fixes test clients/R/tests/survey.R without harming any other test. Stefan ----- Original Message -----
The patch also contains a potential solution to the problem.
----- Original Message -----
For what it's worth, a brief look at the code reveals that chkInstruction() always returns 0, also in case of an error, mainly because it calls typeChecker() with silent==TRUE, and then typeChecker() does not increase the error count.
Attached patch over changeset 497ecabb77d7 reveals the following output with Hannes' script:
# coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht) # coercionOptimizerCalcStep(), a/1: chkInstruction() returns 0 with p->typechk == TYPE_UNKNOWN # coercionOptimizerCalcStep(): batcalc.*() CANNOT replace first argument: :bat:hge := (:bat:hge,:bat:sht) -> :bat:hge := (:bat:int,:bat:sht)
Stefan
----- Original Message -----
In case you were wondering, this is the query that fails:
create table _U_7fd87635044b as (select idkey, (dnameSan_Leandro_Unified-0.04) *(1*( comp_imp = 'No' )) as _dnameSan_Leandro_Unified_No, (dnameMendota_Unified-0) *(1*( comp_imp = 'No' )) as _dnameMendota_Unified_No, (dnameVineland_Elementary-0.02) *(1*( comp_imp = 'No' )) as _dnameVineland_Elementary_No, (dnameEast_Whittier_City_Elem-0.0799999999999999) *(1*( comp_imp = 'No' )) as _dnameEast_Whittier_City_Elem_No, (dnameLos_Nietos_Elementary-0) *(1*( comp_imp = 'No' )) as _dnameLos_Nietos_Elementary_No, (dnameWillits_Unified-0.0799999999999999) *(1*( comp_imp = 'No' )) as _dnameWillits_Unified_No, (dnameDelhi_Unified-0.02) *(1*( comp_imp = 'No' )) as _dnameDelhi_Unified_No, (dnameFullerton_Elementary-0.0599999999999999) *(1*( comp_imp = 'No' )) as _dnameFullerton_Elementary_No, (dnamePlumas_Unified-0.0999999999999999) *(1*( comp_imp = 'No' )) as _dnamePlumas_Unified_No, (dnameChula_Vista_Elementary-0.16) *(1*( comp_imp = 'No' )) as _dnameChula_Vista_Elementary_No, (dnameOceanside_Unified-0.14) *(1*( comp_imp = 'No' )) as _dnameOceanside_Unified_No, (dnameStockton_City_Unified-0.22) *(1*( comp_imp = 'No' )) as _dnameStockton_City_Unified_No, (dnameBerryessa_Union_Elem-0.0599999999999999) *(1*( comp_imp = 'No' )) as _dnameBerryessa_Union_Elem_No, (dnameLuther_Burbank_Elem-0) *(1*( comp_imp = 'No' )) as _dnameLuther_Burbank_Elem_No, (dnameMilpitas_Unified-0.02) *(1*( comp_imp = 'No' )) as _dnameMilpitas_Unified_No, (dnameSan_Leandro_Unified-0.0676691729323307) *(1*( comp_imp = 'Yes' )) as _dnameSan_Leandro_Unified_Yes, (dnameMendota_Unified-0.0300751879699248) *(1*( comp_imp = 'Yes' )) as _dnameMendota_Unified_Yes, (dnameVineland_Elementary-0.00751879699248119) *(1*( comp_imp = 'Yes' )) as _dnameVineland_Elementary_Yes, (dnameEast_Whittier_City_Elem-0.0676691729323307) *(1*( comp_imp = 'Yes' )) as _dnameEast_Whittier_City_Elem_Yes, (dnameLos_Nietos_Elementary-0.0150375939849624) *(1*( comp_imp = 'Yes' )) as _dnameLos_Nietos_Elementary_Yes, (dnameWillits_Unified-0) *(1*( comp_imp = 'Yes' )) as _dnameWillits_Unified_Yes, (dnameDelhi_Unified-0.0225563909774436) *(1*( comp_imp = 'Yes' )) as _dnameDelhi_Unified_Yes, (dnameFullerton_Elementary-0.0977443609022555) *(1*( comp_imp = 'Yes' )) as _dnameFullerton_Elementary_Yes, (dnamePlumas_Unified-0.0300751879699248) *(1*( comp_imp = 'Yes' )) as _dnamePlumas_Unified_Yes, (dnameChula_Vista_Elementary-0.195488721804511) *(1*( comp_imp = 'Yes' )) as _dnameChula_Vista_Elementary_Yes, (dnameOceanside_Unified-0.105263157894737) *(1*( comp_imp = 'Yes' )) as _dnameOceanside_Unified_Yes, (dnameStockton_City_Unified-0.195488721804511) *(1*( comp_imp = 'Yes' )) as _dnameStockton_City_Unified_Yes, (dnameBerryessa_Union_Elem-0.0751879699248119) *(1*( comp_imp = 'Yes' )) as _dnameBerryessa_Union_Elem_Yes, (dnameLuther_Burbank_Elem-0.00751879699248119) *(1*( comp_imp = 'Yes' )) as _dnameLuther_Burbank_Elem_Yes, (dnameMilpitas_Unified-0.0827067669172931) *(1*( comp_imp = 'Yes' )) as _dnameMilpitas_Unified_Yes from apiclus1 inner join _mm_7fd8287ef1cd using(idkey)) with data
I have created a script that triggers the issue: http://homepages.cwi.nl/~hannes/surveyfail.sql
Best,
Hannes
On 20 Mar 2015, at 13:08, Stefan Manegold
wrote: ----- Original Message -----
Martin,
thanks for extending the error message info!
It now says "Server says '!TypeException:user.s94_2[40]:'batcalc.*' undefined in: X_168:bat[:oid,:hge] := batcalc.*(X_146:bat[:oid,:int],X_166:bat[:oid,:sht]);'."
Martin, Hannes, Niels,
Question is, why is why is there an upcast from int to hge in the original plan (now removed by the coercion optimizer),
or was the upcast from sht to hge removed? or both?
and is the batcalc.* to hge on (originally) int & sht generated because of that (now removed) upcast, or for some other reason, e.g., does the query request the result to by of type bigint, or of type decimal with more that 18 digits?
Stefan
----- Original Message -----
FYI,
test clients/R/Tests/survey.R still fails with "Server says '!TypeException:user.s94_2[40]:'batcalc.*' undefined in: X_168:bat[:oid,:hge] := batcalc.*(X_146,X_166);'." i.e., the check whether the requested new signature exists does not seem to work, yet.
Stefan
----- Original Message ----- > Changeset: 4165dc34b2bb for MonetDB > URL: > http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4165dc34b2bb > Modified Files: > monetdb5/mal/mal_resolve.c > monetdb5/mal/mal_resolve.h > monetdb5/optimizer/opt_coercion.c > Branch: default > Log Message: > > Defense against non-existing MAL primitives > and further limit the rewrite to the simpliest > and most expensive case encountered. > > > diffs (244 lines): > > diff --git a/monetdb5/mal/mal_resolve.c b/monetdb5/mal/mal_resolve.c > --- a/monetdb5/mal/mal_resolve.c > +++ b/monetdb5/mal/mal_resolve.c > @@ -115,7 +115,6 @@ findFunctionType(stream *out, Module sco > Symbol s; > InstrPtr sig; > int i, k, unmatched = 0, s1; > - /* int foundbutwrong=0; */ > int polytype[MAXTYPEVAR]; > int returns[256]; > int *returntype = NULL; > @@ -396,7 +395,6 @@ findFunctionType(stream *out, Module sco > } > } > if (s1 < 0) { > - /* if(getSignature(s)->token !=PATTERNsymbol) foundbutwrong++; > */ > s = s->peer; > continue; > } > @@ -445,7 +443,6 @@ findFunctionType(stream *out, Module sco > * typed. This should be reflected in the symbol table. > */ > s1 = returntype[0]; /* for those interested */ > - /* foundbutwrong = 0; */ > /* > * If the call refers to a polymorphic function, we clone it > * to arrive at a bounded instance. Polymorphic patterns and > @@ -485,12 +482,6 @@ findFunctionType(stream *out, Module sco > * arguments, but that clashes with one of the target variables. > */ > wrapup: > - /* foundbutwrong has not been changed, commented out code above > - if (foundbutwrong && !silent) { > - showScriptException(out, mb, getPC(mb, p), TYPE, > - "type conflict in assignment"); > - } > - */ > if (returntype && returntype != returns) > GDKfree(returntype); > return -3; > @@ -819,13 +810,17 @@ chkTypes(stream *out, Module s, MalBlkPt > > /* > * Type checking an individual instruction is dangerous, > - * because it ignores data flow and declarations issues. > - * It is only to be used in isolated cases. > + * because it ignores data flow and variable declarations. > */ > -void > +int > chkInstruction(stream *out, Module s, MalBlkPtr mb, InstrPtr p) > { > - typeChecker(out, s, mb, p, FALSE); > + int olderrors= mb->errors; > + int error; > + typeChecker(out, s, mb, p, TRUE); > + error = mb->errors; > + mb->errors = olderrors; > + return error; > } > > void > diff --git a/monetdb5/mal/mal_resolve.h b/monetdb5/mal/mal_resolve.h > --- a/monetdb5/mal/mal_resolve.h > +++ b/monetdb5/mal/mal_resolve.h > @@ -20,7 +20,7 @@ > #define MAXTYPEVAR 10 > > mal_export void chkProgram(stream *out, Module s, MalBlkPtr mb); > -mal_export void chkInstruction(stream *out, Module s, MalBlkPtr mb, > InstrPtr > p); > +mal_export int chkInstruction(stream *out, Module s, MalBlkPtr mb, > InstrPtr > p); > mal_export void chkTypes(stream *out, Module s, MalBlkPtr mb, int > silent); > mal_export void typeChecker(stream *out, Module scope, MalBlkPtr > mb, > InstrPtr p, int silent); > mal_export int fcnBinder(stream *out, Module scope, MalBlkPtr mb, > InstrPtr > p); > diff --git a/monetdb5/optimizer/opt_coercion.c > b/monetdb5/optimizer/opt_coercion.c > --- a/monetdb5/optimizer/opt_coercion.c > +++ b/monetdb5/optimizer/opt_coercion.c > @@ -18,8 +18,9 @@ typedef struct{ > int fromtype; > int totype; > int src; > -/* not used, yet !?? > +/* not used, yet !?? Indeed > int digits; > + int fromscale; > int scale; > */ > } Coercion; > @@ -47,58 +48,60 @@ coercionOptimizerStep(MalBlkPtr mb, int > return 0; > } > > -/* Check coercions for numeric types that can be handled with > smaller > ones. > +/* Check coercions for numeric types towards :hge that can be > handled > with > smaller ones. > * For now, limit to +,-,/,*,% hge expressions > - * To be extended to deal with math calls as well. > + * Not every combination may be available in the MAL layer, which > calls > + * for a separate type check before fixing it. > + * Superflous coercion statements will be garbagecollected later on > in > the > pipeline > */ > static void > -coercionOptimizerCalcStep(MalBlkPtr mb, int i, Coercion *coerce) > +coercionOptimizerCalcStep(Client cntxt, MalBlkPtr mb, int i, > Coercion > *coerce) > { > InstrPtr p = getInstrPtr(mb,i); > - int r, a, b; > + int r, a, b, varid; > > + r = getColumnType(getVarType(mb, getArg(p,0))); > +#ifdef HAVE_HGE > + if ( r != TYPE_hge) > + return; > +#endif > if( getModuleId(p) != batcalcRef || getFunctionId(p) == 0) return; > if( ! (getFunctionId(p) == plusRef || getFunctionId(p) == minusRef > || > getFunctionId(p) == mulRef || getFunctionId(p) == divRef || > *getFunctionId(p) =='%') || p->argc !=3) > return; > > - r = getColumnType(getVarType(mb, getArg(p,0))); > - switch(r){ > - case TYPE_bte: > - case TYPE_sht: > - case TYPE_int: > - case TYPE_lng: > -#ifdef HAVE_HGE > - case TYPE_hge: > -#endif > - break; > - case TYPE_dbl: > - case TYPE_flt: > - /* to be determined */ > - default: > - return; > - } > a = getColumnType(getVarType(mb, getArg(p,1))); > b = getColumnType(getVarType(mb, getArg(p,2))); > - if ( a == r && coerce[getArg(p,1)].src && > coerce[getArg(p,1)].fromtype > < > r > ) /*digit/scale test as well*/ > + if ( a == r && coerce[getArg(p,1)].src && > coerce[getArg(p,1)].fromtype > < > r > ) > + { > + varid = getArg(p,1); > getArg(p,1) = coerce[getArg(p,1)].src; > - if ( b == r && coerce[getArg(p,2)].src && > coerce[getArg(p,2)].fromtype > < > r > ) /*digit/scale test as well*/ > + if ( chkInstruction(NULL, cntxt->nspace, mb, p)) > + p->argv[1] = varid; > + } > + if ( b == r && coerce[getArg(p,2)].src && > coerce[getArg(p,2)].fromtype > < > r > ) > + { > + varid = getArg(p,2); > getArg(p,2) = coerce[getArg(p,2)].src; > + if ( chkInstruction(NULL, cntxt->nspace, mb, p)) > + p->argv[2] = varid; > + } > return; > } > > static void > -coercionOptimizerAggrStep(MalBlkPtr mb, int i, Coercion *coerce) > +coercionOptimizerAggrStep(Client cntxt, MalBlkPtr mb, int i, > Coercion > *coerce) > { > InstrPtr p = getInstrPtr(mb,i); > int r, k; > > + (void) cntxt; > + > if( getModuleId(p) != aggrRef || getFunctionId(p) == 0) return; > if( ! (getFunctionId(p) == subavgRef ) || p->argc !=6) > return; > > r = getColumnType(getVarType(mb, getArg(p,0))); > k = getArg(p,1); > - // check the digits/scale > if( r == TYPE_dbl && coerce[k].src ) > getArg(p,1) = coerce[getArg(p,1)].src; > return; > @@ -124,35 +127,33 @@ OPTcoercionImplementation(Client cntxt,M > if (getModuleId(p) == NULL) > continue; > /* Downscale the type, avoiding hge storage when lng would be > sufficient. > - * The code template can be extended to handle other downscale > options > as > well > */ > #ifdef HAVE_HGE > if ( getModuleId(p) == batcalcRef > && getFunctionId(p) == hgeRef > && p->retc == 1 > - && ( p->argc == 2 > - || ( p->argc == 5 > - && isVarConstant(mb,getArg(p,1)) > - && getArgType(mb,p,1) == TYPE_int > - && isVarConstant(mb,getArg(p,3)) > - && getArgType(mb,p,3) == TYPE_int > - && isVarConstant(mb,getArg(p,4)) > - && getArgType(mb,p,4) == TYPE_int > - /* from-scale == to-scale, i.e., no scale change > */ > - && *(int*) getVarValue(mb, getArg(p,1)) == *(int*) > getVarValue(mb, getArg(p,4)) ) ) ){ > + && ( p->argc == 5 > + && isVarConstant(mb,getArg(p,1)) > + && getArgType(mb,p,1) == TYPE_int > + && isVarConstant(mb,getArg(p,3)) > + && getArgType(mb,p,3) == TYPE_int > + && isVarConstant(mb,getArg(p,4)) > + && getArgType(mb,p,4) == TYPE_int > + /* from-scale == to-scale, i.e., no scale change */ > + && *(int*) getVarValue(mb, getArg(p,1)) == *(int*) > getVarValue(mb, > getArg(p,4)) ) ){ > k = getArg(p,0); > coerce[k].pc= i; > coerce[k].totype= TYPE_hge; > coerce[k].src= getArg(p,2); > coerce[k].fromtype= getColumnType(getArgType(mb,p,2)); > -/* not used, yet !?? > - if (p->argc == 5) { > - coerce[k].digits= getVarConstant(mb,getArg(p,3)).val.ival; > - coerce[k].scale= getVarConstant(mb,getArg(p,4)).val.ival; > - } > +/* not used, yet !?? indeed > + coerce[k].fromscale= getVarConstant(mb,getArg(p,1)).val.ival; > + coerce[k].digits= getVarConstant(mb,getArg(p,3)).val.ival; > + coerce[k].scale= getVarConstant(mb,getArg(p,4)).val.ival; > */ > } > #endif > +/* > if ( getModuleId(p) == batcalcRef > && getFunctionId(p) == dblRef > && p->retc == 1 > @@ -160,7 +161,7 @@ OPTcoercionImplementation(Client cntxt,M > || ( p->argc == 3 > && isVarConstant(mb,getArg(p,1)) > && getArgType(mb,p,1) == TYPE_int > - /* to-scale == 0, i.e., no scale change */ > + //to-scale == 0, i.e., no scale change > && *(int*) getVarValue(mb, getArg(p,1)) == 0 ) ) ) > { > k = getArg(p,0); > coerce[k].pc= i; > @@ -168,8 +169,9 @@ OPTcoercionImplementation(Client cntxt,M > coerce[k].src= getArg(p,1 + (p->argc ==3)); > coerce[k].fromtype= getColumnType(getArgType(mb,p,1 + (p->argc > ==3))); > } > - coercionOptimizerAggrStep(mb, i, coerce); > - coercionOptimizerCalcStep(mb, i, coerce); > +*/ > + coercionOptimizerAggrStep(cntxt,mb, i, coerce); > + coercionOptimizerCalcStep(cntxt,mb, i, coerce); > if (getModuleId(p)==calcRef && p->argc == 2) { > k= coercionOptimizerStep(mb, i, p); > actions += k; > _______________________________________________ > checkin-list mailing list > checkin-list@monetdb.org > https://www.monetdb.org/mailman/listinfo/checkin-list >
-- | Stefan.Manegold@CWI.nl | DB Architectures (DA) | | www.CWI.nl/~manegold/ | Science Park 123 (L321) | | +31 (0)20 592-4212 | 1098 XG Amsterdam (NL) |
_______________________________________________ developers-list mailing list developers-list@monetdb.org https://www.monetdb.org/mailman/listinfo/developers-list
-- | Stefan.Manegold@CWI.nl | DB Architectures (DA) | | www.CWI.nl/~manegold/ | Science Park 123 (L321) | | +31 (0)20 592-4212 | 1098 XG Amsterdam (NL) |
_______________________________________________ developers-list mailing list developers-list@monetdb.org https://www.monetdb.org/mailman/listinfo/developers-list
-- | Stefan.Manegold@CWI.nl | DB Architectures (DA) | | www.CWI.nl/~manegold/ | Science Park 123 (L321) | | +31 (0)20 592-4212 | 1098 XG Amsterdam (NL) |
_______________________________________________ developers-list mailing list developers-list@monetdb.org https://www.monetdb.org/mailman/listinfo/developers-list
_______________________________________________ developers-list mailing list developers-list@monetdb.org https://www.monetdb.org/mailman/listinfo/developers-list
-- | Stefan.Manegold@CWI.nl | DB Architectures (DA) | | www.CWI.nl/~manegold/ | Science Park 123 (L321) | | +31 (0)20 592-4212 | 1098 XG Amsterdam (NL) |
_______________________________________________ developers-list mailing list developers-list@monetdb.org https://www.monetdb.org/mailman/listinfo/developers-list
-- | Stefan.Manegold@CWI.nl | DB Architectures (DA) | | www.CWI.nl/~manegold/ | Science Park 123 (L321) | | +31 (0)20 592-4212 | 1098 XG Amsterdam (NL) |
_______________________________________________ developers-list mailing list developers-list@monetdb.org https://www.monetdb.org/mailman/listinfo/developers-list
-- | Stefan.Manegold@CWI.nl | DB Architectures (DA) | | www.CWI.nl/~manegold/ | Science Park 123 (L321) | | +31 (0)20 592-4212 | 1098 XG Amsterdam (NL) |
participants (2)
-
Hannes Mühleisen
-
Stefan Manegold