Re: [Monetdb-developers] [Monetdb-checkins] MonetDB5/src/mal mal_interpreter.mx, , 1.197, 1.198
On Fri, Sep 28, 2007 at 04:01:00PM +0000, Martin Kersten wrote:
Update of /cvsroot/monetdb/MonetDB5/src/mal In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv5547
Modified Files: mal_interpreter.mx Log Message: Split the interpreter in two versions. A FAST and SLOW version. If there is no request for debugging or performance measurement outstanding, we can go the fast path. However, given the fact that at any time we may enter the debugger, a jump to the slow line is always possible. The code relies on compiler optimization to get rid of blocks with constant arguments. Unclear if all compilers are that smart.
All C compilers (respectively their C preprocessors) are expected to respect C preprocessor directives (e.g., #if ... #endif), "even" with optimization disabled. Hence, why not use those compile-time conditionals instead of "pseudo"-runtime conditionals with constant conditions? (Would also speed up the compilation process...) Stefan
Index: mal_interpreter.mx =================================================================== RCS file: /cvsroot/monetdb/MonetDB5/src/mal/mal_interpreter.mx,v retrieving revision 1.197 retrieving revision 1.198 diff -u -d -r1.197 -r1.198 --- mal_interpreter.mx 24 Sep 2007 08:24:22 -0000 1.197 +++ mal_interpreter.mx 28 Sep 2007 16:00:57 -0000 1.198 @@ -70,6 +70,9 @@ #include "mal_debugger.h" /* for mdbStep() */ #include "mal_type.h"
+#define SLOW 1 +#define FAST 0 + static str runMALsequence( Client cntxt, MalBlkPtr mb, int startpc, int stoppc, MalStkPtr stk, MalStkPtr env, InstrPtr pcicaller); static str setDynamicType(MalBlkPtr mb, VarPtr v, int tpe, int pc); @@ -421,8 +424,27 @@ stkpc = startpc; exceptionVar = exceptionPC = -1; (void)exceptionPC; /* TODO maybe we should use this variable somewhere*/ +@- +From this point onwards we should differentiate fast processing +against monitored processing. Fast processing is possible if there is +no call to the debugger statically/dynamically set. Same holds for +performance control statements. +The code currently does not statically checks the mode change. +Preferrably we should introduce a itrace flag PROFILE +We rely on optimizing compilers to remove the redundant code. +@c + if( malProfileMode == 0 && cntxt->itrace==0){ + while(stkpc < mb->stop && stkpc != stoppc ){ + pci = getInstrPtr(mb,stkpc); + if( malProfileMode + cntxt->itrace) + goto workslow; + @:MALinterpret(FAST)@ + @:MALflowofcontrol(FAST)@ + } + } else while(stkpc < mb->stop && stkpc != stoppc ){ pci = getInstrPtr(mb,stkpc); +workslow: if( cntxt->itrace ) { lng t=0; if( stk->cmd== 0) stk->cmd= cntxt->itrace; @@ -445,8 +467,8 @@ }
@:beginProfile@ - @:MALinterpret@ - @:MALflowofcontrol@ + @:MALinterpret(SLOW)@ + @:MALflowofcontrol(SLOW)@ @:endProfile@ } @:MALwrapup@ @@ -639,11 +661,11 @@ limited. @c switch( pci->token){ - case ASSIGNsymbol: @:assignStmt@ break; - case PATcall: @:patterncall@ break; - case CMDcall: @:commandcall@ break; - case FACcall: @:factorycall@ break; - case FCNcall: @:functioncall@ break; + case ASSIGNsymbol: @:assignStmt(SLOW)@ break; + case PATcall: @:patterncall(SLOW)@ break; + case CMDcall: @:commandcall(SLOW)@ break; + case FACcall: @:factorycall(SLOW)@ break; + case FCNcall: @:functioncall(SLOW)@ break; } /* we don't allow sequential flow control here */ dropMALflowStep(flow,stkpc,n,pending,mb,stk); @@ -710,22 +732,24 @@ @= MALinterpret ret = 0; switch( pci->token){ - case ASSIGNsymbol: @:assignStmt@ break; - case PATcall: @:patterncall@ break; - case CMDcall: @:commandcall@ break; - case FACcall: @:factorycall@ break; - case FCNcall: @:functioncall@ break; + case ASSIGNsymbol: @:assignStmt(@1)@ break; + case PATcall: @:patterncall(@1)@ break; + case CMDcall: @:commandcall(@1)@ break; + case FACcall: @:factorycall(@1)@ break; + case FCNcall: @:functioncall(@1)@ break; case NOOPsymbol: case REMsymbol: break; case ENDsymbol: if( getInstrPtr(mb,0)->token == FACTORYsymbol) ret= shutdownFactory(mb); - if( oldtimer) + if( @1 && oldtimer) cntxt->timer= oldtimer; if( pcicaller && garbageControl(getInstrPtr(mb,0)) ) garbageCollector(mb, stk, TRUE); - @:endProfile@ + if(@1){ + @:endProfile@ + } stkpc= mb->stop; continue; default: @@ -735,7 +759,9 @@ } ret = createScriptException(mb, stkpc,MAL, NULL, "unkown operation"); - @:endProfile@ + if( @1){ + @:endProfile@ + } stkpc= mb->stop; continue; } @@ -886,9 +912,10 @@ if( lhs->vtype == TYPE_bat) BBPincref(lhs->val.br.id, TRUE); } - @:restoreTarget@ + @:restoreTarget(@1)@ ret = 0; - @:exceptionHndlr@ + @:exceptionHndlr(@1)@ + @:timingHndlr(@1)@ } @} @- @@ -1000,7 +1027,7 @@ } } /* Provide debugging support */ - if( GDKdebug & 
10 ){ + if( @1 && (GDKdebug & 10) ){ BAT *b; str oldmsg =0;
@@ -1146,8 +1173,9 @@ ret = createScriptException(mb, stkpc, MAL, NULL, "too many arguments for command call"); } - @:restoreTarget@ - @:exceptionHndlr@ + @:restoreTarget(@1)@ + @:exceptionHndlr(@1)@ + @:timingHndlr(@1)@ } @- @= patterncall @@ -1157,8 +1185,9 @@ else { @:safeTarget@ ret = (str) (*pci->fcn)(mb,stk,pci); - @:restoreTarget@ - @:exceptionHndlr@ + @:restoreTarget(@1)@ + @:exceptionHndlr(@1)@ + @:timingHndlr(@1)@ } @- MAL function calls are relatively expensive, because they have to assemble @@ -1171,8 +1200,9 @@ stk->pcup = stkpc; @:safeTarget@ ret= runMAL(cntxt,pci->blk,1,mb,stk,pci); - @:restoreTarget@ - @:exceptionHndlr@ + @:restoreTarget(@1)@ + @:exceptionHndlr(@1)@ + @:timingHndlr(@1)@ } @- Factory calls are more involved. At this stage it is a synchrononous @@ -1184,7 +1214,8 @@ "reference to MAL function missing"); else ret= runFactory(cntxt,pci->blk,mb,stk,pci); - @:exceptionHndlr@ + @:exceptionHndlr(@1)@ + @:timingHndlr(SLOW)@ @- The type dispatching table in getArgValue can be removed if we determine at compile time the address offset within a ValRecord. @@ -1323,7 +1354,6 @@ @{
@= exceptionHndlr - @:timingHndlr@ if( ret != MAL_SUCCEED ) { str msg = 0, nxt; if( stk->cmd ) { @@ -1569,7 +1599,7 @@ #endif
@= timingHndlr -if( cntxt->flags && stk->cmd != 't' && stk->cmd != 'C'){ +if( @1 && cntxt->flags && stk->cmd != 't' && stk->cmd != 'C'){ if( cntxt->flags & timerFlag) stream_printf(cntxt->fdout,"[%6d usec ",GDKusec()-cntxt->timer); #ifdef HAVE_SYS_RESOURCE_H
------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2005. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ _______________________________________________ Monetdb-checkins mailing list Monetdb-checkins@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/monetdb-checkins
-- | Dr. Stefan Manegold | mailto:Stefan.Manegold@cwi.nl | | CWI, P.O.Box 94079 | http://www.cwi.nl/~manegold/ | | 1090 GB Amsterdam | Tel.: +31 (20) 592-4212 | | The Netherlands | Fax : +31 (20) 592-4312 |
Stefan Manegold wrote:
On Fri, Sep 28, 2007 at 04:01:00PM +0000, Martin Kersten wrote:
Update of /cvsroot/monetdb/MonetDB5/src/mal In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv5547
Modified Files: mal_interpreter.mx Log Message: Split the interpreter in two versions. A FAST and SLOW version. If there is no request for debugging or performance measurement outstanding, we can go the fast path. However, given the fact that at any time we may enter the debugger, a jump to the slow line is always possible. The code relies on compiler optimization to get rid of blocks with constant arguments. Unclear if all compilers are that smart.
All C compilers (respectively their C preprocessors) are expected to respect C preprocessor directives (e.g., #if ... #endif), "even" with optimization disabled. Hence, why not use those compile-time conditionals instead of "pseudo"-runtime conditionals with constant conditions? (Would also speed up the compilation process...)
No. The point is that there should be two execution paths! If possible, the fast path is executed until you issue, e.g., an mdb.start(), mdb.setTrace(true), ... or profiler.start() command; then it falls back to the slow path. The test for the compilers is to remove code fragments like if(0){ do some work }, or at least to be fast on this switch. This avoids manually copying large pieces of code. Microbenchmarks have confirmed significant improvement.
participants (2)
-
Martin Kersten
-
Stefan Manegold