Re: [Monetdb-developers] [Monetdb-pf-checkins] pathfinder/modules/pftijah nexi.c, 1.49, 1.50 pftijah_tokenize.l, 1.12, 1.13 pftijah_util.mx, 1.2, 1.3 serialize_pftijah.mx, 1.41, 1.42
On 2007-02-27 16:43, Jan Flokstra wrote:
Update of /cvsroot/monetdb/pathfinder/modules/pftijah In directory sc8-pr-cvs7.sourceforge.net:/tmp/cvs-serv27686
Modified Files: nexi.c pftijah_tokenize.l pftijah_util.mx serialize_pftijah.mx Log Message: * repair BBP refcount bug for BAT
Is this a fix which also applies to the stable branch?
* reimplement the direct bat access methods in pftijah serialization for more speed (and clarity).
* Start optimizing the pftijah tokenizer. The flex functions are called once per handle_character() call. This leads to 2 malloc's per call. I tried to do without the malloc's, but this caused a lot of strange results :-) I am now planning to craft the flexer by hand. The first small experiment shows there is a lot to gain there (25% speedup in indexing time).
Index: serialize_pftijah.mx =================================================================== RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/serialize_pftijah.mx,v retrieving revision 1.41 retrieving revision 1.42 diff -u -d -r1.41 -r1.42 --- serialize_pftijah.mx 23 Feb 2007 15:11:07 -0000 1.41 +++ serialize_pftijah.mx 27 Feb 2007 15:43:37 -0000 1.42 @@ -31,8 +31,8 @@
extern int handleTijahTerm(struct tijahContextStruct *ctx, char* term);
-extern int useFlexScanner(char* buf, int len, struct tijahContextStruct* tjCtx); /* FLEX */ -extern char* flexScanOneTerm(char* buf, int len); +extern int useFlexScanner(char* buf, struct tijahContextStruct* tjCtx); /* FLEX */ +extern char* flexScanOneTerm(char* buf);
extern char* normalizeTerm(struct tijahContextStruct *ctx, char* term );
@@ -70,15 +70,10 @@ typedef struct dbat_struct { const char* name; BAT* bat; - int oid_mark; - int max_i; - int max_sz; - bit dflt; /* fill with default value during extend */ - int dflt_int; /* the default int value */ - chr dflt_chr; /* the default chr value */ - oid dflt_oid; /* the default oid value */ - /* */ - union { /* cast to perform direct indexex insert in [void,any] BATs */ + oid raw_max; + oid seqbase; + oid seq_max; + union { /* cast to perform direct indexe insert in [void,any] BATs */ void* voidCAST; /* the basecast */ chr* chrCAST; /* cast for [void,chr] BAT */ int* intCAST; /* cast for [void,int] BAT */ @@ -89,7 +84,6 @@ int dbat_init(const char* name, dbat* dbat, BAT* b) { dbat->name = name; dbat->bat = b; - dbat->dflt = FALSE; if ( dbat->bat->htype != TYPE_void ) { stream_printf(GDKerr,"ERROR: dbat_init(%s) non void BAT\n",dbat->name); return 0; @@ -98,31 +92,25 @@ stream_printf(GDKerr,"ERROR: dbat_init(%s) unknown ttype(%d)\n",dbat->name,dbat->bat->ttype); return 0; } - dbat->oid_mark = b->hseqbase; - dbat->max_i = dbat->max_sz = BATcount(dbat->bat); + dbat->seqbase = (oid)b->hseqbase; + dbat->raw_max = (oid)BATcount(dbat->bat); + dbat->seq_max = dbat->raw_max + dbat->seqbase; dbat->cast.voidCAST = (void*)BUNfirst(dbat->bat); /* */ return 1; }
-int dbat_finalize(dbat* dbat) { - BAT* b = dbat->bat;
+int dbat_finalize(dbat* dbat, int topidx) { void* top; + BAT* b = dbat->bat;
- int bottomTop = dbat->max_i; - if ( 0 ) stream_printf(GDKout,"dbat_finalize(size=%d)\n",dbat->max_i); + topidx -= (int)dbat->seqbase; + int bottomTop = topidx; switch( b->ttype ) { case TYPE_int : top = &dbat->cast.intCAST[bottomTop]; break; - case TYPE_chr: { - b->batBuns->free = dbat->max_i; - BATsetcount(b, dbat->max_i); - b->tsorted = 0; - b->batDirty = TRUE; /* VERY important this one */ - return 1; - } case TYPE_oid: top = &dbat->cast.oidCAST[bottomTop]; break; @@ -137,7 +125,7 @@ /* */ dbat->name = NULL; dbat->bat = NULL; - dbat->max_i = dbat->max_sz = 0; + dbat->raw_max = dbat->seqbase = 0; /* */ return 1; } @@ -145,9 +133,14 @@ #define MINCHUNK 8192 #define MAXCHUNK 67108864
-int dbat_extend(dbat* dbat, int i_mark) { - /* CHECK THIS if ( i_mark ) i = i_mark - dbat->oid_mark; */ - size_t newsize = MAX(MIN(MAX(MINCHUNK,dbat->max_sz*2),(size_t)(dbat->max_sz+MAXCHUNK)),(size_t)i_mark); +int dbat_extend(dbat* dbat, oid min_i /*raw-index*/, size_t forced_size) { + size_t newsize; + + if ( forced_size ) { + newsize = forced_size; + } else { + newsize = MAX(MIN(MAX(MINCHUNK,dbat->raw_max*2),(size_t)(dbat->raw_max+MAXCHUNK)),(size_t)min_i); + }
/* first check if the number of BUN's < INT_MAX. If this was the case * and the previous time INT_MAX was returned this means the BAT cannot @@ -156,94 +149,34 @@ if ( newsize > INT_MAX ) { newsize = INT_MAX;
- if ( dbat->max_sz == INT_MAX ) { + if ( dbat->raw_max == INT_MAX ) { GDKerror("dbat_extend: BATextend[\"%s\"](size>INT_MAX) fails\n","incomplete"); return -1; } } - if ( 0 ) { stream_printf(GDKout,"dbat_extend[%s](%d -> %d)\n",dbat->name,dbat->max_sz,newsize); } - dbat->max_sz= newsize; +#if 0 + stream_printf(GDKout,"dbat_extend[%s](%d -> %d)\n",dbat->name,dbat->raw_max,newsize); +#endif + dbat->raw_max= newsize; + dbat->seq_max = dbat->raw_max + dbat->seqbase; if ( !(dbat->bat = BATextend(dbat->bat,newsize)) ) { GDKerror("dbat_extend: BATextend[\"%s\"](to %d) fails\n","incomplete",newsize); return -1; } dbat->cast.voidCAST = (void*)BUNfirst(dbat->bat); - /* - * now check if there's a default value handler used - * - */ - if ( dbat->dflt ) { - switch( dbat->bat->ttype ) { - case TYPE_int : { - int v = dbat->dflt_int; - int *to = &dbat->cast.intCAST[dbat->max_sz]; - for(register int *p = &dbat->cast.intCAST[dbat->max_i]; p
dflt_chr; - chr *to = &dbat->cast.chrCAST[dbat->max_sz]; - for(register chr *p = &dbat->cast.chrCAST[dbat->max_i]; p dflt_oid; - oid *to = &dbat->cast.oidCAST[dbat->max_sz]; - for(register oid *p = &dbat->cast.oidCAST[dbat->max_i]; p int dbat_sizeHint(dbat* dbat, int sizeHint_mark) { - int sizeHint = sizeHint_mark - dbat->oid_mark; - int estimate = dbat->max_i + sizeHint; - - return dbat_extend(dbat, estimate); -} - -INLINE static int dbat_set_oid(dbat* dbat, int pos_mark, oid v) { - register int pos; + int sizeHint = sizeHint_mark - dbat->seqbase; + int estimate = dbat->raw_max + sizeHint;
- if ( (pos=pos_mark - dbat->oid_mark) < dbat->max_i ) { - dbat->cast.oidCAST[pos] = v; - return 1; - } else { - if ( pos >= dbat->max_sz ) { - if ( dbat_extend(dbat,pos) < 0 ) - return -1; - } - dbat->max_i = pos + 1; - dbat->cast.oidCAST[pos] = v; - return 1; - } + return dbat_extend(dbat, estimate, 0); }
-INLINE static int dbat_set_int(dbat* dbat, int pos_mark, int v) { - register int pos; +#define dbat_set_oid(DBAT,I,V) (DBAT)->cast.oidCAST[I-(DBAT)->seqbase] = V
- if ( (pos=pos_mark - dbat->oid_mark) < dbat->max_i ) { - dbat->cast.intCAST[pos] = v; - return 1; - } else { - if ( pos >= dbat->max_sz ) { - if ( dbat_extend(dbat,pos) < 0 ) - return -1; - } - dbat->max_i = pos + 1; - dbat->cast.intCAST[pos] = v; - return 1; - } -} +#define dbat_set_int(DBAT,I,V) (DBAT)->cast.intCAST[I-(DBAT)->seqbase] = V
/************************************************ * @@ -315,9 +248,6 @@
/************************************************ * - * - * First the temporary shredder for Tijah by JF - * */
INLINE static oid @@ -328,15 +258,15 @@ BUN bun;
HASHfnd_str(bun, tjctx->hm_globalTag, (str)t); - if ( bun ) - /* if ( (bun = BUNfnd(tjctx->hm_globalTag,t)) ) OLD */ + if ( bun ) { return *(oid*)BUNtail(tjctx->hm_globalTag,bun); - else { - if ( !BUNins(tjctx->b_globalTag, &tjctx->n_globalTag, (str)t, FALSE) ) { + } else { + if ( BUNins(tjctx->b_globalTag, &tjctx->n_globalTag, (str)t, FALSE) ) { + return tjctx->n_globalTag++; + } else { GDKerror("INSERT OF \"%s\" in globalTag fails.\n"); return oid_nil; - } else - return tjctx->n_globalTag++; + } } #endif } @@ -349,10 +279,7 @@ BUN bun;
HASHfnd_str(bun, tjctx->hm_globalTerm, (str)t); - if ( bun ) { - return *(oid*)BUNtail(tjctx->hm_globalTerm,bun); - } else - return oid_nil; + return ( bun ? *(oid*)BUNtail(tjctx->hm_globalTerm,bun) : oid_nil ); }
INLINE static oid @@ -366,22 +293,35 @@ if ( bun ) return *(oid*)BUNtail(tjctx->hm_globalTerm,bun); else { - if ( !BUNins(tjctx->b_globalTerm, &tjctx->n_globalTerm, (str)t, FALSE)){ + if ( BUNins(tjctx->b_globalTerm, &tjctx->n_globalTerm, (str)t, FALSE)){ + return tjctx->n_globalTerm++; + } else { GDKerror("INSERT OF \"%s\" in globalTerm fails.\n"); return oid_nil; - } else - return tjctx->n_globalTerm++; + } } #endif }
-#define tj_add2plane(TJCTX,O) \ - ((dbat_set_oid(&(TJCTX)->dbat_collPre, (TJCTX)->tijahPre, O) < 0) \ - ? oid_nil : ((oid)(TJCTX)->tijahPre++)) +INLINE oid tj_extend_plane(struct tijahContextStruct *tjctx) { + oid base = tjctx->tijahPre - tjctx->dbat_collPre.seqbase;
-#define insertPreSize(TJCTX,POS,SIZE) \ - dbat_set_int(&TJCTX->dbat_collSize,(int)POS,SIZE) + if ( base >= tjctx->dbat_collPre.raw_max ) { + if ( dbat_extend(&tjctx->dbat_collPre,base, 0) < 0 ) + return oid_nil; + /* IMPORTANT: the size of the two bats is synchronized by the use + * of the forced size (last) parameter of dbat_extend + */ + if ( dbat_extend(&tjctx->dbat_collSize,base,tjctx->dbat_collPre.raw_max) < 0 ) + return oid_nil; + } + return tjctx->tijahPre++; +}
+#define tj_newPre(TJCTX) \ + (((TJCTX)->tijahPre < (TJCTX)->dbat_collPre.seq_max) \ + ? \ + ((oid)(TJCTX)->tijahPre++) : tj_extend_plane(TJCTX))
int handleTijahTerm(struct tijahContextStruct *tjctx, char* term) { @@ -397,13 +337,13 @@ } } if ( (termOid = tj_termOid(tjctx, term)) == oid_nil ) - return 0; + return -1; } if ( termOid ) { /* term is not a stopword */ - if ( (tjPre = tj_add2plane(tjctx, termOid)) == oid_nil ) + if ( (tjPre = tj_newPre(tjctx) ) == oid_nil ) return 0; - if ( insertPreSize(tjctx,tjPre,0) < 0 ) - return -1; + dbat_set_oid(&tjctx->dbat_collPre, tjPre, termOid); + dbat_set_int(&tjctx->dbat_collSize,(int)tjPre,0); #ifdef TJ_TRACE if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:TERM: \"%s\", termoid=%d, tjPre=%d\n",tjctx->name,term,termOid,tjPre); #endif @@ -416,40 +356,13 @@ return 1; }
-/************ - * - * The part where the Strings from Pathfinder are shredded into words - * by Tijah. The USE_FLEX macro determines if the strings is shredded - * by Hennings fancy flex scanner or Jan's simple strtok() scanner. - */ - -const char* obsoleteNexiChars = " \t\n\r,:;&*%$#!@="; - -int -useStrtokScanner(tjCtx* tjctx, char* s) -{ - char *t; - int sz = 0; - -#ifdef TJ_TRACE - if (TJ_TRACE) stream_printf(GDKout,"C[%s]:CHARACTERS:\n",tjctx->name); -#endif - if ( (t = strtok(s,obsoleteNexiChars)) ) do { - /* not the empty string here */ - if ( handleTijahTerm(tjctx,t) < 0 ) - return -1; - sz++; - } while ( (t=strtok(NULL,obsoleteNexiChars)) ); - return 1; -} - /************************************************ * * Now the real output handlers */
-#ifdef notused +#if 0 static int handle_sizeHint(XqueryCtx* ctx, int hinted_size) { tjCtx* tjctx = (tjCtx*)ctx->driverWs; @@ -502,14 +415,12 @@ return (str)str_nil; }
-#define GUESSFORCE FALSE - /* * Replace the value of a collection parameter int the collection parameter * bat */ static int replaceCollParam(tjCtx* tjctx, str param, str val) { - return ( BUNreplace(tjctx->b_collParam,param,val,GUESSFORCE) != NULL ); + return ( BUNreplace(tjctx->b_collParam,param,val,FALSE) != NULL ); }
static BAT* @@ -894,10 +805,10 @@ /* if ( DOEMIT(tjctx) ) { */ if ( (termOid = tj_tagOid(tjctx, name)) == oid_nil ) return 0; - if ( (tjPre = tj_add2plane(tjctx, termOid)) == oid_nil ) + if ( (tjPre = tj_newPre(tjctx) ) == oid_nil ) return 0; + dbat_set_oid(&tjctx->dbat_collPre, tjPre, termOid); if ( tj_pushTag(tjctx,tjPre) < 0 ) return 0; - if ( 0 ) stream_printf(GDKout,"C[%s]:startElement: \"%s\", termoid=%d, Tijah pre#=%d, Pathfinder pre#=%d\n",tjctx->name,name,termOid,tjPre,pre); #ifdef TJ_TRACE if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:startElement: \"%s\", termoid=%d, Tijah pre#=%d, Pathfinder pre#=%d\n",tjctx->name,name,termOid,tjPre,pre); #endif @@ -913,8 +824,7 @@ --tjctx->doc_height; oid start = tj_popTag(tjctx); /* oid of the first node of the element */ int size = tjctx->tijahPre - start - 1; /* the Tijah element size */ - if ( insertPreSize(tjctx,start,size) < 0 ) - return 0; + dbat_set_int(&tjctx->dbat_collSize,(int)start,size); #ifdef TJ_TRACE if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:empty_endElement: \"%s\"\n", tjctx->name,""); #endif @@ -934,8 +844,7 @@ /* if ( DOEMIT(tjctx) ) { */ oid start = tj_popTag(tjctx); /* oid of the first node of the element */ int size = tjctx->tijahPre - start - 1; /* the Tijah element size */ - if ( insertPreSize(tjctx,start,size) < 0 ) - return 0; + dbat_set_int(&tjctx->dbat_collSize,(int)start,size); #ifdef TJ_TRACE if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:endElement: \"%s\"\n", tjctx->name,name); #endif @@ -944,8 +853,6 @@ return 1; }
-#define USE_FLEX 1 - /** * Output generation handler. Handles equivalent of * SAX characters() event. */ @@ -954,28 +861,23 @@ EMPTY_CHECK; tjCtx* tjctx = (tjCtx*)ctx->driverWs;
+ register char* p = (char*)ch; + while( *p && isspace(*p) ) p++; + if ( !*p ) + return 1; #ifdef TJ_TRACE - if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:handle_characters(%s) start\n",tjctx->name, (char*)ch); + if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:handle_characters(%s) start\n",tjctx->name, p); #endif
if ( DOEMIT(tjctx) ) { -#ifdef USE_FLEX - return useFlexScanner((char*)ch,strlen((char*)ch),tjctx); -#else - return useStrtokScanner(tjctx,(char*)ch); -#endif + return useFlexScanner(p,tjctx); } return 1; }
char* normalizeTerm(struct tijahContextStruct *tjctx, char* term ) { char *res; -#ifdef USE_FLEX - res = flexScanOneTerm((char*)term,strlen((char*)term)); -#else - res = strtok(term,obsoleteNexiChars); -#endif - /* INCOMPLETE, should make shure tijahContext is always avail. here */ + res = flexScanOneTerm((char*)term); if ( res && tjctx && tjctx->stemCtx->stem) { if ( !(res = (char*)tjctx->stemCtx->stem(tjctx->stemCtx,(char*)res)) ) { /* must be a stopword */ @@ -986,14 +888,6 @@ }
int CMDtj_normalizeTerm(char** res, str term, str stemmer) { -//Leave tokenization disabled for now -// char* tokenized; -//#ifdef USE_FLEX -// tokenized = flexScanOneTerm(term,strlen(term)); -//#else -// tokenized = strtok(term,obsoleteNexiChars); -//#endif - tjStemCtx* stemCtx = getStemmingContext( stemmer );
if ( stemCtx->stem ) { @@ -1123,13 +1017,9 @@ #ifdef TJ_TRACE if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:FINISH INDEXING\n",tjctx->name); #endif - - /* feature not used anymore ????? */ - if ( 0 /* ROEL CHANGE VIRTUAL ROOT SIZE HERE */ ) - insertPreSize(tjctx,0,tjctx->tijahPre - 1); - if ( dbat_finalize(&tjctx->dbat_collPre) < 0 ) + if ( dbat_finalize(&tjctx->dbat_collPre, tjctx->tijahPre) < 0 ) return GDK_FAIL; - if ( dbat_finalize(&tjctx->dbat_collSize) < 0 ) + if ( dbat_finalize(&tjctx->dbat_collSize, tjctx->tijahPre) < 0 ) return GDK_FAIL; #ifdef TJ_TRACE if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:FINALIZED DIRECT BATS\n",tjctx->name);
Index: pftijah_tokenize.l =================================================================== RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/pftijah_tokenize.l,v retrieving revision 1.12 retrieving revision 1.13 diff -u -d -r1.12 -r1.13 --- pftijah_tokenize.l 9 Jan 2007 15:44:39 -0000 1.12 +++ pftijah_tokenize.l 27 Feb 2007 15:43:37 -0000 1.13 @@ -115,7 +115,40 @@
%%
-int useFlexScanner(char* buf, int len, struct tijahContextStruct* tjCtx) { +int OPT0useFlexScanner(char* buf, struct tijahContextStruct* tjCtx) { + /* UPDATE: this delivers very strange testset results and should not be + * used I think. + */ + /* This is an optimized version of the flex scanner which does not copy the + * input buffer. The only strange thing about this interface is that it + * requires 2 YY_END_OF_BUFFER_CHAR (eg. 0) at the end of the buffer. The + * size of the buffer is inclusive the 2 0's. + * The last zero is toggled with its original value to prevent corruption + * of memory management tables. This was for me the only way to prevent + * copying here. + */ + int len = strlen(buf); + char remember = buf[len+1]; + buf[len+1] = YY_END_OF_BUFFER_CHAR; + YY_BUFFER_STATE myBuf = yy_scan_buffer(buf, len+2); + + if ( !myBuf ) { + stream_printf(GDKout,"# useFlexScanner: unable to get setup non-copy buffer."); + return 0; + } + while ( pftijah_tokenizelex() ) { + /* stream_printf(GDKout,"# scan(%s).\n",pftijah_tokenizetext); */ + if ( !handleTijahTerm(tjCtx, pftijah_tokenizetext) ) + return 0; + } + yy_delete_buffer(myBuf); + buf[len+1] = remember; + return 1; +} + +int useFlexScanner(char* buf, struct tijahContextStruct* tjCtx) { + // the original + int len = strlen(buf); YY_BUFFER_STATE myBuf = yy_scan_bytes(buf, len); while (pftijah_tokenizelex()) { if ( !handleTijahTerm(tjCtx, pftijah_tokenizetext) ) @@ -125,6 +158,40 @@ return 1; }
+int OPT2useFlexScanner(char* input, struct tijahContextStruct* tjctx) +{ + /* the fast function. This function is in the pftijah context with lots + * of small strings to tokenize many times faster as the flex and the + * strtok() methods which seem to have a rather larger overhead + */ + register char* s = input; + register char x; +// #define EMIT x=*s; *s=0; stream_printf(GDKout,"#[%s]\n",base);if (!handleTijahTerm(tjctx,base)) return 0; *s=x +#define EMIT x=*s; *s=0; if (!handleTijahTerm(tjctx,base)) return 0; *s=x + + while ( 1 ) { + while ( isspace( *s ) ) s++; + if ( *s ) { + char* base = s; + if ( isalnum(*s) ) { + if ( isdigit(*s) ) { + while ( isdigit(*++s) ) ; + EMIT; + } else { + if (isupper(*s)) *s=tolower(*s); + while ( isalnum(*++s) ) if (isupper(*s)) *s=tolower(*s); + EMIT; + } + } else { + // INCOMPLETE, ENTITIES HERE + // stream_printf(GDKout,"#[SKIPPING:%c]\n",*s); + s++; + } + } else + return 1; + } +} + char* tijah_tokenize_string(char* buf, int len, char* outbuf) { int cnt = 0; YY_BUFFER_STATE myBuf = yy_scan_bytes(buf, len); @@ -137,9 +204,10 @@ return outbuf; }
-char* flexScanOneTerm(char* buf, int len) { +char* flexScanOneTerm(char* buf) { char *res; char resBUFF[256]; + int len = strlen(buf);
YY_BUFFER_STATE myBuf = yy_scan_bytes(buf, len); if ( pftijah_tokenizelex() ) {
Index: nexi.c =================================================================== RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/nexi.c,v retrieving revision 1.49 retrieving revision 1.50 diff -u -d -r1.49 -r1.50 --- nexi.c 23 Feb 2007 15:11:05 -0000 1.49 +++ nexi.c 27 Feb 2007 15:43:37 -0000 1.50 @@ -455,6 +455,7 @@ /* * Now find out if the collection is fragmented or not. */ + /* INCOMPLETE, ERROR HERE WITH REFCOUNTS IN HEAD */ BAT* fb = pftu_lookup_bat(pftu_batname1("tj_%s_fragments",(char*)parserCtx->collection,0)); if ( ! fb ) { stream_printf(GDKerr,"Error: cannot find fragments bat for collection \"%s\".\n",parserCtx->collection); @@ -471,6 +472,8 @@ parserCtx->ffPfx = ""; parserCtx->flastPfx = ", str(1)"; } + BBPunfix(BBPcacheid(fb)); + fb = NULL; // Some special cases for NLLR, since NLLR only works with COARSE2 at the moment if ( txt_retr_model->model == MODEL_NLLR ) { // Switch to COARSE2 algebra for NLLR
Index: pftijah_util.mx =================================================================== RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/pftijah_util.mx,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- pftijah_util.mx 9 Jan 2007 17:15:23 -0000 1.2 +++ pftijah_util.mx 27 Feb 2007 15:43:37 -0000 1.3 @@ -73,6 +73,7 @@ if ( b == bat_nil ) { return NULL; } else { + BBPfix(b); return BBPdescriptor(b); } }
------------------------------------------------------------------------- Take Surveys. Earn Cash. Influence the Future of IT Join SourceForge.net's Techsay panel and you'll get the chance to share your opinions on IT & business topics through brief surveys-and earn cash http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV _______________________________________________ Monetdb-pf-checkins mailing list Monetdb-pf-checkins@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins
-- Sjoerd Mullender
On 2/27/2007, "Sjoerd Mullender" wrote:
On 2007-02-27 16:43, Jan Flokstra wrote:
Update of /cvsroot/monetdb/pathfinder/modules/pftijah In directory sc8-pr-cvs7.sourceforge.net:/tmp/cvs-serv27686
Modified Files: nexi.c pftijah_tokenize.l pftijah_util.mx serialize_pftijah.mx Log Message: * repair BBP refcount bug for BAT
Is this a fix which also applies to the stable branch?
I'm not sure yet. The bug only shows in the HEAD branch and does not occur in the release branch. The problem was that I did: bat b = BBPindex(......); if ( b != bat_nil ) return BBPdescriptor(b) The refcount assert crash occurs in BBPdescriptor(). I had used this construction before and never had any problem. The bug made the "Current" branch useless, so I decided to (un)fix(:) it quickly with a BBPfix() / BBPunfix(). I will try to figure out what to do next in the near future. Maybe I will even try to consult the CWI people :-)
* reimplement the direct bat access methods in pftijah serialization for more speed (and clarity).
* Start optimizing the pftijah tokenizer. The flex functions are called once per handle_character() call. This leads to 2 malloc's per call. I tried to do without the malloc's, but this caused a lot of strange results :-) I am now planning to craft the flexer by hand. The first small experiment shows there is a lot to gain there (25% speedup in indexing time).
Index: serialize_pftijah.mx =================================================================== RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/serialize_pftijah.mx,v retrieving revision 1.41 retrieving revision 1.42 diff -u -d -r1.41 -r1.42 --- serialize_pftijah.mx 23 Feb 2007 15:11:07 -0000 1.41 +++ serialize_pftijah.mx 27 Feb 2007 15:43:37 -0000 1.42 @@ -31,8 +31,8 @@
extern int handleTijahTerm(struct tijahContextStruct *ctx, char* term);
-extern int useFlexScanner(char* buf, int len, struct tijahContextStruct* tjCtx); /* FLEX */ -extern char* flexScanOneTerm(char* buf, int len); +extern int useFlexScanner(char* buf, struct tijahContextStruct* tjCtx); /* FLEX */ +extern char* flexScanOneTerm(char* buf);
extern char* normalizeTerm(struct tijahContextStruct *ctx, char* term );
@@ -70,15 +70,10 @@ typedef struct dbat_struct { const char* name; BAT* bat; - int oid_mark; - int max_i; - int max_sz; - bit dflt; /* fill with default value during extend */ - int dflt_int; /* the default int value */ - chr dflt_chr; /* the default chr value */ - oid dflt_oid; /* the default oid value */ - /* */ - union { /* cast to perform direct indexex insert in [void,any] BATs */ + oid raw_max; + oid seqbase; + oid seq_max; + union { /* cast to perform direct indexe insert in [void,any] BATs */ void* voidCAST; /* the basecast */ chr* chrCAST; /* cast for [void,chr] BAT */ int* intCAST; /* cast for [void,int] BAT */ @@ -89,7 +84,6 @@ int dbat_init(const char* name, dbat* dbat, BAT* b) { dbat->name = name; dbat->bat = b; - dbat->dflt = FALSE; if ( dbat->bat->htype != TYPE_void ) { stream_printf(GDKerr,"ERROR: dbat_init(%s) non void BAT\n",dbat->name); return 0; @@ -98,31 +92,25 @@ stream_printf(GDKerr,"ERROR: dbat_init(%s) unknown ttype(%d)\n",dbat->name,dbat->bat->ttype); return 0; } - dbat->oid_mark = b->hseqbase; - dbat->max_i = dbat->max_sz = BATcount(dbat->bat); + dbat->seqbase = (oid)b->hseqbase; + dbat->raw_max = (oid)BATcount(dbat->bat); + dbat->seq_max = dbat->raw_max + dbat->seqbase; dbat->cast.voidCAST = (void*)BUNfirst(dbat->bat); /* */ return 1; }
-int dbat_finalize(dbat* dbat) { - BAT* b = dbat->bat;
+int dbat_finalize(dbat* dbat, int topidx) { void* top; + BAT* b = dbat->bat;
- int bottomTop = dbat->max_i; - if ( 0 ) stream_printf(GDKout,"dbat_finalize(size=%d)\n",dbat->max_i); + topidx -= (int)dbat->seqbase; + int bottomTop = topidx; switch( b->ttype ) { case TYPE_int : top = &dbat->cast.intCAST[bottomTop]; break; - case TYPE_chr: { - b->batBuns->free = dbat->max_i; - BATsetcount(b, dbat->max_i); - b->tsorted = 0; - b->batDirty = TRUE; /* VERY important this one */ - return 1; - } case TYPE_oid: top = &dbat->cast.oidCAST[bottomTop]; break; @@ -137,7 +125,7 @@ /* */ dbat->name = NULL; dbat->bat = NULL; - dbat->max_i = dbat->max_sz = 0; + dbat->raw_max = dbat->seqbase = 0; /* */ return 1; } @@ -145,9 +133,14 @@ #define MINCHUNK 8192 #define MAXCHUNK 67108864
-int dbat_extend(dbat* dbat, int i_mark) { - /* CHECK THIS if ( i_mark ) i = i_mark - dbat->oid_mark; */ - size_t newsize = MAX(MIN(MAX(MINCHUNK,dbat->max_sz*2),(size_t)(dbat->max_sz+MAXCHUNK)),(size_t)i_mark); +int dbat_extend(dbat* dbat, oid min_i /*raw-index*/, size_t forced_size) { + size_t newsize; + + if ( forced_size ) { + newsize = forced_size; + } else { + newsize = MAX(MIN(MAX(MINCHUNK,dbat->raw_max*2),(size_t)(dbat->raw_max+MAXCHUNK)),(size_t)min_i); + }
/* first check if the number of BUN's < INT_MAX. If this was the case * and the previous time INT_MAX was returned this means the BAT cannot @@ -156,94 +149,34 @@ if ( newsize > INT_MAX ) { newsize = INT_MAX;
- if ( dbat->max_sz == INT_MAX ) { + if ( dbat->raw_max == INT_MAX ) { GDKerror("dbat_extend: BATextend[\"%s\"](size>INT_MAX) fails\n","incomplete"); return -1; } } - if ( 0 ) { stream_printf(GDKout,"dbat_extend[%s](%d -> %d)\n",dbat->name,dbat->max_sz,newsize); } - dbat->max_sz= newsize; +#if 0 + stream_printf(GDKout,"dbat_extend[%s](%d -> %d)\n",dbat->name,dbat->raw_max,newsize); +#endif + dbat->raw_max= newsize; + dbat->seq_max = dbat->raw_max + dbat->seqbase; if ( !(dbat->bat = BATextend(dbat->bat,newsize)) ) { GDKerror("dbat_extend: BATextend[\"%s\"](to %d) fails\n","incomplete",newsize); return -1; } dbat->cast.voidCAST = (void*)BUNfirst(dbat->bat); - /* - * now check if there's a default value handler used - * - */ - if ( dbat->dflt ) { - switch( dbat->bat->ttype ) { - case TYPE_int : { - int v = dbat->dflt_int; - int *to = &dbat->cast.intCAST[dbat->max_sz]; - for(register int *p = &dbat->cast.intCAST[dbat->max_i]; p
dflt_chr; - chr *to = &dbat->cast.chrCAST[dbat->max_sz]; - for(register chr *p = &dbat->cast.chrCAST[dbat->max_i]; p dflt_oid; - oid *to = &dbat->cast.oidCAST[dbat->max_sz]; - for(register oid *p = &dbat->cast.oidCAST[dbat->max_i]; p int dbat_sizeHint(dbat* dbat, int sizeHint_mark) { - int sizeHint = sizeHint_mark - dbat->oid_mark; - int estimate = dbat->max_i + sizeHint; - - return dbat_extend(dbat, estimate); -} - -INLINE static int dbat_set_oid(dbat* dbat, int pos_mark, oid v) { - register int pos; + int sizeHint = sizeHint_mark - dbat->seqbase; + int estimate = dbat->raw_max + sizeHint;
- if ( (pos=pos_mark - dbat->oid_mark) < dbat->max_i ) { - dbat->cast.oidCAST[pos] = v; - return 1; - } else { - if ( pos >= dbat->max_sz ) { - if ( dbat_extend(dbat,pos) < 0 ) - return -1; - } - dbat->max_i = pos + 1; - dbat->cast.oidCAST[pos] = v; - return 1; - } + return dbat_extend(dbat, estimate, 0); }
-INLINE static int dbat_set_int(dbat* dbat, int pos_mark, int v) { - register int pos; +#define dbat_set_oid(DBAT,I,V) (DBAT)->cast.oidCAST[I-(DBAT)->seqbase] = V
- if ( (pos=pos_mark - dbat->oid_mark) < dbat->max_i ) { - dbat->cast.intCAST[pos] = v; - return 1; - } else { - if ( pos >= dbat->max_sz ) { - if ( dbat_extend(dbat,pos) < 0 ) - return -1; - } - dbat->max_i = pos + 1; - dbat->cast.intCAST[pos] = v; - return 1; - } -} +#define dbat_set_int(DBAT,I,V) (DBAT)->cast.intCAST[I-(DBAT)->seqbase] = V
/************************************************ * @@ -315,9 +248,6 @@
/************************************************ * - * - * First the temporary shredder for Tijah by JF - * */
INLINE static oid @@ -328,15 +258,15 @@ BUN bun;
HASHfnd_str(bun, tjctx->hm_globalTag, (str)t); - if ( bun ) - /* if ( (bun = BUNfnd(tjctx->hm_globalTag,t)) ) OLD */ + if ( bun ) { return *(oid*)BUNtail(tjctx->hm_globalTag,bun); - else { - if ( !BUNins(tjctx->b_globalTag, &tjctx->n_globalTag, (str)t, FALSE) ) { + } else { + if ( BUNins(tjctx->b_globalTag, &tjctx->n_globalTag, (str)t, FALSE) ) { + return tjctx->n_globalTag++; + } else { GDKerror("INSERT OF \"%s\" in globalTag fails.\n"); return oid_nil; - } else - return tjctx->n_globalTag++; + } } #endif } @@ -349,10 +279,7 @@ BUN bun;
HASHfnd_str(bun, tjctx->hm_globalTerm, (str)t); - if ( bun ) { - return *(oid*)BUNtail(tjctx->hm_globalTerm,bun); - } else - return oid_nil; + return ( bun ? *(oid*)BUNtail(tjctx->hm_globalTerm,bun) : oid_nil ); }
INLINE static oid @@ -366,22 +293,35 @@ if ( bun ) return *(oid*)BUNtail(tjctx->hm_globalTerm,bun); else { - if ( !BUNins(tjctx->b_globalTerm, &tjctx->n_globalTerm, (str)t, FALSE)){ + if ( BUNins(tjctx->b_globalTerm, &tjctx->n_globalTerm, (str)t, FALSE)){ + return tjctx->n_globalTerm++; + } else { GDKerror("INSERT OF \"%s\" in globalTerm fails.\n"); return oid_nil; - } else - return tjctx->n_globalTerm++; + } } #endif }
-#define tj_add2plane(TJCTX,O) \ - ((dbat_set_oid(&(TJCTX)->dbat_collPre, (TJCTX)->tijahPre, O) < 0) \ - ? oid_nil : ((oid)(TJCTX)->tijahPre++)) +INLINE oid tj_extend_plane(struct tijahContextStruct *tjctx) { + oid base = tjctx->tijahPre - tjctx->dbat_collPre.seqbase;
-#define insertPreSize(TJCTX,POS,SIZE) \ - dbat_set_int(&TJCTX->dbat_collSize,(int)POS,SIZE) + if ( base >= tjctx->dbat_collPre.raw_max ) { + if ( dbat_extend(&tjctx->dbat_collPre,base, 0) < 0 ) + return oid_nil; + /* IMPORTANT: the size of the two bats is synchronized by the use + * of the forced size (last) parameter of dbat_extend + */ + if ( dbat_extend(&tjctx->dbat_collSize,base,tjctx->dbat_collPre.raw_max) < 0 ) + return oid_nil; + } + return tjctx->tijahPre++; +}
+#define tj_newPre(TJCTX) \ + (((TJCTX)->tijahPre < (TJCTX)->dbat_collPre.seq_max) \ + ? \ + ((oid)(TJCTX)->tijahPre++) : tj_extend_plane(TJCTX))
int handleTijahTerm(struct tijahContextStruct *tjctx, char* term) { @@ -397,13 +337,13 @@ } } if ( (termOid = tj_termOid(tjctx, term)) == oid_nil ) - return 0; + return -1; } if ( termOid ) { /* term is not a stopword */ - if ( (tjPre = tj_add2plane(tjctx, termOid)) == oid_nil ) + if ( (tjPre = tj_newPre(tjctx) ) == oid_nil ) return 0; - if ( insertPreSize(tjctx,tjPre,0) < 0 ) - return -1; + dbat_set_oid(&tjctx->dbat_collPre, tjPre, termOid); + dbat_set_int(&tjctx->dbat_collSize,(int)tjPre,0); #ifdef TJ_TRACE if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:TERM: \"%s\", termoid=%d, tjPre=%d\n",tjctx->name,term,termOid,tjPre); #endif @@ -416,40 +356,13 @@ return 1; }
-/************ - * - * The part where the Strings from Pathfinder are shredded into words - * by Tijah. The USE_FLEX macro determines if the strings is shredded - * by Hennings fancy flex scanner or Jan's simple strtok() scanner. - */ - -const char* obsoleteNexiChars = " \t\n\r,:;&*%$#!@="; - -int -useStrtokScanner(tjCtx* tjctx, char* s) -{ - char *t; - int sz = 0; - -#ifdef TJ_TRACE - if (TJ_TRACE) stream_printf(GDKout,"C[%s]:CHARACTERS:\n",tjctx->name); -#endif - if ( (t = strtok(s,obsoleteNexiChars)) ) do { - /* not the empty string here */ - if ( handleTijahTerm(tjctx,t) < 0 ) - return -1; - sz++; - } while ( (t=strtok(NULL,obsoleteNexiChars)) ); - return 1; -} - /************************************************ * * Now the real output handlers */
-#ifdef notused +#if 0 static int handle_sizeHint(XqueryCtx* ctx, int hinted_size) { tjCtx* tjctx = (tjCtx*)ctx->driverWs; @@ -502,14 +415,12 @@ return (str)str_nil; }
-#define GUESSFORCE FALSE - /* * Replace the value of a collection parameter int the collection parameter * bat */ static int replaceCollParam(tjCtx* tjctx, str param, str val) { - return ( BUNreplace(tjctx->b_collParam,param,val,GUESSFORCE) != NULL ); + return ( BUNreplace(tjctx->b_collParam,param,val,FALSE) != NULL ); }
static BAT* @@ -894,10 +805,10 @@ /* if ( DOEMIT(tjctx) ) { */ if ( (termOid = tj_tagOid(tjctx, name)) == oid_nil ) return 0; - if ( (tjPre = tj_add2plane(tjctx, termOid)) == oid_nil ) + if ( (tjPre = tj_newPre(tjctx) ) == oid_nil ) return 0; + dbat_set_oid(&tjctx->dbat_collPre, tjPre, termOid); if ( tj_pushTag(tjctx,tjPre) < 0 ) return 0; - if ( 0 ) stream_printf(GDKout,"C[%s]:startElement: \"%s\", termoid=%d, Tijah pre#=%d, Pathfinder pre#=%d\n",tjctx->name,name,termOid,tjPre,pre); #ifdef TJ_TRACE if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:startElement: \"%s\", termoid=%d, Tijah pre#=%d, Pathfinder pre#=%d\n",tjctx->name,name,termOid,tjPre,pre); #endif @@ -913,8 +824,7 @@ --tjctx->doc_height; oid start = tj_popTag(tjctx); /* oid of the first node of the element */ int size = tjctx->tijahPre - start - 1; /* the Tijah element size */ - if ( insertPreSize(tjctx,start,size) < 0 ) - return 0; + dbat_set_int(&tjctx->dbat_collSize,(int)start,size); #ifdef TJ_TRACE if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:empty_endElement: \"%s\"\n", tjctx->name,""); #endif @@ -934,8 +844,7 @@ /* if ( DOEMIT(tjctx) ) { */ oid start = tj_popTag(tjctx); /* oid of the first node of the element */ int size = tjctx->tijahPre - start - 1; /* the Tijah element size */ - if ( insertPreSize(tjctx,start,size) < 0 ) - return 0; + dbat_set_int(&tjctx->dbat_collSize,(int)start,size); #ifdef TJ_TRACE if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:endElement: \"%s\"\n", tjctx->name,name); #endif @@ -944,8 +853,6 @@ return 1; }
-#define USE_FLEX 1 - /** * Output generation handler. Handles equivalent of * SAX characters() event. */ @@ -954,28 +861,23 @@ EMPTY_CHECK; tjCtx* tjctx = (tjCtx*)ctx->driverWs;
+ register char* p = (char*)ch; + while( *p && isspace(*p) ) p++; + if ( !*p ) + return 1; #ifdef TJ_TRACE - if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:handle_characters(%s) start\n",tjctx->name, (char*)ch); + if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:handle_characters(%s) start\n",tjctx->name, p); #endif
if ( DOEMIT(tjctx) ) { -#ifdef USE_FLEX - return useFlexScanner((char*)ch,strlen((char*)ch),tjctx); -#else - return useStrtokScanner(tjctx,(char*)ch); -#endif + return useFlexScanner(p,tjctx); } return 1; }
char* normalizeTerm(struct tijahContextStruct *tjctx, char* term ) { char *res; -#ifdef USE_FLEX - res = flexScanOneTerm((char*)term,strlen((char*)term)); -#else - res = strtok(term,obsoleteNexiChars); -#endif - /* INCOMPLETE, should make shure tijahContext is always avail. here */ + res = flexScanOneTerm((char*)term); if ( res && tjctx && tjctx->stemCtx->stem) { if ( !(res = (char*)tjctx->stemCtx->stem(tjctx->stemCtx,(char*)res)) ) { /* must be a stopword */ @@ -986,14 +888,6 @@ }
int CMDtj_normalizeTerm(char** res, str term, str stemmer) { -//Leave tokenization disabled for now -// char* tokenized; -//#ifdef USE_FLEX -// tokenized = flexScanOneTerm(term,strlen(term)); -//#else -// tokenized = strtok(term,obsoleteNexiChars); -//#endif - tjStemCtx* stemCtx = getStemmingContext( stemmer );
if ( stemCtx->stem ) { @@ -1123,13 +1017,9 @@ #ifdef TJ_TRACE if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:FINISH INDEXING\n",tjctx->name); #endif - - /* feature not used anymore ????? */ - if ( 0 /* ROEL CHANGE VIRTUAL ROOT SIZE HERE */ ) - insertPreSize(tjctx,0,tjctx->tijahPre - 1); - if ( dbat_finalize(&tjctx->dbat_collPre) < 0 ) + if ( dbat_finalize(&tjctx->dbat_collPre, tjctx->tijahPre) < 0 ) return GDK_FAIL; - if ( dbat_finalize(&tjctx->dbat_collSize) < 0 ) + if ( dbat_finalize(&tjctx->dbat_collSize, tjctx->tijahPre) < 0 ) return GDK_FAIL; #ifdef TJ_TRACE if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:FINALIZED DIRECT BATS\n",tjctx->name);
Index: pftijah_tokenize.l =================================================================== RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/pftijah_tokenize.l,v retrieving revision 1.12 retrieving revision 1.13 diff -u -d -r1.12 -r1.13 --- pftijah_tokenize.l 9 Jan 2007 15:44:39 -0000 1.12 +++ pftijah_tokenize.l 27 Feb 2007 15:43:37 -0000 1.13 @@ -115,7 +115,40 @@
%%
-int useFlexScanner(char* buf, int len, struct tijahContextStruct* tjCtx) { +int OPT0useFlexScanner(char* buf, struct tijahContextStruct* tjCtx) { + /* UPDATE: this delivers very strange testset results and should not be + * used I think. + */ + /* This is an optimized version of the flex scanner which does not copy the + * input buffer. The only strange thing about this interface is that it + * requires 2 YY_END_OF_BUFFER_CHAR (eg. 0) at the end of the buffer. The + * size of the buffer is inclusive the 2 0's. + * The last zero is toggled with its original value to prevent corruption + * of memory management tables. This was for me the only way to prevent + * copying here. + */ + int len = strlen(buf); + char remember = buf[len+1]; + buf[len+1] = YY_END_OF_BUFFER_CHAR; + YY_BUFFER_STATE myBuf = yy_scan_buffer(buf, len+2); + + if ( !myBuf ) { + stream_printf(GDKout,"# useFlexScanner: unable to get setup non-copy buffer."); + return 0; + } + while ( pftijah_tokenizelex() ) { + /* stream_printf(GDKout,"# scan(%s).\n",pftijah_tokenizetext); */ + if ( !handleTijahTerm(tjCtx, pftijah_tokenizetext) ) + return 0; + } + yy_delete_buffer(myBuf); + buf[len+1] = remember; + return 1; +} + +int useFlexScanner(char* buf, struct tijahContextStruct* tjCtx) { + // the original + int len = strlen(buf); YY_BUFFER_STATE myBuf = yy_scan_bytes(buf, len); while (pftijah_tokenizelex()) { if ( !handleTijahTerm(tjCtx, pftijah_tokenizetext) ) @@ -125,6 +158,40 @@ return 1; }
+int OPT2useFlexScanner(char* input, struct tijahContextStruct* tjctx) +{ + /* the fast function. This function is in the pftijah context with lots + * of small strings to tokenize many times faster as the flex and the + * strtok() methods which seem to have a rather larger overhead + */ + register char* s = input; + register char x; +// #define EMIT x=*s; *s=0; stream_printf(GDKout,"#[%s]\n",base);if (!handleTijahTerm(tjctx,base)) return 0; *s=x +#define EMIT x=*s; *s=0; if (!handleTijahTerm(tjctx,base)) return 0; *s=x + + while ( 1 ) { + while ( isspace( *s ) ) s++; + if ( *s ) { + char* base = s; + if ( isalnum(*s) ) { + if ( isdigit(*s) ) { + while ( isdigit(*++s) ) ; + EMIT; + } else { + if (isupper(*s)) *s=tolower(*s); + while ( isalnum(*++s) ) if (isupper(*s)) *s=tolower(*s); + EMIT; + } + } else { + // INCOMPLETE, ENTITIES HERE + // stream_printf(GDKout,"#[SKIPPING:%c]\n",*s); + s++; + } + } else + return 1; + } +} + char* tijah_tokenize_string(char* buf, int len, char* outbuf) { int cnt = 0; YY_BUFFER_STATE myBuf = yy_scan_bytes(buf, len); @@ -137,9 +204,10 @@ return outbuf; }
-char* flexScanOneTerm(char* buf, int len) { +char* flexScanOneTerm(char* buf) { char *res; char resBUFF[256]; + int len = strlen(buf);
YY_BUFFER_STATE myBuf = yy_scan_bytes(buf, len); if ( pftijah_tokenizelex() ) {
Index: nexi.c =================================================================== RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/nexi.c,v retrieving revision 1.49 retrieving revision 1.50 diff -u -d -r1.49 -r1.50 --- nexi.c 23 Feb 2007 15:11:05 -0000 1.49 +++ nexi.c 27 Feb 2007 15:43:37 -0000 1.50 @@ -455,6 +455,7 @@ /* * Now find out if the collection is fragmented or not. */ + /* INCOMPLETE, ERROR HERE WITH REFCOUNTS IN HEAD */ BAT* fb = pftu_lookup_bat(pftu_batname1("tj_%s_fragments",(char*)parserCtx->collection,0)); if ( ! fb ) { stream_printf(GDKerr,"Error: cannot find fragments bat for collection \"%s\".\n",parserCtx->collection); @@ -471,6 +472,8 @@ parserCtx->ffPfx = ""; parserCtx->flastPfx = ", str(1)"; } + BBPunfix(BBPcacheid(fb)); + fb = NULL; // Some special cases for NLLR, since NLLR only works with COARSE2 at the moment if ( txt_retr_model->model == MODEL_NLLR ) { // Switch to COARSE2 algebra for NLLR
Index: pftijah_util.mx =================================================================== RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/pftijah_util.mx,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- pftijah_util.mx 9 Jan 2007 17:15:23 -0000 1.2 +++ pftijah_util.mx 27 Feb 2007 15:43:37 -0000 1.3 @@ -73,6 +73,7 @@ if ( b == bat_nil ) { return NULL; } else { + BBPfix(b); return BBPdescriptor(b); } }
------------------------------------------------------------------------- Take Surveys. Earn Cash. Influence the Future of IT Join SourceForge.net's Techsay panel and you'll get the chance to share your opinions on IT & business topics through brief surveys-and earn cash http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV _______________________________________________ Monetdb-pf-checkins mailing list Monetdb-pf-checkins@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins
-- Sjoerd Mullender
participants (2)
-
flokstra
-
Sjoerd Mullender