Re: [Monetdb-developers] [Monetdb-checkins] MonetDB5/src/modules/mal/rdf rdf_shredder.mx, Feb2010, 1.13, 1.13.2.1
Are you sure *all* these changes are for disabling tokenizer? Including the property changes? None of this will now be propagated, so if any of this should land in the HEAD, you need to do it manually (but then please identically). On 2010-02-22 16:43, Lefteris Sidirourgos wrote:
Update of /cvsroot/monetdb/MonetDB5/src/modules/mal/rdf In directory sfp-cvsdas-1.v30.ch3.sourceforge.com:/tmp/cvs-serv32552/src/modules/mal/rdf
Modified Files: Tag: Feb2010 rdf_shredder.mx Log Message: disable the tokenizer.mx from m5
Index: rdf_shredder.mx =================================================================== RCS file: /cvsroot/monetdb/MonetDB5/src/modules/mal/rdf/rdf_shredder.mx,v retrieving revision 1.13 retrieving revision 1.13.2.1 diff -u -d -r1.13 -r1.13.2.1 --- rdf_shredder.mx 8 Jan 2010 12:08:58 -0000 1.13 +++ rdf_shredder.mx 22 Feb 2010 15:43:41 -0000 1.13.2.1 @@ -27,7 +27,9 @@ #include "url.h" #include "rdf.h" #include "raptor.h" +#if 0 #include "../tokenizer.h" +#endif
typedef struct graphBATdef { graphBATType batType; /* BAT type */ @@ -36,6 +38,8 @@ int tailType; /* type of right column */ } graphBATdef;
+static BUN batsz = 10000000; + /* this list should be kept alligned with the graphBATType enum */ #if STORE == TRIPLE_STORE static graphBATdef graphdef[N_GRAPH_BAT] = { @@ -132,7 +136,8 @@ @:rdf_BUNappend_unq(@1, @2)@ #endif
-@= rdf_BUNappend_unq + +@= rdf_BUNappend_unq_1 bun = BUNfnd(BATmirror(@1),(ptr)@2); if (bun == BUN_NONE) { if (BATcount(@1) > 4 * @1->T->hash->mask) { @@ -148,6 +153,20 @@ bun = (@1)->hseqbase + bun; }
+@= rdf_BUNappend_unq +bun = BUNfnd(BATmirror(@1),(ptr)@2); +if (bun == BUN_NONE) { + if (BATcount(@1) > 4 * @1->T->hash->mask) { + HASHdestroy(@1); + BAThash(BATmirror(@1), 2*BATcount(@1)); + } + bun = (BUN) @1->batCount; + @1 = BUNappend(@1, (ptr)@2, TRUE); + if (@1 == NULL) { + @:raptor_exception(pdata, "could not append in@1")@ + } +} + @= rdf_tknzr_insert { str t = @1; @@ -226,12 +245,9 @@
/* disable all properties */ b->tsorted = FALSE; - b->T->nosorted = 0; b->tdense = FALSE; - b->T->nodense = 0; b->tkey = FALSE; - b->T->nokey[0] = 0; - b->T->nokey[1] = 1; + b->hdense = TRUE;
return b; } @@ -264,7 +280,7 @@ pdata->graph[i] = create_BAT ( graphdef[i].headType, graphdef[i].tailType, - BATTINY); /* DOTO: estimate size */ + batsz); /* DOTO: estimate size */ if (pdata->graph[i] == NULL) { return NULL; } @@ -274,7 +290,7 @@ pdata->graph[MAP_LEX] = create_BAT ( graphdef[MAP_LEX].headType, graphdef[MAP_LEX].tailType, - BATTINY); /* DOTO: estimate size */ + batsz); /* DOTO: estimate size */ if (pdata->graph[MAP_LEX] == NULL) { return NULL; } @@ -346,10 +362,10 @@ static str post_processing (parserData *pdata) { -#if STORE == TRIPLE_STORE BUN cnt; BAT *map_oid = NULL, *S = NULL, *P = NULL, *O = NULL, *ctref= NULL; BAT **graph = pdata->graph; +#ifdef _TKNZR_H BATiter bi, mi; BUN p, d, r; oid *bt; @@ -379,7 +395,31 @@ P = graph[P_sort]; O = graph[O_sort]; cnt = BATcount(S); +#else + /* order MAP_LEX */ + BATorder(BATmirror(graph[MAP_LEX])); + map_oid = BATmark(graph[MAP_LEX], 0); /* BATmark will create a copy */ + BATorder(map_oid); + BATsetaccess(map_oid, BAT_READ); /* force BAtmark not to copy bat */ + map_oid = BATmirror(BATmark(BATmirror(map_oid), 0)); + BATsetaccess(graph[MAP_LEX], BAT_READ); /* force BATmark not to copy bat */ + graph[MAP_LEX] = BATmirror(BATmark(BATmirror(graph[MAP_LEX]), 0)); + + /* convert old oids of S_sort, P_sort, O_sort to new ones */ + cnt = BATcount(graph[S_sort]); + S = BATleftfetchjoin(graph[S_sort], map_oid, cnt); + if (S == NULL) goto bailout; + BBPreclaim(graph[S_sort]); + P = BATleftfetchjoin(graph[P_sort], map_oid, cnt); + if (P == NULL) goto bailout; + BBPreclaim(graph[P_sort]); + O = BATleftfetchjoin(graph[O_sort], map_oid, cnt); + if (O == NULL) goto bailout; + BBPreclaim(graph[O_sort]); + BBPreclaim(map_oid); +#endif
+#if STORE == TRIPLE_STORE /* order SPO/SOP */ graph[S_sort] = BATmirror(BATsort(BATmirror(S))); /* sort on S */ @:order(graph[S_sort],P,O,PO)@ @@ -408,6 +448,14 @@
return MAL_SUCCEED;
+#elif STORE == MLA_STORE + graph[S_sort] = S; + graph[P_sort] = P; + graph[O_sort] = O; + + return MAL_SUCCEED; +#endif + bailout: if (map_oid != NULL) BBPreclaim(map_oid); if (ctref != NULL) BBPreclaim(ctref); @@ -415,11 +463,6 @@ if (P != NULL) BBPreclaim(P); if (O != NULL) BBPreclaim(O); return NULL; - -#elif STORE == MLA_STORE - (void) pdata; - return MAL_SUCCEED; -#endif }
@= clean_raptor @@ -455,15 +498,21 @@ (void) graphname;
/* init tokenizer */ +#ifdef _TKNZR_H if (TKNZRopen (NULL, schema) != MAL_SUCCEED) { throw(RDF, "rdf.rdfShred", "could not open the tokenizer\n"); } +#else + (void) schema; +#endif
/* Init pdata */ pdata = parserData_create(*location); if (pdata == NULL) { +#ifdef _TKNZR_H TKNZRclose(&iret); +#endif @:clean@ throw(RDF, "rdf.rdfShred", "could not allocate enough memory for pdata\n"); @@ -473,7 +522,9 @@ raptor_init(); pdata->rparser = rparser = raptor_new_parser("guess"); if (rparser == NULL) { +#ifdef _TKNZR_H TKNZRclose(&iret); +#endif raptor_finish(); @:clean@ throw(RDF, "rdf.rdfShred", "could not create raptor parser object\n"); @@ -484,7 +535,9 @@ /* Parse URI or local file. */ ret = URLisaURL(&isURI, location); if (ret != MAL_SUCCEED) { +#ifdef _TKNZR_H TKNZRclose(&iret); +#endif @:clean@ return ret; } else if (isURI) { @@ -496,6 +549,9 @@ iret = raptor_parse_file(rparser, uri, NULL); } @:clean_raptor@ +#ifdef _TKNZR_H + TKNZRclose(&iret); +#endif
graph = pdata->graph; assert (pdata->tcount == BATcount(graph[S_sort]) && @@ -504,23 +560,18 @@
/* error check */ if (iret) { - TKNZRclose(&iret); @:clean@ throw(RDF, "rdf.rdfShred", "parsing failed\n"); } if (pdata->exception) { - TKNZRclose(&iret); throw(RDF, "rdf.rdfShred", "%s\n", pdata->exceptionMsg); } else if (pdata->fatal) { - TKNZRclose(&iret); throw(RDF, "rdf.rdfShred", "last fatal error was:\n%s\n", pdata->fatalMsg); } else if (pdata->error) { - TKNZRclose(&iret); throw(RDF, "rdf.rdfShred", "last error was:\n%s\n", pdata->errorMsg); } else if (pdata->warning) { - TKNZRclose(&iret); throw(RDF, "rdf.rdfShred", "last warning was:\n%s\n", pdata->warningMsg); } @@ -528,7 +579,6 @@ /* post processing step */ ret = post_processing(pdata); if (ret != MAL_SUCCEED) { - TKNZRclose(&iret); @:clean@ throw(RDF, "rdf.rdfShred", "could not post-proccess data"); } @@ -536,7 +586,6 @@ /* prepare return bat of bats */ retbat = BATnew(TYPE_void, TYPE_bat, N_GRAPH_BAT); if (retbat == NULL) { - TKNZRclose(&iret); @:clean@ throw(RDF, "rdf.rdfShred", "could not allocate enough memory for return bat"); @@ -548,7 +597,6 @@ }
GDKfree(pdata); - TKNZRclose(&iret); BBPkeepref(*retval = retbat->batCacheid);
return MAL_SUCCEED;
------------------------------------------------------------------------------ Download Intel® Parallel Studio Eval Try the new software tools for yourself. Speed compiling, find bugs proactively, and fine-tune applications for parallel performance. See why Intel Parallel Studio got high marks during beta. http://p.sf.net/sfu/intel-sw-dev _______________________________________________ Monetdb-checkins mailing list Monetdb-checkins@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/monetdb-checkins
-- Sjoerd Mullender
Hi Sjoerd,
I have applied the identical changes manually to the head before
applying them to the candidate branch. That keeps the rdf module
synchronized between head and candidate while the tokenizer is left
out of the candidate.
lefteris
On Mon, Feb 22, 2010 at 10:52 PM, Sjoerd Mullender wrote:
Are you sure *all* these changes are for disabling tokenizer? Including the property changes?
None of this will now be propagated, so if any of this should land in the HEAD, you need to do it manually (but then please identically).
On 2010-02-22 16:43, Lefteris Sidirourgos wrote:
Update of /cvsroot/monetdb/MonetDB5/src/modules/mal/rdf In directory sfp-cvsdas-1.v30.ch3.sourceforge.com:/tmp/cvs-serv32552/src/modules/mal/rdf
Modified Files: Tag: Feb2010 rdf_shredder.mx Log Message: disable the tokenizer.mx from m5
Index: rdf_shredder.mx =================================================================== RCS file: /cvsroot/monetdb/MonetDB5/src/modules/mal/rdf/rdf_shredder.mx,v retrieving revision 1.13 retrieving revision 1.13.2.1 diff -u -d -r1.13 -r1.13.2.1 --- rdf_shredder.mx 8 Jan 2010 12:08:58 -0000 1.13 +++ rdf_shredder.mx 22 Feb 2010 15:43:41 -0000 1.13.2.1 @@ -27,7 +27,9 @@ #include "url.h" #include "rdf.h" #include "raptor.h" +#if 0 #include "../tokenizer.h" +#endif
typedef struct graphBATdef { graphBATType batType; /* BAT type */ @@ -36,6 +38,8 @@ int tailType; /* type of right column */ } graphBATdef;
+static BUN batsz = 10000000; + /* this list should be kept alligned with the graphBATType enum */ #if STORE == TRIPLE_STORE static graphBATdef graphdef[N_GRAPH_BAT] = { @@ -132,7 +136,8 @@ @:rdf_BUNappend_unq(@1, @2)@ #endif
-@= rdf_BUNappend_unq + +@= rdf_BUNappend_unq_1 bun = BUNfnd(BATmirror(@1),(ptr)@2); if (bun == BUN_NONE) { if (BATcount(@1) > 4 * @1->T->hash->mask) { @@ -148,6 +153,20 @@ bun = (@1)->hseqbase + bun; }
+@= rdf_BUNappend_unq +bun = BUNfnd(BATmirror(@1),(ptr)@2); +if (bun == BUN_NONE) { + if (BATcount(@1) > 4 * @1->T->hash->mask) { + HASHdestroy(@1); + BAThash(BATmirror(@1), 2*BATcount(@1)); + } + bun = (BUN) @1->batCount; + @1 = BUNappend(@1, (ptr)@2, TRUE); + if (@1 == NULL) { + @:raptor_exception(pdata, "could not append in@1")@ + } +} + @= rdf_tknzr_insert { str t = @1; @@ -226,12 +245,9 @@
/* disable all properties */ b->tsorted = FALSE; - b->T->nosorted = 0; b->tdense = FALSE; - b->T->nodense = 0; b->tkey = FALSE; - b->T->nokey[0] = 0; - b->T->nokey[1] = 1; + b->hdense = TRUE;
return b; } @@ -264,7 +280,7 @@ pdata->graph[i] = create_BAT ( graphdef[i].headType, graphdef[i].tailType, - BATTINY); /* DOTO: estimate size */ + batsz); /* DOTO: estimate size */ if (pdata->graph[i] == NULL) { return NULL; } @@ -274,7 +290,7 @@ pdata->graph[MAP_LEX] = create_BAT ( graphdef[MAP_LEX].headType, graphdef[MAP_LEX].tailType, - BATTINY); /* DOTO: estimate size */ + batsz); /* DOTO: estimate size */ if (pdata->graph[MAP_LEX] == NULL) { return NULL; } @@ -346,10 +362,10 @@ static str post_processing (parserData *pdata) { -#if STORE == TRIPLE_STORE BUN cnt; BAT *map_oid = NULL, *S = NULL, *P = NULL, *O = NULL, *ctref= NULL; BAT **graph = pdata->graph; +#ifdef _TKNZR_H BATiter bi, mi; BUN p, d, r; oid *bt; @@ -379,7 +395,31 @@ P = graph[P_sort]; O = graph[O_sort]; cnt = BATcount(S); +#else + /* order MAP_LEX */ + BATorder(BATmirror(graph[MAP_LEX])); + map_oid = BATmark(graph[MAP_LEX], 0); /* BATmark will create a copy */ + BATorder(map_oid); + BATsetaccess(map_oid, BAT_READ); /* force BAtmark not to copy bat */ + map_oid = BATmirror(BATmark(BATmirror(map_oid), 0)); + BATsetaccess(graph[MAP_LEX], BAT_READ); /* force BATmark not to copy bat */ + graph[MAP_LEX] = BATmirror(BATmark(BATmirror(graph[MAP_LEX]), 0)); + + /* convert old oids of S_sort, P_sort, O_sort to new ones */ + cnt = BATcount(graph[S_sort]); + S = BATleftfetchjoin(graph[S_sort], map_oid, cnt); + if (S == NULL) goto bailout; + BBPreclaim(graph[S_sort]); + P = BATleftfetchjoin(graph[P_sort], map_oid, cnt); + if (P == NULL) goto bailout; + BBPreclaim(graph[P_sort]); + O = BATleftfetchjoin(graph[O_sort], map_oid, cnt); + if (O == NULL) goto bailout; + BBPreclaim(graph[O_sort]); + BBPreclaim(map_oid); +#endif
+#if STORE == TRIPLE_STORE /* order SPO/SOP */ graph[S_sort] = BATmirror(BATsort(BATmirror(S))); /* sort on S */ @:order(graph[S_sort],P,O,PO)@ @@ -408,6 +448,14 @@
return MAL_SUCCEED;
+#elif STORE == MLA_STORE + graph[S_sort] = S; + graph[P_sort] = P; + graph[O_sort] = O; + + return MAL_SUCCEED; +#endif + bailout: if (map_oid != NULL) BBPreclaim(map_oid); if (ctref != NULL) BBPreclaim(ctref); @@ -415,11 +463,6 @@ if (P != NULL) BBPreclaim(P); if (O != NULL) BBPreclaim(O); return NULL; - -#elif STORE == MLA_STORE - (void) pdata; - return MAL_SUCCEED; -#endif }
@= clean_raptor @@ -455,15 +498,21 @@ (void) graphname;
/* init tokenizer */ +#ifdef _TKNZR_H if (TKNZRopen (NULL, schema) != MAL_SUCCEED) { throw(RDF, "rdf.rdfShred", "could not open the tokenizer\n"); } +#else + (void) schema; +#endif
/* Init pdata */ pdata = parserData_create(*location); if (pdata == NULL) { +#ifdef _TKNZR_H TKNZRclose(&iret); +#endif @:clean@ throw(RDF, "rdf.rdfShred", "could not allocate enough memory for pdata\n"); @@ -473,7 +522,9 @@ raptor_init(); pdata->rparser = rparser = raptor_new_parser("guess"); if (rparser == NULL) { +#ifdef _TKNZR_H TKNZRclose(&iret); +#endif raptor_finish(); @:clean@ throw(RDF, "rdf.rdfShred", "could not create raptor parser object\n"); @@ -484,7 +535,9 @@ /* Parse URI or local file. */ ret = URLisaURL(&isURI, location); if (ret != MAL_SUCCEED) { +#ifdef _TKNZR_H TKNZRclose(&iret); +#endif @:clean@ return ret; } else if (isURI) { @@ -496,6 +549,9 @@ iret = raptor_parse_file(rparser, uri, NULL); } @:clean_raptor@ +#ifdef _TKNZR_H + TKNZRclose(&iret); +#endif
graph = pdata->graph; assert (pdata->tcount == BATcount(graph[S_sort]) && @@ -504,23 +560,18 @@
/* error check */ if (iret) { - TKNZRclose(&iret); @:clean@ throw(RDF, "rdf.rdfShred", "parsing failed\n"); } if (pdata->exception) { - TKNZRclose(&iret); throw(RDF, "rdf.rdfShred", "%s\n", pdata->exceptionMsg); } else if (pdata->fatal) { - TKNZRclose(&iret); throw(RDF, "rdf.rdfShred", "last fatal error was:\n%s\n", pdata->fatalMsg); } else if (pdata->error) { - TKNZRclose(&iret); throw(RDF, "rdf.rdfShred", "last error was:\n%s\n", pdata->errorMsg); } else if (pdata->warning) { - TKNZRclose(&iret); throw(RDF, "rdf.rdfShred", "last warning was:\n%s\n", pdata->warningMsg); } @@ -528,7 +579,6 @@ /* post processing step */ ret = post_processing(pdata); if (ret != MAL_SUCCEED) { - TKNZRclose(&iret); @:clean@ throw(RDF, "rdf.rdfShred", "could not post-proccess data"); } @@ -536,7 +586,6 @@ /* prepare return bat of bats */ retbat = BATnew(TYPE_void, TYPE_bat, N_GRAPH_BAT); if (retbat == NULL) { - TKNZRclose(&iret); @:clean@ throw(RDF, "rdf.rdfShred", "could not allocate enough memory for return bat"); @@ -548,7 +597,6 @@ }
GDKfree(pdata); - TKNZRclose(&iret); BBPkeepref(*retval = retbat->batCacheid);
return MAL_SUCCEED;
------------------------------------------------------------------------------ Download Intel® Parallel Studio Eval Try the new software tools for yourself. Speed compiling, find bugs proactively, and fine-tune applications for parallel performance. See why Intel Parallel Studio got high marks during beta. http://p.sf.net/sfu/intel-sw-dev _______________________________________________ Monetdb-checkins mailing list Monetdb-checkins@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/monetdb-checkins
-- Sjoerd Mullender
------------------------------------------------------------------------------ Download Intel® Parallel Studio Eval Try the new software tools for yourself. Speed compiling, find bugs proactively, and fine-tune applications for parallel performance. See why Intel Parallel Studio got high marks during beta. http://p.sf.net/sfu/intel-sw-dev _______________________________________________ Monetdb-developers mailing list Monetdb-developers@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/monetdb-developers
participants (2)
- Lefteris
- Sjoerd Mullender