On Mon, Mar 10, 2008 at 02:01:16AM +0000, Ying Zhang wrote:
Update of /cvsroot/monetdb/pathfinder/runtime In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv3350
Modified Files: Tag: xquery-decomposition xrpc_client.mx Log Message: - adopt Stefan's tips: used rangesplit to split item|kind per container (Thanks!).
- a lot of corrections to get this thing working
Index: xrpc_client.mx =================================================================== RCS file: /cvsroot/monetdb/pathfinder/runtime/xrpc_client.mx,v retrieving revision 1.41.2.8 retrieving revision 1.41.2.9 diff -u -d -r1.41.2.8 -r1.41.2.9 --- xrpc_client.mx 8 Mar 2008 21:17:27 -0000 1.41.2.8 +++ xrpc_client.mx 10 Mar 2008 02:01:13 -0000 1.41.2.9 @@ -94,94 +94,40 @@ BAT[void, dbl] dbl_values, BAT[void, dbl] dec_values, BAT[void, str] str_values) : oid { - var maxnconts := ws.fetch(0).count(); # split used_
- into multiple BATs according to their container IDs vvvvvvvvv var uitems := bat(oid, bat, maxnconts); # container ID in head var ukinds := bat(oid, bat, maxnconts); ^^^^^^^^^ These two BATs are IMHO overwritten below (see "vvvvvvvvv"/"^^^^^^^^^"); hence, no (more) need to make them, here!??
- var conts := used_kind.[int]().[>>](6).tsort(); # [oid,int]: container IDs in tail - var nconts := conts.tunique().count(); - if(nconts = 1){ # all items are from the same container - uitems.insert(conts.fetch(0).oid(), used_item); - ukinds.insert(conts.fetch(0).oid(), used_kind); - } else { - # sort used_
- on container ID - var itemcont := used_item.reverse().join(conts).reverse().tmark(0@0); - var kindcont := used_kind.reverse().join(conts).reverse().tmark(0@0); - - var curcont := conts.fetch(0); - var tmpitem := bat(void, oid).seqbase(0@0); - var tmpkind := bat(void, int).seqbase(0@0); - - conts := conts.tmark(0@0); # conts[void,int] - conts@batloop(){ - # start of next container, save items|kinds of previous container - if($t != curcont) { - curcont := $t; - uitems.insert(curcont, tmpitem); - ukinds.insert(curcont, tmpkind); - tmpitem := bat(void, oid).seqbase(0@0); - tmpkind := bat(void, int).seqbase(0@0); - } - - tmpitem.append(itemcont.fetch($h)); - tmpkind.append(kindcont.fetch($h)); - } - uitems.insert(curcont, tmpitem); - ukinds.insert(curcont, tmpkind); - } + var conts := get_container([int](used_kind)); # [oid,int]: container IDs in tail + var unq_conts := tunique(conts).hmark(0@0); + var nconts := count(unq_conts); + var rs := rangesplit(conts, nconts).tmark(0@0).reverse().join(unq_conts).reverse();
This will do what you want (have rs's head contain the conts from the BATs in its tail) ONLY if tunique(conts) & rangesplit(conts, nconts) happen to produce the same order! To be sure, you'd need to sort the unq_conts before marking them (result of rangesplit should hopefully be sorted): var unq_conts := tunique(conts).sort().hmark(0@0); Was there something wrong with my proposal: var rs := rangesplit(conts, nconts); rs := rs.reverse().mirror().[fetch](0).reverse(); ? vvvvvvvvv
+ uitems := rs.[mirror]().[leftfetchjoin](const used_item); + uitems := [tmark](uitems, const 0@0); + ukinds := rs.[mirror]().[leftfetchjoin](const used_kind); + ukinds := [tmark](ukinds, const 0@0); ^^^^^^^^^ (see "vvvvvvvvv"/"^^^^^^^^^" above) Plus: It might be more efficient to get rid of the head before the join, since you "overwrite" them afterwards, anyway; i.e.: uitems := rs.[tmark](oid(nil)).[leftfetchjoin](const used_item).[tmark](uitems, const 0@0);
# split returned_
- into multiple BATs according to their container IDs
vvvvvvvvv
var ritems := bat(oid, bat, maxnconts); # container ID in head var rkinds := bat(oid, bat, maxnconts);
^^^^^^^^^ Same as with uitems/ukinds above.
- conts := returned_kind.[int]().[>>](6).tsort(); # [oid,int]: container IDs in tail - nconts := conts.tunique().count(); - if(nconts = 1){ # all items are from the same container - ritems.insert(conts.fetch(0).oid(), returned_item); - rkinds.insert(conts.fetch(0).oid(), returned_kind); - } else { - # sort used_
- on container ID - var itemcont := returned_item.reverse().join(conts).reverse().tmark(0@0); - var kindcont := returned_kind.reverse().join(conts).reverse().tmark(0@0); - - var curcont := conts.fetch(0); - var tmpitem := bat(void, oid).seqbase(0@0); - var tmpkind := bat(void, int).seqbase(0@0); - - conts := conts.tmark(0@0); # conts[void,int] - conts@batloop(){ - # start of next container, save items|kinds of previous container - if($t != curcont) { - curcont := $t; - ritems.insert(curcont, tmpitem); - rkinds.insert(curcont, tmpkind); - tmpitem := bat(void, oid).seqbase(0@0); - tmpkind := bat(void, int).seqbase(0@0); - } - - tmpitem.append(itemcont.fetch($h)); - tmpkind.append(kindcont.fetch($h)); - } - ritems.insert(curcont, tmpitem); - rkinds.insert(curcont, tmpkind); - } + conts := get_container([int](returned_kind)); # [oid,int]: container IDs in tail + unq_conts := tunique(conts).hmark(0@0); + nconts := count(unq_conts); + rs := rangesplit(conts, nconts).tmark(0@0).reverse().join(unq_conts).reverse();
(see above) vvvvvvvvv
+ ritems := rs.[mirror]().[leftfetchjoin](const returned_item); + ritems := [tmark](ritems, const 0@0); + rkinds := rs.[mirror]().[leftfetchjoin](const returned_kind); + rkinds := [tmark](rkinds, const 0@0); ^^^^^^^^^ (see above)
# align uitems|kinds and ritems|kinds, # fill missing container with emtpybat var emptybat := bat(void,void,0); - uitems@batloop(){ - var err := CATCH({ritems.find($h);}); - if(err.search("not found") >= 0){ - ritems.insert($h, emptybat); - } - } - ritems@batloop(){ - var err := CATCH({uitems.find($h);}); - if(err.search("not found") >= 0){ - uitems.insert($h, emptybat); - } - } - + var usedonly := kdiff(uitems,ritems).project(emptybat); + ritems.access(BAT_WRITE).insert(usedonly); + rkinds.access(BAT_WRITE).insert(usedonly); + var returnedonly := kdiff(ritems,uitems).project(emptybat); + uitems.access(BAT_WRITE).insert(returnedonly); + ukinds.access(BAT_WRITE).insert(returnedonly);
a) Do you need to modify these BATs later on? If not, you might want to set them read-only, again, after the insert: ritems.access(BAT_WRITE).insert(usedonly).access(BAT_READ); b) To be sure that your variables still point to the correct BATs (access(BAT_WRITE) *might* return a copy!), you should assign the result: ritems := ritems.access(BAT_WRITE).insert(usedonly)[.access(BAT_READ)]; Stefan -- | Dr. Stefan Manegold | mailto:Stefan.Manegold@cwi.nl | | CWI, P.O.Box 94079 | http://www.cwi.nl/~manegold/ | | 1090 GB Amsterdam | Tel.: +31 (20) 592-4212 | | The Netherlands | Fax : +31 (20) 592-4312 |