
Hi, a "nice idea", but unfortunately not applicable. The system cannot in general rewrite a "in" in to a "between", as there semantics obviously differ. "where f25 in ('F','M')" ask only for tuples who's f25 attribute has either value 'F' or value 'M' while "where f25 between 'F' and 'M'" ask for values 'F', 'M', and all in between (in lexicographical order). It is only due the fact that your specific data does not contain any value in between 'F' & 'M' that both queries *happen to* produce the same result --- the system does and can not know that. A more promising and *possibly* feasible rewrite would be my earlier suggestion, i.e., our optimizer need to recognize that (a) the selection (where or having close) is on the grouping attribute AND (b) the selection on the base table is not very selective (e.g., yielding >90% in your case) AND (c) there are only few distinct values (i.e., groups) in the base data (i.e., before the selection); then (and probably only then) performing the grouping (and aggregation) first and the selection only there after would be beneficial (as in your specific case). We'll consider looking into this once time and resources allow us to. Another question is why the semijoin is so much slower than the selection in your specific case (well, other than that a semijoin is obviously a more complex operation than a simple range select), and whether we could improve the general semijoin implementation to perform better in you special case (obviously without compromising its performance in all other cases --- but we'd probably need your data for that, and I cannot promise when time and resources would allow us to perform that analysis ... Stefan On Sat, Jan 31, 2009 at 01:43:34PM -0500, dariuszs wrote:
Hi, I know it's probably a trivial question but after looking at the following trace - is there a way to make sql 'in' behave like sql 'between', make *algebra.semijoin behave like **algebra.uselect*? Thanks for you help. Dariusz.
:0[0]:W Strace select f25,count(*) from trw100 where f25 in ('F','M') group by f25; :13172[0]:R [ 0 usec mdb.setTimer(true); ] [ 15000 usec _3 := sql.bind("sys","trw100","f25",0); ] [ 16000 usec _8 := sql.bind("sys","trw100","f25",1); ] [ 16000 usec _10 := algebra.kunion(
:bat[:oid,:str][96240405], :bat[:oid,:str][0]); ] [ 0 usec _3 := nil; ] [ 15000 usec _8 := nil; ] [ 16000 usec _11 := sql.bind("sys","trw100","f25",2); ] [ 15000 usec _13 := algebra.kdifference( [96240405], :bat[:oid,:str][0]); ] [ 0 usec _10 := nil; ] [ 16000 usec _14 := algebra.kunion( [96240405], :bat[:oid,:str][0]); ] [ 16000 usec _13 := nil; ] [ 0 usec _11 := nil; ] [ 15000 usec _15 := sql.bind_dbat("sys","trw100",1); ] [ 16000 usec _16 := bat.reverse( :bat[:oid,:oid][0]); ] [ 16000 usec _15 := nil; ] [ 0 usec _17 := algebra.kdifference( [96240405],<~tmp_20253>[0]); ] [ 15000 usec _16 := nil; ] [ 16000 usec _18 := bat.reverse( [96240405]); ] [ 0 usec _17 := nil; ] [ 15000 usec _19 := bat.new(nil,nil); ] [ 16000 usec bat.append( :bat[:oid,:str][1],"F",true); ] [ 0 usec bat.append( :bat[:oid,:str][2],"M",true); ] [ 16000 usec _25 := bat.reverse( :bat[:oid,:str][2]); ] [ 15000 usec _19 := nil; ] [ 10782000 usec _26 := *algebra.semijoin(<~tmp_20617>[96240405],<~tmp_20506>[2]); *] [ 15000 usec _18 := nil; ] [ 16000 usec _25 := nil; ] [ 15000 usec _27 := bat.reverse( [93838869]); ] [ 0 usec _26 := nil; ] [ 16000 usec _29 := algebra.markT(<~tmp_20620>[93838869],0@0); ] [ 16000 usec _27 := nil; ] [ 0 usec _30 := bat.reverse( [93838869]); ] [ 15000 usec _29 := nil; ] [ 453000 usec _31 := algebra.join(<~tmp_20617>[93838869], [96240405]); ] [ 63000 usec _30 := nil; ] [ 0 usec _14 := nil; ] [ 953000 usec (ext37,grp35) := group.new( [93838869]); ] [ 16000 usec _34 := bat.mirror( [2]); ] [ 15000 usec ext37 := nil; ] [ 0 usec _35 := algebra.join( [2], [93838869]); ] [ 47000 usec _31 := nil; ] [ 344000 usec _36 := aggr.count( [93838869], [93838869], [2]); ] [ 31000 usec grp35 := nil; ] [ 16000 usec _34 := nil; ] [ 15000 usec _37 := sql.resultSet(2,1, [2]); ] [ 0 usec sql.rsColumn(0,"sys.trw100","f25","varchar",1,0, [2]); ] [ 16000 usec _35 := nil; ] [ 16000 usec sql.rsColumn(0,"sys.trw100","count_f25","wrd",64,0, :bat[:oid,:wrd][2]); ] [ 15000 usec _36 := nil; ] [ 0 usec _45 := io.stdout(); ] &1 0 2 2 2 % sys.trw100, sys.trw100 # table_name % f25, count_f25 # name % varchar, wrd # type % 1, 8 # length [ "F", 29577383 ] [ "M", 64261486 ] [ 16000 usec sql.exportResult(137554336,0,""); ] [ 13172000 usec user.s0_2("F","M"); ] :13203[0]:R :39062[0]:W Strace select f25,count(*) from trw100 where f25 between 'F' and 'M' group by f25; :43156[0]:R [ 0 usec mdb.setTimer(true); ] [ 16000 usec _3 := sql.bind("sys","trw100","f25",0); ] [ 1750000 usec _8 := *algebra.uselect( :bat[:oid,:str][96240405],"F","M",true,true);* ] [ 15000 usec _10 := sql.bind("sys","trw100","f25",1); ] [ 0 usec _12 := algebra.uselect( :bat[:oid,:str][0],"F","M",true,true); ] [ 16000 usec _13 := algebra.kunion( [93838869], [0]); ] [ 16000 usec _8 := nil; ] [ 15000 usec _12 := nil; ] [ 0 usec _14 := sql.bind("sys","trw100","f25",2); ] [ 16000 usec _16 := algebra.kdifference( [93838869], :bat[:oid,:str][0]); ] [ 16000 usec _13 := nil; ] [ 0 usec _17 := algebra.uselect( :bat[:oid,:str][0],"F","M",true,true); ] [ 15000 usec _18 := algebra.kunion( [93838869], [0]); ] [ 16000 usec _16 := nil; ] [ 15000 usec _17 := nil; ] [ 0 usec _19 := sql.bind_dbat("sys","trw100",1); ] [ 16000 usec _20 := bat.reverse( :bat[:oid,:oid][0]); ] [ 16000 usec _19 := nil; ] [ 0 usec _21 := algebra.kdifference( [93838869],<~tmp_20253>[0]); ] [ 15000 usec _18 := nil; ] [ 16000 usec _20 := nil; ] [ 16000 usec _23 := algebra.markT( [93838869],0@0); ] [ 0 usec _21 := nil; ] [ 15000 usec _24 := bat.reverse( [93838869]); ] [ 16000 usec _23 := nil; ] [ 0 usec _25 := algebra.kunion( :bat[:oid,:str][96240405], :bat[:oid,:str][0]); ] [ 15000 usec _3 := nil; ] [ 16000 usec _10 := nil; ] [ 16000 usec _26 := algebra.kdifference( [96240405], :bat[:oid,:str][0]); ] [ 0 usec _25 := nil; ] [ 15000 usec _27 := algebra.kunion( [96240405], :bat[:oid,:str][0]); ] [ 16000 usec _26 := nil; ] [ 0 usec _14 := nil; ] [ 453000 usec _28 := algebra.join(<~tmp_20616>[93838869], [96240405]); ] [ 31000 usec _24 := nil; ] [ 16000 usec _27 := nil; ] [ 922000 usec (ext34,grp32) := group.new( [93838869]); ] [ 15000 usec _31 := bat.mirror( [2]); ] [ 16000 usec ext34 := nil; ] [ 0 usec _32 := algebra.join( [2], [93838869]); ] [ 47000 usec _28 := nil; ] [ 344000 usec _33 := aggr.count( [93838869], [93838869], [2]); ] [ 31000 usec grp32 := nil; ] [ 16000 usec _31 := nil; ] [ 15000 usec _34 := sql.resultSet(2,1, [2]); ] [ 16000 usec sql.rsColumn(1,"sys.trw100","f25","varchar",1,0, [2]); ] [ 0 usec _32 := nil; ] [ 15000 usec sql.rsColumn(1,"sys.trw100","count_f25","wrd",64,0, :bat[:oid,:wrd][2]); ] [ 16000 usec _33 := nil; ] [ 16000 usec _42 := io.stdout(); ] &1 1 2 2 2 % sys.trw100, sys.trw100 # table_name % f25, count_f25 # name % varchar, wrd # type % 1, 8 # length [ "F", 29577383 ] [ "M", 64261486 ] [ 15000 usec sql.exportResult(137554336,1,""); ] [ 4094000 usec user.s1_2("F","M"); ] :43187[0]:R :253953[0]:W Connection closed ------------------------------------------------------------------------------ This SF.net email is sponsored by: SourcForge Community SourceForge wants to tell your story. http://p.sf.net/sfu/sf-spreadtheword _______________________________________________ MonetDB-users mailing list MonetDB-users@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/monetdb-users
-- | Dr. Stefan Manegold | mailto:Stefan.Manegold@cwi.nl | | CWI, P.O.Box 94079 | http://www.cwi.nl/~manegold/ | | 1090 GB Amsterdam | Tel.: +31 (20) 592-4212 | | The Netherlands | Fax : +31 (20) 592-4312 |