[MonetDB-users] MonetDB unexpected count/group by slowdown

I have a table f_trigger with 171,452,378 records. It has fields script_id
and machine_uuid (and other fields not used here).
I want to run
select script_id,count(distinct machine_uuid) from f_trigger group by
script_id;
First, though, let's get the number of expected records only:
select count(*) from (select script_id,count(distinct machine_uuid) from
f_trigger group by script_id) as subq;
+------+
| L7 |
+======+
| 208 |
+------+
1 tuple (359.124ms)
Very nice.
Let's explain the query before I run it:
explain select script_id,count(distinct machine_uuid) from f_trigger group
by script_id;
+-------------------------------------------------------------------------------+
| mal
|
+===============================================================================+
| function user.s0_1{autoCommit=true}():void;
|
| X_21 := nil:bat[:oid,:str];
|
| X_39:bat[:oid,:wrd] := nil:bat[:oid,:wrd];
|
| barrier X_596 := language.dataflow();
|
| X_2 := sql.mvc();
|
| X_12:bat[:oid,:str] := sql.bind(X_2,"sys","f_trigger","script_id",0);
|
| X_10:bat[:oid,:str] := sql.bind(X_2,"sys","f_trigger","script_id",2);
|
| X_14 := algebra.kdifference(X_12,X_10);
|
| X_15 := algebra.kunion(X_14,X_10);
|
| X_9:bat[:oid,:str] := sql.bind(X_2,"sys","f_trigger","script_id",1);
|
| X_16 := algebra.kunion(X_15,X_9);
|
| X_5:bat[:oid,:oid] := sql.bind_dbat(X_2,"sys","f_trigger",1);
|
| X_8 := bat.reverse(X_5);
|
| X_17 := algebra.kdifference(X_16,X_8);
|
| (ext28,grp26) := group.done(X_17);
|
| X_20 := bat.mirror(ext28);
|
| X_21 := algebra.join(X_20,X_17);
|
| X_29:bat[:oid,:str] :=
sql.bind(X_2,"sys","f_trigger","machine_uuid",0); |
| X_30 := algebra.selectNotNil(X_29);
|
| X_26:bat[:oid,:str] :=
sql.bind(X_2,"sys","f_trigger","machine_uuid",2); |
| X_31 := algebra.kdifference(X_30,X_26);
|
| X_28 := algebra.selectNotNil(X_26);
|
| X_32 := algebra.kunion(X_31,X_28);
|
| X_24:bat[:oid,:str] :=
sql.bind(X_2,"sys","f_trigger","machine_uuid",1); |
| X_25 := algebra.selectNotNil(X_24);
|
| X_33 := algebra.kunion(X_32,X_25);
|
| X_34 := algebra.kdifference(X_33,X_8);
|
| (X_35,grp55) := group.derive(ext28,grp26,X_34);
|
| X_37 := bat.mirror(X_35);
|
| X_38 := algebra.join(X_37,X_34);
|
| X_39:bat[:oid,:wrd] := aggr.count(X_38,grp26,X_20);
|
| exit X_596;
|
| X_40 := sql.resultSet(2,1,X_21);
|
| sql.rsColumn(X_40,"sys.f_trigger","script_id","varchar",32768,0,X_21);
|
| sql.rsColumn(X_40,"sys.f_trigger","L10","wrd",64,0,X_39);
|
| X_53 := io.stdout();
|
| sql.exportResult(X_53,X_40);
|
| end s0_1;
|
+-------------------------------------------------------------------------------+
38 tuples (3.136ms)
Running the query takes a very long time. While the query is running, I
notice VERY heavy disk writes, and almost no reads and almost no CPU
activity.
Based on other information on the mailing list, I suspect the query
generates intermediate data that gets writted to disk (instead of kept in
memory).
We are running monetdb on a 32 core linux machine with 256 GB of RAM and a
fast raid array. Memory usage is under 20%, so I would not expect monetdb to
be writing to disk so much (50-500MB/s writes)...
Database settings:
shared default yes
nthreads local 32
optpipe local recycler_pipe
master default no
slave default <unknown>
readonly local yes
Stethoscope output:
[1] # event, time, thread, usec, stmt, # name
[1] [ 256, "14:02:38.525477", 2, 0,
"calc.str(\"script_id\");" ]
[1] [ 257, "14:02:38.525477", 2, 134,
"calc.str(\"script_id\");" ]
[1] [ 258, "14:02:38.525634", 2, 0,
"calc.str(\"f_trigger\");" ]
[1] [ 259, "14:02:38.525634", 2, 18,
"calc.str(\"f_trigger\");" ]
[1] [ 260, "14:02:38.525670", 2, 0,
"calc.str(\"script_id\");" ]
[1] [ 261, "14:02:38.525670", 2, 16,
"calc.str(\"script_id\");" ]
[1] [ 262, "14:02:38.525704", 2, 0,
"calc.str(\"f_trigger\");" ]
[1] [ 263, "14:02:38.525704", 2, 17,
"calc.str(\"f_trigger\");" ]
[1] [ 264, "14:02:38.525740", 2, 0,
"calc.str(\"script_id\");" ]
[1] [ 265, "14:02:38.525740", 2, 17,
"calc.str(\"script_id\");" ]
[1] [ 266, "14:02:38.525777", 2, 0,
"calc.str(\"f_trigger\");" ]
[1] [ 267, "14:02:38.525777", 2, 19,
"calc.str(\"f_trigger\");" ]
[1] [ 268, "14:02:38.525812", 2, 0,
"calc.str(\"script_id\");" ]
[1] [ 269, "14:02:38.525812", 2, 19,
"calc.str(\"script_id\");" ]
[1] [ 270, "14:02:38.525848", 2, 0,
"calc.str(\"f_trigger\");" ]
[1] [ 271, "14:02:38.525848", 2, 19,
"calc.str(\"f_trigger\");" ]
[1] [ 272, "14:02:38.525884", 2, 0,
"calc.str(\"L11\");" ]
[1] [ 273, "14:02:38.525884", 2, 19,
"calc.str(\"L11\");" ]
[1] [ 274, "14:02:38.525925", 2, 0,
"calc.str(\"f_trigger\");" ]
[1] [ 275, "14:02:38.525925", 2, 20,
"calc.str(\"f_trigger\");" ]
[1] [ 276, "14:02:38.525962", 2, 0,
"calc.str(\"L11\");" ]
[1] [ 277, "14:02:38.525962", 2, 18,
"calc.str(\"L11\");" ]
[1] [ 278, "14:02:38.526000", 2, 0,
"calc.str(\"f_trigger\");" ]
[1] [ 279, "14:02:38.526000", 2, 18,
"calc.str(\"f_trigger\");" ]
[1] [ 280, "14:02:38.526039", 2, 0,
"calc.str(\"machine_uuid\");" ]
[1] [ 281, "14:02:38.526039", 2, 19,
"calc.str(\"machine_uuid\");" ]
[1] [ 282, "14:02:38.526077", 2, 0,
"calc.str(\"f_trigger\");" ]
[1] [ 283, "14:02:38.526077", 2, 20,
"calc.str(\"f_trigger\");" ]
[1] [ 284, "14:02:38.526116", 2, 0,
"calc.str(nil:str);" ]
[1] [ 285, "14:02:38.526116", 2, 22,
"calc.str(nil:str);" ]
[1] [ 286, "14:02:38.528707", 2, 0, "X_21 :=
nil:bat[:oid,:str];" ]
[1] [ 287, "14:02:38.528707", 2, 64, "X_21 :=
nil:bat[:oid,:str];" ]
[1] [ 288, "14:02:38.528790", 2, 0,
"X_39:bat[:oid,:wrd] := nil:bat[:oid,:wrd];" ]
[1] [ 289, "14:02:38.528790", 2, 27,
"X_39:bat[:oid,:wrd] := nil:bat[:oid,:wrd];" ]
[1] [ 290, "14:02:38.528835", 2, 0, "barrier X_596 :=
language.dataflow();" ]
[1] [ 291, "14:02:38.528928", 40, 0, "X_2 := sql.mvc();"
]
[1] [ 292, "14:02:38.528928", 40, 141, "X_2 := sql.mvc();"
]
[1] [ 293, "14:02:38.529100", 40, 0,
"X_12:bat[:oid,:str] :=
sql.bind(X_2=0,\"sys\",\"f_trigger\",\"script_id\",0);" ]
[1] [ 294, "14:02:38.529100", 40, 78,
"X_12:bat[:oid,:str] =

On 13-01-2012 14:58:40 -0800, zack_pcd wrote: [snip] Sorry to ignore most of your email.
Is this a bug (as I suspect)? Or is the group by/count distinct operation just slow? If it is a bug, is this fixed in dec-2011? If that is the case, I would like to verify this was fixed for us in dec-2011 - however the last "latest" build I tried was crashing for me. I am not sure which version of dec-2011 I should try for testing this.
Please try the sources from http://dev.monetdb.org/downloads/testing/sources/Dec2011/ If that crashes, we're very interested to know.

There may be many reasons for this. A few questions that come to mind are 0) how many distinct machine_uuid ? 1) is the behavior repeatable 2) you are using an experimental optimizer pipeline, what happens if you use the default pipeline 3) check algorithm choice with --algorithms to see if proper joins are choosen On 1/13/12 11:58 PM, zack_pcd wrote:
I have a table f_trigger with 171,452,378 records. It has fields script_id and machine_uuid (and other fields not used here).
I want to run select script_id,count(distinct machine_uuid) from f_trigger group by script_id;
First, though, let's get the number of expected records only: select count(*) from (select script_id,count(distinct machine_uuid) from f_trigger group by script_id) as subq; +------+ | L7 | +======+ | 208 | +------+ 1 tuple (359.124ms)
Very nice.
Let's explain the query before I run it:
explain select script_id,count(distinct machine_uuid) from f_trigger group by script_id; +-------------------------------------------------------------------------------+ | mal | +===============================================================================+ | function user.s0_1{autoCommit=true}():void; | | X_21 := nil:bat[:oid,:str]; | | X_39:bat[:oid,:wrd] := nil:bat[:oid,:wrd]; | | barrier X_596 := language.dataflow(); | | X_2 := sql.mvc(); | | X_12:bat[:oid,:str] := sql.bind(X_2,"sys","f_trigger","script_id",0); | | X_10:bat[:oid,:str] := sql.bind(X_2,"sys","f_trigger","script_id",2); | | X_14 := algebra.kdifference(X_12,X_10); | | X_15 := algebra.kunion(X_14,X_10); | | X_9:bat[:oid,:str] := sql.bind(X_2,"sys","f_trigger","script_id",1); | | X_16 := algebra.kunion(X_15,X_9); | | X_5:bat[:oid,:oid] := sql.bind_dbat(X_2,"sys","f_trigger",1); | | X_8 := bat.reverse(X_5); | | X_17 := algebra.kdifference(X_16,X_8); | | (ext28,grp26) := group.done(X_17); | | X_20 := bat.mirror(ext28); | | X_21 := algebra.join(X_20,X_17); | | X_29:bat[:oid,:str] := sql.bind(X_2,"sys","f_trigger","machine_uuid",0); | | X_30 := algebra.selectNotNil(X_29); | | X_26:bat[:oid,:str] := sql.bind(X_2,"sys","f_trigger","machine_uuid",2); | | X_31 := algebra.kdifference(X_30,X_26); | | X_28 := algebra.selectNotNil(X_26); | | X_32 := algebra.kunion(X_31,X_28); | | X_24:bat[:oid,:str] := sql.bind(X_2,"sys","f_trigger","machine_uuid",1); | | X_25 := algebra.selectNotNil(X_24); | | X_33 := algebra.kunion(X_32,X_25); | | X_34 := algebra.kdifference(X_33,X_8); | | (X_35,grp55) := group.derive(ext28,grp26,X_34); | | X_37 := bat.mirror(X_35); | | X_38 := algebra.join(X_37,X_34); | | X_39:bat[:oid,:wrd] := aggr.count(X_38,grp26,X_20); | | exit X_596; | | X_40 := sql.resultSet(2,1,X_21); | | sql.rsColumn(X_40,"sys.f_trigger","script_id","varchar",32768,0,X_21); | | sql.rsColumn(X_40,"sys.f_trigger","L10","wrd",64,0,X_39); | | X_53 := io.stdout(); | | sql.exportResult(X_53,X_40); | | end s0_1; | +-------------------------------------------------------------------------------+ 38 tuples (3.136ms)
Running the query takes a very long time. While the query is running, I notice VERY heavy disk writes, and almost no reads and almost no CPU activity. Based on other information on the mailing list, I suspect the query generates intermediate data that gets writted to disk (instead of kept in memory).
We are running monetdb on a 32 core linux machine with 256 GB of RAM and a fast raid array. Memory usage is under 20%, so I would not expect monetdb to be writing to disk so much (50-500MB/s writes)...
Database settings:
shared default yes nthreads local 32 optpipe local recycler_pipe master default no slave default<unknown> readonly local yes
Stethoscope output: [1] # event, time, thread, usec, stmt, # name [1] [ 256, "14:02:38.525477", 2, 0, "calc.str(\"script_id\");" ] [1] [ 257, "14:02:38.525477", 2, 134, "calc.str(\"script_id\");" ] [1] [ 258, "14:02:38.525634", 2, 0, "calc.str(\"f_trigger\");" ] [1] [ 259, "14:02:38.525634", 2, 18, "calc.str(\"f_trigger\");" ] [1] [ 260, "14:02:38.525670", 2, 0, "calc.str(\"script_id\");" ] [1] [ 261, "14:02:38.525670", 2, 16, "calc.str(\"script_id\");" ] [1] [ 262, "14:02:38.525704", 2, 0, "calc.str(\"f_trigger\");" ] [1] [ 263, "14:02:38.525704", 2, 17, "calc.str(\"f_trigger\");" ] [1] [ 264, "14:02:38.525740", 2, 0, "calc.str(\"script_id\");" ] [1] [ 265, "14:02:38.525740", 2, 17, "calc.str(\"script_id\");" ] [1] [ 266, "14:02:38.525777", 2, 0, "calc.str(\"f_trigger\");" ] [1] [ 267, "14:02:38.525777", 2, 19, "calc.str(\"f_trigger\");" ] [1] [ 268, "14:02:38.525812", 2, 0, "calc.str(\"script_id\");" ] [1] [ 269, "14:02:38.525812", 2, 19, "calc.str(\"script_id\");" ] [1] [ 270, "14:02:38.525848", 2, 0, "calc.str(\"f_trigger\");" ] [1] [ 271, "14:02:38.525848", 2, 19, "calc.str(\"f_trigger\");" ] [1] [ 272, "14:02:38.525884", 2, 0, "calc.str(\"L11\");" ] [1] [ 273, "14:02:38.525884", 2, 19, "calc.str(\"L11\");" ] [1] [ 274, "14:02:38.525925", 2, 0, "calc.str(\"f_trigger\");" ] [1] [ 275, "14:02:38.525925", 2, 20, "calc.str(\"f_trigger\");" ] [1] [ 276, "14:02:38.525962", 2, 0, "calc.str(\"L11\");" ] [1] [ 277, "14:02:38.525962", 2, 18, "calc.str(\"L11\");" ] [1] [ 278, "14:02:38.526000", 2, 0, "calc.str(\"f_trigger\");" ] [1] [ 279, "14:02:38.526000", 2, 18, "calc.str(\"f_trigger\");" ] [1] [ 280, "14:02:38.526039", 2, 0, "calc.str(\"machine_uuid\");" ] [1] [ 281, "14:02:38.526039", 2, 19, "calc.str(\"machine_uuid\");" ] [1] [ 282, "14:02:38.526077", 2, 0, "calc.str(\"f_trigger\");" ] [1] [ 283, "14:02:38.526077", 2, 20, "calc.str(\"f_trigger\");" ] [1] [ 284, "14:02:38.526116", 2, 0, "calc.str(nil:str);" ] [1] [ 285, "14:02:38.526116", 2, 22, "calc.str(nil:str);" ] [1] [ 286, "14:02:38.528707", 2, 0, "X_21 := nil:bat[:oid,:str];" ] [1] [ 287, "14:02:38.528707", 2, 64, "X_21 := nil:bat[:oid,:str];" ] [1] [ 288, "14:02:38.528790", 2, 0, "X_39:bat[:oid,:wrd] := nil:bat[:oid,:wrd];" ] [1] [ 289, "14:02:38.528790", 2, 27, "X_39:bat[:oid,:wrd] := nil:bat[:oid,:wrd];" ] [1] [ 290, "14:02:38.528835", 2, 0, "barrier X_596 := language.dataflow();" ] [1] [ 291, "14:02:38.528928", 40, 0, "X_2 := sql.mvc();" ] [1] [ 292, "14:02:38.528928", 40, 141, "X_2 := sql.mvc();" ] [1] [ 293, "14:02:38.529100", 40, 0, "X_12:bat[:oid,:str] := sql.bind(X_2=0,\"sys\",\"f_trigger\",\"script_id\",0);" ] [1] [ 294, "14:02:38.529100", 40, 78, "X_12:bat[:oid,:str] =
[171452378] := sql.bind(X_2=0,\"sys\",\"f_trigger\",\"script_id\",0);" ] [1] [ 295, "14:02:38.529255", 52, 0, "X_10:bat[:oid,:str] := sql.bind(X_2=0,\"sys\",\"f_trigger\",\"script_id\",2);" ] [1] [ 296, "14:02:38.529255", 52, 229, "X_10:bat[:oid,:str] = [0] := sql.bind(X_2=0,\"sys\",\"f_trigger\",\"script_id\",2);" ] [1] [ 297, "14:02:38.529491", 45, 0, "X_5:bat[:oid,:oid] := sql.bind_dbat(X_2=0,\"sys\",\"f_trigger\",1);" ] [1] [ 298, "14:02:38.529491", 45, 201, "X_5:bat[:oid,:oid] = [0] := sql.bind_dbat(X_2=0,\"sys\",\"f_trigger\",1);" ] [1] [ 299, "14:02:38.529699", 50, 0, "X_9:bat[:oid,:str] := sql.bind(X_2=0,\"sys\",\"f_trigger\",\"script_id\",1);" ] [1] [ 300, "14:02:38.529699", 50, 227, "X_9:bat[:oid,:str] = [0] := sql.bind(X_2=0,\"sys\",\"f_trigger\",\"script_id\",1);" ] [1] [ 301, "14:02:38.529951", 56, 0, "X_24:bat[:oid,:str] := sql.bind(X_2=0,\"sys\",\"f_trigger\",\"machine_uuid\",1);" ] [1] [ 302, "14:02:38.530203", 52, 0, "X_14 := algebra.kdifference(X_12= :bat[:oid,:str][171452378],X_10= :bat[:oid,:str][0]);" ] [1] [ 303, "14:02:38.529951", 56, 259, "X_24:bat[:oid,:str] = [0] := sql.bind(X_2=0,\"sys\",\"f_trigger\",\"machine_uuid\",1);" ] [1] [ 304, "14:02:38.530203", 52, 112, "X_14= [171452378] := algebra.kdifference(X_12=nil:bat[:oid,:str],X_10= :bat[:oid,:str][0]);" ] [1] [ 305, "14:02:38.530308", 45, 0, "X_8 := bat.reverse(X_5= :bat[:oid,:oid][0]);" ] [1] [ 306, "14:02:38.530308", 45, 143, "X_8= [0] := bat.reverse(X_5=nil:bat[:oid,:oid]);" ] [1] [ 307, "14:02:38.530465", 57, 0, "X_29:bat[:oid,:str] := sql.bind(X_2=0,\"sys\",\"f_trigger\",\"machine_uuid\",0);" ] [1] [ 308, "14:02:38.530668", 56, 0, "X_25 := algebra.selectNotNil(X_24= :bat[:oid,:str][0]);" ] [1] [ 309, "14:02:38.530465", 57, 210, "X_29:bat[:oid,:str] = [171452378] := sql.bind(X_2=0,\"sys\",\"f_trigger\",\"machine_uuid\",0);" ] [1] [ 310, "14:02:38.530668", 38, 0, "X_26:bat[:oid,:str] := sql.bind(X_2=0,\"sys\",\"f_trigger\",\"machine_uuid\",2);" ] [1] [ 311, "14:02:38.530668", 56, 44, "X_25= [0] := algebra.selectNotNil(X_24=nil:bat[:oid,:str]);" ] [1] [ 312, "14:02:38.530668", 38, 1743, "X_26:bat[:oid,:str] = [0] := sql.bind(X_2=0,\"sys\",\"f_trigger\",\"machine_uuid\",2);" ] [1] [ 313, "14:02:38.530946", 61, 0, "X_15 := algebra.kunion(X_14= [171452378],X_10= :bat[:oid,:str][0]);" ] [1] [ 314, "14:02:38.531115", 57, 0, "X_30 := algebra.selectNotNil(X_29= :bat[:oid,:str][171452378]);" ] [1] [ 315, "14:02:38.531115", 57, 39, "X_30= [171452378] := algebra.selectNotNil(X_29=nil:bat[:oid,:str]);" ] [1] [ 316, "14:02:38.530946", 61, 210, "X_15= [171452378] := algebra.kunion(X_14=nil,X_10=nil:bat[:oid,:str]);" ] [1] [ 317, "14:02:38.531190", 57, 0, "X_31 := algebra.kdifference(X_30= [171452378],X_26= :bat[:oid,:str][0]);" ] [1] [ 318, "14:02:38.531271", 61, 0, "X_16 := algebra.kunion(X_15= [171452378],X_9= :bat[:oid,:str][0]);" ] [1] [ 319, "14:02:38.531190", 57, 99, "X_31= [171452378] := algebra.kdifference(X_30=nil,X_26= :bat[:oid,:str][0]);" ] [1] [ 320, "14:02:38.531271", 61, 42, "X_16= [171452378] := algebra.kunion(X_15=nil,X_9=nil:bat[:oid,:str]);" ] [1] [ 321, "14:02:38.531346", 57, 0, "X_28 := algebra.selectNotNil(X_26= :bat[:oid,:str][0]);" ] [1] [ 322, "14:02:38.531346", 57, 55, "X_28= [0] := algebra.selectNotNil(X_26=nil:bat[:oid,:str]);" ] [1] [ 323, "14:02:38.531408", 61, 0, "X_17 := algebra.kdifference(X_16= [171452378],X_8= [0]);" ] [1] [ 324, "14:02:38.531466", 57, 0, "X_32 := algebra.kunion(X_31= [171452378],X_28= [0]);" ] [1] [ 325, "14:02:38.531408", 61, 59, "X_17= [171452378] := algebra.kdifference(X_16=nil,X_8= [0]);" ] [1] [ 326, "14:02:38.531466", 57, 49, "X_32= [171452378] := algebra.kunion(X_31=nil,X_28=nil);" ] [1] [ 327, "14:02:38.531537", 61, 0, "(ext28,grp26) := group.done(X_17= [171452378]);" ] [1] [ 328, "14:02:38.531596", 57, 0, "X_33 := algebra.kunion(X_32= [171452378],X_25= [0]);" ] [1] [ 329, "14:02:38.531596", 57, 34, "X_33= [171452378] := algebra.kunion(X_32=nil,X_25=nil);" ] [1] [ 330, "14:02:38.531660", 57, 0, "X_34 := algebra.kdifference(X_33= [171452378],X_8= [0]);" ] [1] [ 331, "14:02:38.531660", 57, 28, "X_34= [171452378] := algebra.kdifference(X_33=nil,X_8=nil);" ] [1] [ 332, "14:02:38.531537", 61, 4488161, "(ext28= [208],grp26= [171452378]) := group.done(X_17= [171452378]);" ] [1] [ 333, "14:02:43.019887", 61, 0, "X_20 := bat.mirror(ext28= [208]);" ] [1] [ 334, "14:02:43.019887", 61, 39, "X_20= [208] := bat.mirror(ext28= [208]);" ] [1] [ 335, "14:02:43.019962", 61, 0, "(X_35,grp55) := group.derive(ext28= [208],grp26= [171452378],X_34= [171452378]);" ] [1] [ 336, "14:02:43.020045", 60, 0, "X_21 := algebra.join(X_20= [208],X_17= [171452378]);" ] [1] [ 337, "14:02:43.020045", 60, 505, "X_21= [208] := algebra.join(X_20= [208],X_17=nil);" ] [1] [ 338, "13:50:05.243130", 21, 2365556004, "(X_41= [90737912],grp74) := group.derive(ext30=nil,grp28= [171452378],X_40= [171452378]);" ] [1] [ 339, "14:29:30.799335", 21, 0, "X_43 := bat.mirror(X_41= [90737912]);" ] [1] [ 340, "14:29:30.799335", 21, 44, "X_43= [90737912] := bat.mirror(X_41=nil);" ] [1] [ 341, "14:29:30.799408", 21, 0, "X_44 := algebra.join(X_43= [90737912],X_40= [171452378]);" ] [1] [ 342, "14:29:30.799408", 21, 1931164, "X_44= [90737912] := algebra.join(X_43=nil,X_40=nil);" ] [1] [ 343, "14:29:32.730825", 21, 0, "X_45:bat[:oid,:wrd] := aggr.count(X_44= [90737912],grp28= [171452378],X_20= [208]);" ] [1] [ 344, "14:29:32.730825", 21, 2686923, "X_45:bat[:oid,:wrd] = [208] := aggr.count(X_44=nil,grp28=nil,X_20=nil);" ] [1] [ 345, "13:49:55.404991", 35, 2380012994, "barrier X_602 := language.dataflow();" ] [1] [ 346, "14:29:35.418135", 35, 0, "X_47 := sql.resultSet(2,1,X_25= [10]);" ] [1] [ 347, "14:29:35.418135", 35, 53, "X_47 := sql.resultSet(2,1,X_25= [10]);" ] [1] [ 348, "14:29:35.418208", 35, 0, "X_46 := algebra.leftjoin(X_24= [10],X_45= :bat[:oid,:wrd][208]);" ] [1] [ 349, "14:29:35.418208", 35, 130, "X_46= [10] := algebra.leftjoin(X_24=nil,X_45=nil:bat[:oid,:wrd]);" ] [1] [ 350, "14:29:35.418370", 35, 0, "sql.rsColumn(X_47=1,\"sys.f_trigger\",\"script_id\",\"varchar\",32768,0,X_25= [10]);" ] [1] [ 351, "14:29:35.418370", 35, 36, "sql.rsColumn(X_47=1,\"sys.f_trigger\",\"script_id\",\"varchar\",32768,0,X_25=nil);" ] [1] [ 352, "14:29:35.418429", 35, 0, "sql.rsColumn(X_47=1,\"sys.f_trigger\",\"L6\",\"wrd\",64,0,X_46= [10]);" ] [1] [ 353, "14:29:35.418429", 35, 34, "sql.rsColumn(X_47=1,\"sys.f_trigger\",\"L6\",\"wrd\",64,0,X_46=nil);" ] [1] [ 354, "14:29:35.418483", 35, 0, "X_57 := io.stdout();" ] [1] [ 355, "14:29:35.418483", 35, 18, "X_57 := io.stdout();" ] [1] [ 356, "14:29:35.418515", 35, 0, "sql.exportResult(X_57==\"42083632\":streams,X_47=1);" ] [1] [ 357, "14:29:35.418515", 35, 73, "sql.exportResult(X_57==\"42083632\":streams,X_47=1);" ] [1] [ 358, "14:29:35.418611", 35, 0, "end s3_2;" ] [1] [ 359, "14:02:43.019962", 61, 2633497166, "(X_35= [90737912],grp55) := group.derive(ext28=nil,grp26= [171452378],X_34= [171452378]);" ] [1] [ 360, "14:46:36.517329", 61, 0, "X_37 := bat.mirror(X_35= [90737912]);" ] [1] [ 361, "14:46:36.517329", 61, 43, "X_37= [90737912] := bat.mirror(X_35=nil);" ] [1] [ 362, "14:46:36.517404", 61, 0, "X_38 := algebra.join(X_37= [90737912],X_34= [171452378]);" ] [1] [ 363, "14:46:36.517404", 61, 2374517, "X_38= [90737912] := algebra.join(X_37=nil,X_34=nil);" ] [1] [ 364, "14:46:38.892126", 61, 0, "X_39:bat[:oid,:wrd] := aggr.count(X_38= [90737912],grp26= [171452378],X_20= [208]);" ] [1] [ 365, "14:46:38.892126", 61, 2690276, "X_39:bat[:oid,:wrd] = [208] := aggr.count(X_38=nil,grp26=nil,X_20=nil);" ] [1] [ 366, "14:02:38.528835", 2, 2643053835, "barrier X_596 := language.dataflow();" ] [1] [ 367, "14:46:41.583009", 2, 0, "X_40 := sql.resultSet(2,1,X_21= [208]);" ] [1] [ 368, "14:46:41.583009", 2, 48, "X_40 := sql.resultSet(2,1,X_21= [208]);" ] [1] [ 369, "14:46:41.583076", 2, 0, "sql.rsColumn(X_40=1,\"sys.f_trigger\",\"script_id\",\"varchar\",32768,0,X_21= [208]);" ] [1] [ 370, "14:46:41.583076", 2, 40, "sql.rsColumn(X_40=1,\"sys.f_trigger\",\"script_id\",\"varchar\",32768,0,X_21=nil);" ] [1] [ 371, "14:46:41.583140", 2, 0, "sql.rsColumn(X_40=1,\"sys.f_trigger\",\"L11\",\"wrd\",64,0,X_39= :bat[:oid,:wrd][208]);" ] [1] [ 372, "14:46:41.583140", 2, 35, "sql.rsColumn(X_40=1,\"sys.f_trigger\",\"L11\",\"wrd\",64,0,X_39=nil:bat[:oid,:wrd]);" ] [1] [ 373, "14:46:41.583198", 2, 0, "X_53 := io.stdout();" ] [1] [ 374, "14:46:41.583198", 2, 18, "X_53 := io.stdout();" ] [1] [ 375, "14:46:41.583230", 2, 0, "sql.exportResult(X_53==\"35761888\":streams,X_40=1);" ] [1] [ 376, "14:46:41.583230", 2, 403, "sql.exportResult(X_53==\"35761888\":streams,X_40=1);" ] [1] [ 377, "14:46:41.583661", 2, 0, "end s1_1;" ] The query took 44 minutes to complete.
Is this a bug (as I suspect)? Or is the group by/count distinct operation just slow? If it is a bug, is this fixed in dec-2011? If that is the case, I would like to verify this was fixed for us in dec-2011 - however the last "latest" build I tried was crashing for me. I am not sure which version of dec-2011 I should try for testing this.
Thanks

The new version is much faster than before, the query takes 11 minutes to run
(consistently so). However, I noticed that CPU usage was almost zero and
most of the activity was disk writes still.
Counts:
sql>select count(distinct machine_uuid) from f_trigger;
+---------+
| L12 |
+=========+
| 5293197 |
+---------+
1 tuple (10.7s)
sql>select count(distinct script_id) from f_trigger;
+------+
| L13 |
+======+
| 208 |
+------+
1 tuple (365.408ms)
Query speed on dec2011 (fairly consistent):
sql>select script_id,count(distinct machine_uuid) from f_trigger group by
script_id;
...
208 tuples (11m 10s)
sql>explain select script_id,count(distinct machine_uuid) from f_trigger
group by script_id;
+-------------------------------------------------------------------------------+
| mal
|
+===============================================================================+
| function user.s10_1{autoCommit=true}():void;
|
| X_11 := nil:bat[:oid,:str];
|
| X_20:bat[:oid,:wrd] := nil:bat[:oid,:wrd];
|
| barrier X_256 := language.dataflow();
|
| X_2 := sql.mvc();
|
| X_5:bat[:oid,:str] := sql.bind(X_2,"sys","f_trigger","script_id",0);
|
| (ext18,grp16) := group.done(X_5);
|
| X_10 := bat.mirror(ext18);
|
| X_11 := algebra.join(X_10,X_5);
|
| X_14:bat[:oid,:str] :=
sql.bind(X_2,"sys","f_trigger","machine_uuid",0); |
| X_15 := algebra.selectNotNil(X_14);
|
| (X_16,grp33) := group.derive(ext18,grp16,X_15);
|
| X_18 := bat.mirror(X_16);
|
| X_19 := algebra.join(X_18,X_15);
|
| X_20:bat[:oid,:wrd] := aggr.count(X_19,grp16,X_10);
|
| exit X_256;
|
| X_21 := sql.resultSet(2,1,X_11);
|
| sql.rsColumn(X_21,"sys.f_trigger","script_id","varchar",32768,0,X_11);
|
| sql.rsColumn(X_21,"sys.f_trigger","L17","wrd",64,0,X_20);
|
| X_33 := io.stdout();
|
| sql.exportResult(X_33,X_21);
|
| end s10_1;
|
+-------------------------------------------------------------------------------+
22 tuples (1.352ms)
Stethoscope +rwpof output:
[1] # function, pc, operation, blk reads, blk writes, pg
reclaim, pg faults, swaps, ctxt switch, inv switch, # name
[1] [ "user.s11_1", 1, ":=", 2501160, 54445344, 0,
0, 0, 0, 0, ]
[1] [ "user.s11_1", 2, ":=", 0, 0, 0, 0, 0,
0, 0, ]
[1] [ "user.s11_1", 4, "sql.mvc", 0, 0, 0, 0,
0, 0, 0, ]
[1] [ "user.s11_1", 5, "sql.bind", 0, 0, 0, 0,
0, 0, 0, ]
[1] [ "user.s11_1", 9, "sql.bind", 0, 0, 0, 0,
0, 0, 0, ]
[1] [ "user.s11_1", 10, "algebra.selectNotNil", 0, 0, 0,
0, 0, 0, 0, ]
[1] [ "user.s11_1", 6, "group.done", 0, 2585824, 0,
0, 0, 0, 0, ]
[1] [ "user.s11_1", 7, "bat.mirror", 0, 0, 0, 0,
0, 0, 0, ]
[1] [ "user.s11_1", 8, "algebra.join", 0, 0, 0, 0,
0, 0, 0, ]
[1] [ "user.s11_1", 11, "group.derive", 0, 24246520, 0,
0, 0, 0, 0, ]
[1] [ "user.s11_1", 12, "bat.mirror", 0, 0, 0, 0,
0, 0, 0, ]
[1] [ "user.s11_1", 13, "algebra.join", 0, 0, 0, 0,
0, 0, 0, ]
[1] [ "user.s11_1", 14, "aggr.count", 0, 0, 0, 0,
0, 0, 0, ]
[1] [ "user.s11_1", 3, "language.dataflow", 0, 0, 0,
0, 0, 0, 0, ]
[1] [ "user.s11_1", 16, "sql.resultSet", 0, 0, 0,
0, 0, 0, 0, ]
[1] [ "user.s11_1", 17, "sql.rsColumn", 0, 0, 0, 0,
0, 0, 0, ]
[1] [ "user.s11_1", 18, "sql.rsColumn", 0, 0, 0, 0,
0, 0, 0, ]
[1] [ "user.s11_1", 19, "io.stdout", 0, 0, 0, 0,
0, 0, 0, ]
[1] [ "user.s11_1", 20, "sql.exportResult", 0, 0, 0,
0, 0, 0, 0, ]
Most of the time was spent here:
[1] [ "user.s11_1", 11, "group.derive", 0, 24246520, 0,
0, 0, 0, 0, ]
Using default stethoscope settings this step looks like:
[1] [ 39, "done", "09:43:28.701994", 17, 1266986588,
"(X_16=

I can also reproduce this on 11.7.3 (dec2011) today. My question is, is this normal, and why? from iostat: Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await r_await w_await svctm %util sda 0.00 70.50 0.00 1452.00 0.00 22466.00 30.94 141.85 95.63 0.00 95.63 0.69 100.00 zack_pcd wrote:
The new version is much faster than before, the query takes 11 minutes to run (consistently so). However, I noticed that CPU usage was almost zero and most of the activity was disk writes still.
Counts:
sql>select count(distinct machine_uuid) from f_trigger; +---------+ | L12 | +=========+ | 5293197 | +---------+ 1 tuple (10.7s)
sql>select count(distinct script_id) from f_trigger; +------+ | L13 | +======+ | 208 | +------+ 1 tuple (365.408ms)
Query speed on dec2011 (fairly consistent):
sql>select script_id,count(distinct machine_uuid) from f_trigger group by script_id; ... 208 tuples (11m 10s)
sql>explain select script_id,count(distinct machine_uuid) from f_trigger group by script_id; +-------------------------------------------------------------------------------+ | mal | +===============================================================================+ | function user.s10_1{autoCommit=true}():void; | | X_11 := nil:bat[:oid,:str]; | | X_20:bat[:oid,:wrd] := nil:bat[:oid,:wrd]; | | barrier X_256 := language.dataflow(); | | X_2 := sql.mvc(); | | X_5:bat[:oid,:str] := sql.bind(X_2,"sys","f_trigger","script_id",0); | | (ext18,grp16) := group.done(X_5); | | X_10 := bat.mirror(ext18); | | X_11 := algebra.join(X_10,X_5); | | X_14:bat[:oid,:str] := sql.bind(X_2,"sys","f_trigger","machine_uuid",0); | | X_15 := algebra.selectNotNil(X_14); | | (X_16,grp33) := group.derive(ext18,grp16,X_15); | | X_18 := bat.mirror(X_16); | | X_19 := algebra.join(X_18,X_15); | | X_20:bat[:oid,:wrd] := aggr.count(X_19,grp16,X_10); | | exit X_256; | | X_21 := sql.resultSet(2,1,X_11); | | sql.rsColumn(X_21,"sys.f_trigger","script_id","varchar",32768,0,X_11); | | sql.rsColumn(X_21,"sys.f_trigger","L17","wrd",64,0,X_20); | | X_33 := io.stdout(); | | sql.exportResult(X_33,X_21); | | end s10_1; | +-------------------------------------------------------------------------------+ 22 tuples (1.352ms)
Stethoscope +rwpof output:
[1] # function, pc, operation, blk reads, blk writes, pg reclaim, pg faults, swaps, ctxt switch, inv switch, # name [1] [ "user.s11_1", 1, ":=", 2501160, 54445344, 0, 0, 0, 0, 0, ] [1] [ "user.s11_1", 2, ":=", 0, 0, 0, 0, 0, 0, 0, ] [1] [ "user.s11_1", 4, "sql.mvc", 0, 0, 0, 0, 0, 0, 0, ] [1] [ "user.s11_1", 5, "sql.bind", 0, 0, 0, 0, 0, 0, 0, ] [1] [ "user.s11_1", 9, "sql.bind", 0, 0, 0, 0, 0, 0, 0, ] [1] [ "user.s11_1", 10, "algebra.selectNotNil", 0, 0, 0, 0, 0, 0, 0, ] [1] [ "user.s11_1", 6, "group.done", 0, 2585824, 0, 0, 0, 0, 0, ] [1] [ "user.s11_1", 7, "bat.mirror", 0, 0, 0, 0, 0, 0, 0, ] [1] [ "user.s11_1", 8, "algebra.join", 0, 0, 0, 0, 0, 0, 0, ] [1] [ "user.s11_1", 11, "group.derive", 0, 24246520, 0, 0, 0, 0, 0, ] [1] [ "user.s11_1", 12, "bat.mirror", 0, 0, 0, 0, 0, 0, 0, ] [1] [ "user.s11_1", 13, "algebra.join", 0, 0, 0, 0, 0, 0, 0, ] [1] [ "user.s11_1", 14, "aggr.count", 0, 0, 0, 0, 0, 0, 0, ] [1] [ "user.s11_1", 3, "language.dataflow", 0, 0, 0, 0, 0, 0, 0, ] [1] [ "user.s11_1", 16, "sql.resultSet", 0, 0, 0, 0, 0, 0, 0, ] [1] [ "user.s11_1", 17, "sql.rsColumn", 0, 0, 0, 0, 0, 0, 0, ] [1] [ "user.s11_1", 18, "sql.rsColumn", 0, 0, 0, 0, 0, 0, 0, ] [1] [ "user.s11_1", 19, "io.stdout", 0, 0, 0, 0, 0, 0, 0, ] [1] [ "user.s11_1", 20, "sql.exportResult", 0, 0, 0, 0, 0, 0, 0, ]
Most of the time was spent here: [1] [ "user.s11_1", 11, "group.derive", 0, 24246520, 0, 0, 0, 0, 0, ]
Using default stethoscope settings this step looks like: [1] [ 39, "done", "09:43:28.701994", 17, 1266986588, "(X_16=
[90715083],grp33) := group.derive(ext18=nil,grp16= [165492393],X_15= [165492393]);", ] Full default stethoscope output: [1] # event, status, time, thread, usec, stmt, # name [1] [ 19, "start", "09:43:24.240496", 2, 0, "X_11 := nil:bat[:oid,:str];", ] [1] [ 20, "done", "09:43:24.240496", 2, 114, "X_11 := nil:bat[:oid,:str];", ] [1] [ 21, "start", "09:43:24.240629", 2, 0, "X_20:bat[:oid,:wrd] := nil:bat[:oid,:wrd];", ] [1] [ 22, "done", "09:43:24.240629", 2, 18, "X_20:bat[:oid,:wrd] := nil:bat[:oid,:wrd];", ] [1] [ 23, "start", "09:43:24.240662", 2, 0, "barrier X_256 := language.dataflow();", ] [1] [ 24, "start", "09:43:24.240742", 17, 0, "X_2 := sql.mvc();", ] [1] [ 25, "done", "09:43:24.240742", 17, 92, "X_2 := sql.mvc();", ] [1] [ 26, "start", "09:43:24.240860", 17, 0, "X_5:bat[:oid,:str] := sql.bind(X_2=0,\"sys\",\"f_trigger\",\"script_id\",0);", ] [1] [ 27, "start", "09:43:24.240917", 8, 0, "X_14:bat[:oid,:str] := sql.bind(X_2=0,\"sys\",\"f_trigger\",\"machine_uuid\",0);", ] [1] [ 28, "done", "09:43:24.240860", 17, 63, "X_5:bat[:oid,:str] =
[165492393] := sql.bind(X_2=0,\"sys\",\"f_trigger\",\"script_id\",0);", ] [1] [ 29, "done", "09:43:24.240917", 8, 136, "X_14:bat[:oid,:str] = [165492393] := sql.bind(X_2=0,\"sys\",\"f_trigger\",\"machine_uuid\",0);", ] [1] [ 30, "start", "09:43:24.241090", 17, 0, "(ext18,grp16) := group.done(X_5= :bat[:oid,:str][165492393]);", ] [1] [ 31, "start", "09:43:24.241158", 8, 0, "X_15 := algebra.selectNotNil(X_14= :bat[:oid,:str][165492393]);", ] [1] [ 32, "done", "09:43:24.241158", 8, 28, "X_15= [165492393] := algebra.selectNotNil(X_14=nil:bat[:oid,:str]);", ] [1] [ 33, "done", "09:43:24.241090", 17, 4460657, "(ext18= [208],grp16= [165492393]) := group.done(X_5= :bat[:oid,:str][165492393]);", ] [1] [ 34, "start", "09:43:28.701926", 17, 0, "X_10 := bat.mirror(ext18= [208]);", ] [1] [ 35, "done", "09:43:28.701926", 17, 41, "X_10= [208] := bat.mirror(ext18= [208]);", ] [1] [ 36, "start", "09:43:28.701994", 17, 0, "(X_16,grp33) := group.derive(ext18= [208],grp16= [165492393],X_15= [165492393]);", ] [1] [ 37, "start", "09:43:28.702084", 34, 0, "X_11 := algebra.join(X_10= [208],X_5= :bat[:oid,:str][165492393]);", ] [1] [ 38, "done", "09:43:28.702084", 34, 525, "X_11= [208] := algebra.join(X_10= [208],X_5=nil:bat[:oid,:str]);", ] [1] [ 39, "done", "09:43:28.701994", 17, 1266986588, "(X_16= [90715083],grp33) := group.derive(ext18=nil,grp16= [165492393],X_15= [165492393]);", ] [1] [ 40, "start", "10:04:35.688738", 17, 0, "X_18 := bat.mirror(X_16= [90715083]);", ] [1] [ 41, "done", "10:04:35.688738", 17, 48, "X_18= [90715083] := bat.mirror(X_16=nil);", ] [1] [ 42, "start", "10:04:35.688813", 17, 0, "X_19 := algebra.join(X_18= [90715083],X_15= [165492393]);", ] [1] [ 43, "done", "10:04:35.688813", 17, 1885923, "X_19= [90715083] := algebra.join(X_18=nil,X_15=nil);", ] [1] [ 44, "start", "10:04:37.574908", 17, 0, "X_20:bat[:oid,:wrd] := aggr.count(X_19= [90715083],grp16= [165492393],X_10= [208]);", ] [1] [ 45, "done", "10:04:37.574908", 17, 2557404, "X_20:bat[:oid,:wrd] = [208] := aggr.count(X_19=nil,grp16=nil,X_10=nil);", ] [1] [ 46, "done", "09:43:24.240662", 2, 1275891844, "barrier X_256 := language.dataflow();", ] [1] [ 47, "start", "10:04:40.132654", 2, 0, "X_21 := sql.resultSet(2,1,X_11= [208]);", ] [1] [ 48, "done", "10:04:40.132654", 2, 46, "X_21 := sql.resultSet(2,1,X_11= [208]);", ] [1] [ 49, "start", "10:04:40.132718", 2, 0, "sql.rsColumn(X_21=4,\"sys.f_trigger\",\"script_id\",\"varchar\",32768,0,X_11= [208]);", ] [1] [ 50, "done", "10:04:40.132718", 2, 41, "sql.rsColumn(X_21=4,\"sys.f_trigger\",\"script_id\",\"varchar\",32768,0,X_11=nil);", ] [1] [ 51, "start", "10:04:40.132781", 2, 0, "sql.rsColumn(X_21=4,\"sys.f_trigger\",\"L21\",\"wrd\",64,0,X_20= :bat[:oid,:wrd][208]);", ] [1] [ 52, "done", "10:04:40.132781", 2, 34, "sql.rsColumn(X_21=4,\"sys.f_trigger\",\"L21\",\"wrd\",64,0,X_20=nil:bat[:oid,:wrd]);", ] [1] [ 53, "start", "10:04:40.132835", 2, 0, "X_33 := io.stdout();", ] [1] [ 54, "done", "10:04:40.132835", 2, 20, "X_33 := io.stdout();", ] [1] [ 55, "start", "10:04:40.132869", 2, 0, "sql.exportResult(X_33==\"39785280\":streams,X_21=4);", ] [1] [ 56, "done", "10:04:40.132869", 2, 400, "sql.exportResult(X_33==\"39785280\":streams,X_21=4);", ] [1] [ 57, "start", "10:04:40.133291", 2, 0, "end s12_1;", ] This is all using these database settings: type default database shared default yes nthreads default 32 optpipe default default_pipe master default no slave default <unknown> readonly local yes nclients default 64
--algorithms: where do you specify this option?
Thank you.
-- View this message in context: http://old.nabble.com/MonetDB-unexpected-count-group-by-slowdown-tp33137428p... Sent from the monetdb-users mailing list archive at Nabble.com.
participants (3)
-
Fabian Groffen
-
Martin Kersten
-
zack_pcd