Hi
I've been trying out monetdb on a 350M row table (modern linux workstation
with 24GB RAM + SSD). This table has a sorted TIMESTAMP column and a
second unsorted VARCHAR(6) column which is non-unique (perhaps 50M distinct
values fairly randomly distributed).
This query is very slow, spends huge amount of time writing, and has very
low CPU (intermediate results?):
select count(*) from t_order where orderId = 'XXXXXX'
This takes hours to complete.
However, this query is a lot faster, even though the timestamp clause is
redundant (all my data is > '2012-01-01'):
select count(*) from t_order where orderId = 'XXXXXX' and time >
'1970-01-01';
The first run is slowish (10s of seconds - I guess because we're reading
the orderId column fully), however subsequent runs are sub-second.
Obviously I could write an app which puts the spurious timestamp clause
into the where, but it's still confusing and not ideal. Is there something
I can do?
The EXPLAINs are:
sql>explain select count(*) from t_order where orderId = 'XXXXXX' and time
> '1970-01-01';
+-------------------------------------------------------------------------------------------------------------------------+
| mal
|
+=========================================================================================================================+
| function user.s9_3{autoCommit=true}(A0:str,A1:str):void;
|
| X_14 := nil:wrd;
|
| barrier X_263 := language.dataflow();
|
| X_4 := sql.mvc();
|
| X_68:bat[:oid,:str] :=
sql.bind(X_4,"sys","t_order","orderid",0,0,16);
|
| X_132 := X_68;
|
| X_101:bat[:oid,:timestamp] :=
sql.bind(X_4,"sys","t_order","time",0,0,16);
|
| X_164 := X_101;
|
| X_35:bat[:oid,:oid] := sql.tid(X_4,"sys","t_order",0,16);
|
| X_10 := calc.timestamp(A1,7);
|
| X_180 := algebra.thetasubselect(X_164,X_35,X_10,">");
|
| X_197 := algebra.subselect(X_132,X_180,A0,A0,true,true,false);
|
| X_228 := algebra.leftfetchjoin(X_197,X_164);
|
| X_246 := aggr.count(X_228);
|
| X_70:bat[:oid,:str] :=
sql.bind(X_4,"sys","t_order","orderid",0,1,16);
|
| X_133 := X_70;
|
| X_103:bat[:oid,:timestamp] :=
sql.bind(X_4,"sys","t_order","time",0,1,16);
|
| X_165 := X_103;
|
| X_37:bat[:oid,:oid] := sql.tid(X_4,"sys","t_order",1,16);
|
| X_181 := algebra.thetasubselect(X_165,X_37,X_10,">");
|
| X_199 := algebra.subselect(X_133,X_181,A0,A0,true,true,false);
|
| X_229 := algebra.leftfetchjoin(X_199,X_165);
|
| X_247 := aggr.count(X_229);
|
| X_72:bat[:oid,:str] :=
sql.bind(X_4,"sys","t_order","orderid",0,2,16);
|
| X_134 := X_72;
|
| X_105:bat[:oid,:timestamp] :=
sql.bind(X_4,"sys","t_order","time",0,2,16);
|
| X_166 := X_105;
|
| X_39:bat[:oid,:oid] := sql.tid(X_4,"sys","t_order",2,16);
|
| X_182 := algebra.thetasubselect(X_166,X_39,X_10,">");
|
| X_201 := algebra.subselect(X_134,X_182,A0,A0,true,true,false);
|
| X_230 := algebra.leftfetchjoin(X_201,X_166);
|
| X_248 := aggr.count(X_230);
|
| X_74:bat[:oid,:str] :=
sql.bind(X_4,"sys","t_order","orderid",0,3,16);
|
| X_135 := X_74;
|
| X_107:bat[:oid,:timestamp] :=
sql.bind(X_4,"sys","t_order","time",0,3,16);
|
| X_167 := X_107;
|
| X_41:bat[:oid,:oid] := sql.tid(X_4,"sys","t_order",3,16);
|
| X_183 := algebra.thetasubselect(X_167,X_41,X_10,">");
|
| X_203 := algebra.subselect(X_135,X_183,A0,A0,true,true,false);
|
| X_231 := algebra.leftfetchjoin(X_203,X_167);
|
| X_249 := aggr.count(X_231);
|
| X_76:bat[:oid,:str] :=
sql.bind(X_4,"sys","t_order","orderid",0,4,16);
|
| X_136 := X_76;
|
| X_109:bat[:oid,:timestamp] :=
sql.bind(X_4,"sys","t_order","time",0,4,16);
|
| X_168 := X_109;
|
| X_43:bat[:oid,:oid] := sql.tid(X_4,"sys","t_order",4,16);
|
| X_184 := algebra.thetasubselect(X_168,X_43,X_10,">");
|
| X_205 := algebra.subselect(X_136,X_184,A0,A0,true,true,false);
|
| X_232 := algebra.leftfetchjoin(X_205,X_168);
|
| X_250 := aggr.count(X_232);
|
| X_78:bat[:oid,:str] :=
sql.bind(X_4,"sys","t_order","orderid",0,5,16);
|
| X_137 := X_78;
|
| X_111:bat[:oid,:timestamp] :=
sql.bind(X_4,"sys","t_order","time",0,5,16);
|
| X_169 := X_111;
|
| X_45:bat[:oid,:oid] := sql.tid(X_4,"sys","t_order",5,16);
|
| X_185 := algebra.thetasubselect(X_169,X_45,X_10,">");
|
| X_207 := algebra.subselect(X_137,X_185,A0,A0,true,true,false);
|
| X_233 := algebra.leftfetchjoin(X_207,X_169);
|
| X_251 := aggr.count(X_233);
|
| X_80:bat[:oid,:str] :=
sql.bind(X_4,"sys","t_order","orderid",0,6,16);
|
| X_138 := X_80;
|
| X_113:bat[:oid,:timestamp] :=
sql.bind(X_4,"sys","t_order","time",0,6,16);
|
| X_170 := X_113;
|
| X_47:bat[:oid,:oid] := sql.tid(X_4,"sys","t_order",6,16);
|
| X_186 := algebra.thetasubselect(X_170,X_47,X_10,">");
|
| X_209 := algebra.subselect(X_138,X_186,A0,A0,true,true,false);
|
| X_234 := algebra.leftfetchjoin(X_209,X_170);
|
| X_252 := aggr.count(X_234);
|
| X_82:bat[:oid,:str] :=
sql.bind(X_4,"sys","t_order","orderid",0,7,16);
|
| X_139 := X_82;
|
| X_115:bat[:oid,:timestamp] :=
sql.bind(X_4,"sys","t_order","time",0,7,16);
|
| X_171 := X_115;
|
| X_49:bat[:oid,:oid] := sql.tid(X_4,"sys","t_order",7,16);
|
| X_187 := algebra.thetasubselect(X_171,X_49,X_10,">");
|
| X_211 := algebra.subselect(X_139,X_187,A0,A0,true,true,false);
|
| X_235 := algebra.leftfetchjoin(X_211,X_171);
|
| X_253 := aggr.count(X_235);
|
| X_84:bat[:oid,:str] :=
sql.bind(X_4,"sys","t_order","orderid",0,8,16);
|
| X_140 := X_84;
|
| X_117:bat[:oid,:timestamp] :=
sql.bind(X_4,"sys","t_order","time",0,8,16);
|
| X_172 := X_117;
|
| X_51:bat[:oid,:oid] := sql.tid(X_4,"sys","t_order",8,16);
|
| X_188 := algebra.thetasubselect(X_172,X_51,X_10,">");
|
| X_213 := algebra.subselect(X_140,X_188,A0,A0,true,true,false);
|
| X_236 := algebra.leftfetchjoin(X_213,X_172);
|
| X_254 := aggr.count(X_236);
|
| X_86:bat[:oid,:str] :=
sql.bind(X_4,"sys","t_order","orderid",0,9,16);
|
| X_141 := X_86;
|
| X_119:bat[:oid,:timestamp] :=
sql.bind(X_4,"sys","t_order","time",0,9,16);
|
| X_173 := X_119;
|
| X_53:bat[:oid,:oid] := sql.tid(X_4,"sys","t_order",9,16);
|
| X_189 := algebra.thetasubselect(X_173,X_53,X_10,">");
|
| X_215 := algebra.subselect(X_141,X_189,A0,A0,true,true,false);
|
| X_237 := algebra.leftfetchjoin(X_215,X_173);
|
| X_255 := aggr.count(X_237);
|
| X_88:bat[:oid,:str] :=
sql.bind(X_4,"sys","t_order","orderid",0,10,16);
|
| X_142 := X_88;
|
| X_121:bat[:oid,:timestamp] :=
sql.bind(X_4,"sys","t_order","time",0,10,16);
|
| X_174 := X_121;
|
| X_55:bat[:oid,:oid] := sql.tid(X_4,"sys","t_order",10,16);
|
| X_190 := algebra.thetasubselect(X_174,X_55,X_10,">");
|
| X_217 := algebra.subselect(X_142,X_190,A0,A0,true,true,false);
|
| X_238 := algebra.leftfetchjoin(X_217,X_174);
|
| X_256 := aggr.count(X_238);
|
| X_90:bat[:oid,:str] :=
sql.bind(X_4,"sys","t_order","orderid",0,11,16);
|
| X_143 := X_90;
|
| X_123:bat[:oid,:timestamp] :=
sql.bind(X_4,"sys","t_order","time",0,11,16);
|
| X_175 := X_123;
|
| X_57:bat[:oid,:oid] := sql.tid(X_4,"sys","t_order",11,16);
|
| X_191 := algebra.thetasubselect(X_175,X_57,X_10,">");
|
| X_219 := algebra.subselect(X_143,X_191,A0,A0,true,true,false);
|
| X_239 := algebra.leftfetchjoin(X_219,X_175);
|
| X_257 := aggr.count(X_239);
|
| X_92:bat[:oid,:str] :=
sql.bind(X_4,"sys","t_order","orderid",0,12,16);
|
| X_144 := X_92;
|
| X_125:bat[:oid,:timestamp] :=
sql.bind(X_4,"sys","t_order","time",0,12,16);
|
| X_176 := X_125;
|
| X_59:bat[:oid,:oid] := sql.tid(X_4,"sys","t_order",12,16);
|
| X_192 := algebra.thetasubselect(X_176,X_59,X_10,">");
|
| X_221 := algebra.subselect(X_144,X_192,A0,A0,true,true,false);
|
| X_240 := algebra.leftfetchjoin(X_221,X_176);
|
| X_258 := aggr.count(X_240);
|
| X_94:bat[:oid,:str] :=
sql.bind(X_4,"sys","t_order","orderid",0,13,16);
|
| X_145 := X_94;
|
| X_127:bat[:oid,:timestamp] :=
sql.bind(X_4,"sys","t_order","time",0,13,16);
|
| X_177 := X_127;
|
| X_61:bat[:oid,:oid] := sql.tid(X_4,"sys","t_order",13,16);
|
| X_193 := algebra.thetasubselect(X_177,X_61,X_10,">");
|
| X_223 := algebra.subselect(X_145,X_193,A0,A0,true,true,false);
|
| X_241 := algebra.leftfetchjoin(X_223,X_177);
|
| X_259 := aggr.count(X_241);
|
| X_96:bat[:oid,:str] :=
sql.bind(X_4,"sys","t_order","orderid",0,14,16);
|
| X_146 := X_96;
|
| X_129:bat[:oid,:timestamp] :=
sql.bind(X_4,"sys","t_order","time",0,14,16);
|
| X_178 := X_129;
|
| X_63:bat[:oid,:oid] := sql.tid(X_4,"sys","t_order",14,16);
|
| X_194 := algebra.thetasubselect(X_178,X_63,X_10,">");
|
| X_225 := algebra.subselect(X_146,X_194,A0,A0,true,true,false);
|
| X_242 := algebra.leftfetchjoin(X_225,X_178);
|
| X_260 := aggr.count(X_242);
|
| X_98:bat[:oid,:str] :=
sql.bind(X_4,"sys","t_order","orderid",0,15,16);
|
| X_147 := X_98;
|
| X_131:bat[:oid,:timestamp] :=
sql.bind(X_4,"sys","t_order","time",0,15,16);
|
| X_179 := X_131;
|
| X_65:bat[:oid,:oid] := sql.tid(X_4,"sys","t_order",15,16);
|
| X_195 := algebra.thetasubselect(X_179,X_65,X_10,">");
|
| X_227 := algebra.subselect(X_147,X_195,A0,A0,true,true,false);
|
| X_243 := algebra.leftfetchjoin(X_227,X_179);
|
| X_261 := aggr.count(X_243);
|
| X_245 :=
mat.pack(X_246,X_247,X_248,X_249,X_250,X_251,X_252,X_253,X_254,X_255,X_256,X_257,X_258,X_259,X_260,X_261);
|
| X_262 := algebra.selectNotNil(X_245);
|
| X_14 := aggr.sum(X_262);
|
| language.pass(X_164);
|
| language.pass(X_165);
|
| language.pass(X_166);
|
| language.pass(X_167);
|
| language.pass(X_168);
|
| language.pass(X_169);
|
| language.pass(X_170);
|
| language.pass(X_171);
|
| language.pass(X_172);
|
| language.pass(X_173);
|
| language.pass(X_174);
|
| language.pass(X_175);
|
| language.pass(X_176);
|
| language.pass(X_177);
|
| language.pass(X_178);
|
| language.pass(X_179);
|
| exit X_263;
|
| sql.exportValue(1,"sys.L1","L1","wrd",64,0,6,X_14,"");
|
| end s9_3;
|
+-------------------------------------------------------------------------------------------------------------------------+
171 tuples (63.183ms)
sql>explain select count(*) from t_order where orderId = 'XXXXXX';
+----------------------------------------------------------------+
| mal |
+================================================================+
| function user.s10_3{autoCommit=true}(A0:str):void; |
| X_3 := sql.mvc(); |
| X_7 := sql.bind(X_3,"sys","t_order","orderid",0); |
| X_10 := X_7; |
| X_4:bat[:oid,:oid] := sql.tid(X_3,"sys","t_order"); |
| X_11 := algebra.subselect(X_10,X_4,A0,A0,true,true,false); |
| X_13 := algebra.leftfetchjoin(X_11,X_10); |
| X_14 := aggr.count(X_13); |
| sql.exportValue(1,"sys.L1","L1","wrd",64,0,6,X_14,""); |
| end s10_3; |
| # optimizer.mitosis() |
| # optimizer.dataflow() |
+----------------------------------------------------------------+
12 tuples (0.838ms)
sql>select * from storage() where "table" = 't_order';
+--------+---------+-------------------+-----------+----------+-----------+-----------+------------+------------+---------+--------+
| schema | table | column | type | location | count |
typewidth | columnsize | heapsize | indices | sorted |
+========+=========+===================+===========+==========+===========+===========+============+============+=========+========+
| sys | t_order | time | timestamp | 11/1146 | 349989136 |
8 | 2799913088 | 0 | 0 | true |
...
| sys | t_order | orderid | varchar | 10/1043 | 349989136 |
6 | 2799913088 | 8240889856 | 0 | false |
...
| sys | t_order | idx_orderid | oid | 04/456 | 349989136 |
8 | 2799913088 | 0 | 0 | true |
+--------+---------+-------------------+-----------+----------+-----------+-----------+------------+------------+---------+--------+
(some fields edited out...)
So the fast one has a much more complicated MAL plan.
(Other than this, and a date parsing prob which I've reported, I'm really
excited about this software!)
-Will Muldrew