Hello, I am currently evaluating the performance of MonetDB on the Join Order Benchmark, proposed by Leis et al.(http://www.vldb.org/pvldb/vol9/p204-leis.pdf http://www.vldb.org/pvldb/vol9/p204-leis.pdf). There are a few queries where performance seems suboptmal, e.g., query 16b and 17e (https://github.com/gregrahn/join-order-benchmark https://github.com/gregrahn/join-order-benchmark): MonetDB takes around 400 seconds to evaluate query 17e with a single thread, more than 20 seconds with 48 threads, Postgres takes only around 9 seconds. I suspect that the query optimizer selects the wrong plan, this is the output of the plan command. | project ( | | | group by ( | | | | join ( | | | | | join ( | | | | | | join ( | | | | | | | join ( | | | | | | | | join ( | | | | | | | | | join ( | | | | | | | | | | table(sys.movie_companies) [ "movie_companies"."movie_id" NOT NULL as "mc"."movie_id", "movie_companies"."company_id" NOT NULL as "mc"."company_id" ] COUNT , | | | | | | | | | | select ( | | | | | | | | | | | table(sys.company_name) [ "company_name"."id" NOT NULL HASHCOL as "cn"."id", "company_name"."country_code" as "cn"."country_code" ] COUNT | | | | | | | | | | ) [ "cn"."country_code" = varchar(255) "[us]" ] | | | | | | | | | ) [ "mc"."company_id" NOT NULL = "cn"."id" NOT NULL HASHCOL ], | | | | | | | | | table(sys.title) [ "title"."id" NOT NULL HASHCOL as "t"."id" ] COUNT | | | | | | | | ) [ "t"."id" NOT NULL HASHCOL = "mc"."movie_id" NOT NULL ], | | | | | | | | table(sys.cast_info) [ "cast_info"."person_id" NOT NULL as "ci"."person_id", "cast_info"."movie_id" NOT NULL as "ci"."movie_id" ] COUNT | | | | | | | ) [ "ci"."movie_id" NOT NULL = "t"."id" NOT NULL HASHCOL , "ci"."movie_id" NOT NULL = "mc"."movie_id" NOT NULL ], | | | | | | | table(sys.name) [ "name"."id" NOT NULL HASHCOL as "n"."id", "name"."name" NOT NULL as "n"."name" ] COUNT | | | | | | ) [ "n"."id" NOT NULL HASHCOL = "ci"."person_id" NOT NULL ], | | | | | | table(sys.movie_keyword) [ "movie_keyword"."movie_id" NOT NULL as "mk"."movie_id", "movie_keyword"."keyword_id" NOT NULL as "mk"."keyword_id" ] COUNT | | | | | ) [ "t"."id" NOT NULL HASHCOL = "mk"."movie_id" NOT NULL, "ci"."movie_id" NOT NULL = "mk"."movie_id" NOT NULL, "mc"."movie_id" NOT NULL = "mk"."movie_id" NOT NULL ], | | | | | select ( | | | | | | table(sys.keyword) [ "keyword"."id" NOT NULL HASHCOL as "k"."id", "keyword"."keyword" NOT NULL as "k"."keyword" ] COUNT | | | | | ) [ "k"."keyword" NOT NULL = clob "character-name-in-title" ] | | | | ) [ "mk"."keyword_id" NOT NULL = "k"."id" NOT NULL HASHCOL ] | | | ) [ ] [ sys.min no nil ("n"."name" NOT NULL) NOT NULL as "L3"."L3" ] | | ) [ "L3"."L3" NOT NULL as "L4"."member_in_charnamed_movie" ] | +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ I was wondering whether anyone might have a hint about what the problem could be and how one could improve performance for those specific queries? Thanks a lot!
participants (1)
-
Junxiong Wang