-- -- Why does the optimizer insist on sorting a clustered table? -- -- NOTE: This script requires 540 MB of disk space and about -- 12 minutes to run (on my good old Sun-Blade-1000, at least). -- SELECT version(); DROP VIEW IF EXISTS v; DROP TABLE IF EXISTS a; DROP TABLE IF EXISTS b; CREATE TABLE a ( id integer PRIMARY KEY, val float8 DEFAULT random() ); CREATE TABLE b ( id integer PRIMARY KEY, opt float8 DEFAULT CASE WHEN random() < .5 THEN random() END ); CREATE VIEW v AS SELECT id, COALESCE(opt, 0) AS opt FROM b; ------------------------------------------------- INSERT INTO a SELECT * FROM generate_series(1, 5000000); INSERT INTO b SELECT * FROM generate_series(1, 3711523); ANALYZE a; ANALYZE b; -- note the correlation for the id columns is 1 SELECT * FROM pg_stats WHERE tablename IN ('a', 'b'); ------------------------------------------------- -- Example 1: left merge join over two index scans EXPLAIN ANALYZE SELECT * FROM a LEFT OUTER JOIN b ON (a.id = b.id); -- Example 2: left merge join over an index scan and seqscan + sort EXPLAIN ANALYZE SELECT * FROM a LEFT OUTER JOIN v ON (a.id = v.id); -- Example 3: tricks the optimizer into a right merge join EXPLAIN ANALYZE SELECT * FROM a LEFT OUTER JOIN ( SELECT * FROM v ORDER BY id ) sub ON (a.id = sub.id);