From cbe6cf599e52fb2335345ddc6736b0139d113760 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Thu, 5 Mar 2020 01:47:16 +0100 Subject: [PATCH 1/4] Improve test coverage for multi-column MCV lists The regression tests for extended statistics were not testing a couple of important cases for the MCV lists: * IS NOT NULL clauses - We did have queries with IS NULL clauses, but not the negative case. * clauses with variable on the right - All the clauses had the Var on the left, i.e. (Var op Const), so this adds (Const op Var) too. * columns with fixed-length types passed by reference - All columns were using either by-value or varlena types, so add a test with UUID columns too. This matters for (de)serialization. * NULL-only dimension - When one of the columns contains only NULL values, we treat it a a special case during (de)serialization. * arrays containing NULL - When the constant parameter contains NULL value, we need to handle it correctly during estimation, for all IN, ANY and ALL clauses. Discussion: https://www.postgresql.org/message-id/flat/13902317.Eha0YfKkKy%40pierred-pdoc Author: Tomas Vondra --- src/test/regress/expected/stats_ext.out | 237 +++++++++++++++++++++++- src/test/regress/sql/stats_ext.sql | 102 +++++++++- 2 files changed, 335 insertions(+), 4 deletions(-) diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index 9fa659c71d..2ef19c590a 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -785,18 +785,36 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = 1 | 50 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = a AND ''1'' = b'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 1 AND b < ''1'''); estimated | actual -----------+-------- 1 | 50 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > a AND ''1'' > b'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 0 AND b <= ''0'''); estimated | actual -----------+-------- 1 | 50 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 0 >= a AND ''0'' >= b'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1'); estimated | actual -----------+-------- @@ -809,12 +827,24 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 5 AND b < 1 | 50 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 5 AND ''1'' > b AND 5 > c'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <= ''0'' AND c <= 4'); estimated | actual -----------+-------- 1 | 50 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 4 >= a AND ''0'' >= b AND 4 >= c'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1'); estimated | actual -----------+-------- @@ -833,30 +863,60 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51 8 | 200 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52, NULL) AND b IN ( ''1'', ''2'', NULL)'); + estimated | actual +-----------+-------- + 8 | 200 +(1 row) + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])'); estimated | actual -----------+-------- 8 | 200 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[NULL, 1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2'', NULL])'); + estimated | actual +-----------+-------- + 8 | 200 +(1 row) + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, 2, 3]) AND b IN (''1'', ''2'', ''3'')'); estimated | actual -----------+-------- 26 | 150 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, NULL, 2, 3]) AND b IN (''1'', ''2'', NULL, ''3'')'); + estimated | actual +-----------+-------- + 26 | 150 +(1 row) + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3])'); estimated | actual -----------+-------- 10 | 100 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3, NULL])'); + estimated | actual +-----------+-------- + 10 | 100 +(1 row) + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', ''3'') AND c > ANY (ARRAY[1, 2, 3])'); estimated | actual -----------+-------- 1 | 100 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', NULL, ''3'') AND c > ANY (ARRAY[1, 2, NULL, 3])'); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + -- create statistics CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists; ANALYZE mcv_lists; @@ -866,18 +926,36 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = 50 | 50 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = a AND ''1'' = b'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 1 AND b < ''1'''); estimated | actual -----------+-------- 50 | 50 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > a AND ''1'' > b'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 0 AND b <= ''0'''); estimated | actual -----------+-------- 50 | 50 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 0 >= a AND ''0'' >= b'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1'); estimated | actual -----------+-------- @@ -890,12 +968,24 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 5 AND b < 50 | 50 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 5 AND ''1'' > b AND 5 > c'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <= ''0'' AND c <= 4'); estimated | actual -----------+-------- 50 | 50 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 4 >= a AND ''0'' >= b AND 4 >= c'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1'); estimated | actual -----------+-------- @@ -908,30 +998,60 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51 200 | 200 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52, NULL) AND b IN ( ''1'', ''2'', NULL)'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])'); estimated | actual -----------+-------- 200 | 200 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[NULL, 1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2'', NULL])'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, 2, 3]) AND b IN (''1'', ''2'', ''3'')'); estimated | actual -----------+-------- 150 | 150 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, NULL, 2, 3]) AND b IN (''1'', ''2'', NULL, ''3'')'); + estimated | actual +-----------+-------- + 150 | 150 +(1 row) + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3])'); estimated | actual -----------+-------- 100 | 100 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3, NULL])'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', ''3'') AND c > ANY (ARRAY[1, 2, 3])'); estimated | actual -----------+-------- 100 | 100 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', NULL, ''3'') AND c > ANY (ARRAY[1, 2, NULL, 3])'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + -- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute) SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL'); estimated | actual @@ -988,6 +1108,24 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND 1 | 50 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NOT NULL'); + estimated | actual +-----------+-------- + 49 | 0 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NOT NULL AND b IS NULL AND c IS NOT NULL'); + estimated | actual +-----------+-------- + 95 | 0 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (0, 1) AND b IN (''0'', ''1'')'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + -- create statistics CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists; ANALYZE mcv_lists; @@ -1003,6 +1141,24 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND 50 | 50 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NOT NULL'); + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NOT NULL AND b IS NULL AND c IS NOT NULL'); + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (0, 1) AND b IN (''0'', ''1'')'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + -- test pg_mcv_list_items with a very simple (single item) MCV list TRUNCATE mcv_lists; INSERT INTO mcv_lists (a, b, c) SELECT 1, 2, 3 FROM generate_series(1,1000) s(i); @@ -1022,7 +1178,7 @@ TRUNCATE mcv_lists; DROP STATISTICS mcv_lists_stats; INSERT INTO mcv_lists (a, b, c, d) SELECT - (CASE WHEN mod(i,2) = 0 THEN NULL ELSE 0 END), + NULL, -- always NULL (CASE WHEN mod(i,2) = 0 THEN NULL ELSE 'x' END), (CASE WHEN mod(i,2) = 0 THEN NULL ELSE 0 END), (CASE WHEN mod(i,2) = 0 THEN NULL ELSE 'x' END) @@ -1034,15 +1190,92 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = ''x'' OR d 3750 | 2500 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''x'' OR d = ''x'''); + estimated | actual +-----------+-------- + 3750 | 2500 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND (b = ''x'' OR d = ''x'')'); + estimated | actual +-----------+-------- + 3750 | 2500 +(1 row) + -- create statistics -CREATE STATISTICS mcv_lists_stats (mcv) ON b, d FROM mcv_lists; +CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, d FROM mcv_lists; ANALYZE mcv_lists; +-- test pg_mcv_list_items with MCV list containing variable-length data and NULLs +SELECT m.* + FROM pg_statistic_ext s, pg_statistic_ext_data d, + pg_mcv_list_items(d.stxdmcv) m + WHERE s.stxname = 'mcv_lists_stats' + AND d.stxoid = s.oid; + index | values | nulls | frequency | base_frequency +-------+------------------+---------+-----------+---------------- + 0 | {NULL,x,x} | {t,f,f} | 0.5 | 0.25 + 1 | {NULL,NULL,NULL} | {t,t,t} | 0.5 | 0.25 +(2 rows) + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = ''x'' OR d = ''x'''); estimated | actual -----------+-------- 2500 | 2500 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''x'' OR d = ''x'''); + estimated | actual +-----------+-------- + 2500 | 2500 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND (b = ''x'' OR d = ''x'')'); + estimated | actual +-----------+-------- + 2500 | 2500 +(1 row) + +-- mcv with pass-by-ref fixlen types, e.g. uuid +CREATE TABLE mcv_lists_uuid ( + a UUID, + b UUID, + c UUID +); +INSERT INTO mcv_lists_uuid (a, b, c) + SELECT + md5(mod(i,100)::text)::uuid, + md5(mod(i,50)::text)::uuid, + md5(mod(i,25)::text)::uuid + FROM generate_series(1,5000) s(i); +ANALYZE mcv_lists_uuid; +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_uuid WHERE a = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'' AND b = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'''); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_uuid WHERE a = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'' AND b = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'' AND c = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'''); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +CREATE STATISTICS mcv_lists_uuid_stats (mcv) ON a, b, c + FROM mcv_lists_uuid; +ANALYZE mcv_lists_uuid; +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_uuid WHERE a = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'' AND b = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'''); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_uuid WHERE a = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'' AND b = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'' AND c = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'''); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +DROP TABLE mcv_lists_uuid; -- mcv with arrays CREATE TABLE mcv_lists_arrays ( a TEXT[], diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index 0ece39a279..cc353d0466 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -447,30 +447,50 @@ ANALYZE mcv_lists; SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'''); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = a AND ''1'' = b'); + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 1 AND b < ''1'''); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > a AND ''1'' > b'); + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 0 AND b <= ''0'''); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 0 >= a AND ''0'' >= b'); + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1'); SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 5 AND b < ''1'' AND c < 5'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 5 AND ''1'' > b AND 5 > c'); + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <= ''0'' AND c <= 4'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 4 >= a AND ''0'' >= b AND 4 >= c'); + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1'); SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL'); SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52, NULL) AND b IN ( ''1'', ''2'', NULL)'); + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[NULL, 1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2'', NULL])'); + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, 2, 3]) AND b IN (''1'', ''2'', ''3'')'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, NULL, 2, 3]) AND b IN (''1'', ''2'', NULL, ''3'')'); + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3])'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3, NULL])'); + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', ''3'') AND c > ANY (ARRAY[1, 2, 3])'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', NULL, ''3'') AND c > ANY (ARRAY[1, 2, NULL, 3])'); + -- create statistics CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists; @@ -478,28 +498,48 @@ ANALYZE mcv_lists; SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'''); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = a AND ''1'' = b'); + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 1 AND b < ''1'''); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > a AND ''1'' > b'); + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 0 AND b <= ''0'''); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 0 >= a AND ''0'' >= b'); + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1'); SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 5 AND b < ''1'' AND c < 5'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 5 AND ''1'' > b AND 5 > c'); + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <= ''0'' AND c <= 4'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 4 >= a AND ''0'' >= b AND 4 >= c'); + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1'); SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52, NULL) AND b IN ( ''1'', ''2'', NULL)'); + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[NULL, 1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2'', NULL])'); + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, 2, 3]) AND b IN (''1'', ''2'', ''3'')'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, NULL, 2, 3]) AND b IN (''1'', ''2'', NULL, ''3'')'); + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3])'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3, NULL])'); + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', ''3'') AND c > ANY (ARRAY[1, 2, 3])'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', NULL, ''3'') AND c > ANY (ARRAY[1, 2, NULL, 3])'); + -- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute) SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL'); @@ -538,6 +578,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL AND c IS NULL'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NOT NULL'); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NOT NULL AND b IS NULL AND c IS NOT NULL'); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (0, 1) AND b IN (''0'', ''1'')'); + -- create statistics CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists; @@ -547,6 +593,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL AND c IS NULL'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NOT NULL'); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NOT NULL AND b IS NULL AND c IS NOT NULL'); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (0, 1) AND b IN (''0'', ''1'')'); + -- test pg_mcv_list_items with a very simple (single item) MCV list TRUNCATE mcv_lists; INSERT INTO mcv_lists (a, b, c) SELECT 1, 2, 3 FROM generate_series(1,1000) s(i); @@ -564,7 +616,7 @@ DROP STATISTICS mcv_lists_stats; INSERT INTO mcv_lists (a, b, c, d) SELECT - (CASE WHEN mod(i,2) = 0 THEN NULL ELSE 0 END), + NULL, -- always NULL (CASE WHEN mod(i,2) = 0 THEN NULL ELSE 'x' END), (CASE WHEN mod(i,2) = 0 THEN NULL ELSE 0 END), (CASE WHEN mod(i,2) = 0 THEN NULL ELSE 'x' END) @@ -574,13 +626,59 @@ ANALYZE mcv_lists; SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = ''x'' OR d = ''x'''); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''x'' OR d = ''x'''); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND (b = ''x'' OR d = ''x'')'); + -- create statistics -CREATE STATISTICS mcv_lists_stats (mcv) ON b, d FROM mcv_lists; +CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, d FROM mcv_lists; ANALYZE mcv_lists; +-- test pg_mcv_list_items with MCV list containing variable-length data and NULLs +SELECT m.* + FROM pg_statistic_ext s, pg_statistic_ext_data d, + pg_mcv_list_items(d.stxdmcv) m + WHERE s.stxname = 'mcv_lists_stats' + AND d.stxoid = s.oid; + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = ''x'' OR d = ''x'''); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''x'' OR d = ''x'''); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND (b = ''x'' OR d = ''x'')'); + +-- mcv with pass-by-ref fixlen types, e.g. uuid +CREATE TABLE mcv_lists_uuid ( + a UUID, + b UUID, + c UUID +); + +INSERT INTO mcv_lists_uuid (a, b, c) + SELECT + md5(mod(i,100)::text)::uuid, + md5(mod(i,50)::text)::uuid, + md5(mod(i,25)::text)::uuid + FROM generate_series(1,5000) s(i); + +ANALYZE mcv_lists_uuid; + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_uuid WHERE a = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'' AND b = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'''); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_uuid WHERE a = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'' AND b = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'' AND c = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'''); + +CREATE STATISTICS mcv_lists_uuid_stats (mcv) ON a, b, c + FROM mcv_lists_uuid; + +ANALYZE mcv_lists_uuid; + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_uuid WHERE a = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'' AND b = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'''); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_uuid WHERE a = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'' AND b = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'' AND c = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'''); + +DROP TABLE mcv_lists_uuid; + -- mcv with arrays CREATE TABLE mcv_lists_arrays ( a TEXT[], -- 2.21.1