diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out index fbcdb7eb58c..a2dd8a34da4 100644 --- a/src/test/regress/expected/collate.icu.utf8.out +++ b/src/test/regress/expected/collate.icu.utf8.out @@ -2726,6 +2726,95 @@ GROUP BY t1.id; DROP TABLE eager_agg_t1; DROP TABLE eager_agg_t2; +-- +-- Test for eager aggregation with multiple columns having different collations +-- +CREATE TABLE eager_agg_t3 ( + id int, + val1 text COLLATE case_insensitive, + val2 text COLLATE "C" +); +CREATE TABLE eager_agg_t4 ( + val1 text COLLATE case_insensitive, + val2 text COLLATE "C" +); +INSERT INTO eager_agg_t3 SELECT 1, 'a', 'x' FROM generate_series(1, 50); +INSERT INTO eager_agg_t3 SELECT 1, 'A', 'x' FROM generate_series(1, 50); +INSERT INTO eager_agg_t4 VALUES ('A', 'x'); +ANALYZE eager_agg_t3; +ANALYZE eager_agg_t4; +-- Ensure that eager aggregation is not used when grouping by a column with +-- non-deterministic collation, even when other grouping columns have +-- deterministic collations. +EXPLAIN (COSTS OFF) +SELECT t1.id, t1.val1, count(*) + FROM eager_agg_t3 t1 + JOIN eager_agg_t4 t2 ON t1.val1 = t2.val1 COLLATE "C" AND t1.val2 = t2.val2 +GROUP BY t1.id, t1.val1; + QUERY PLAN +------------------------------------------------------------------------------------ + HashAggregate + Group Key: t1.id, t1.val1 + -> Nested Loop + Join Filter: (((t1.val1)::text = (t2.val1)::text) AND (t1.val2 = t2.val2)) + -> Seq Scan on eager_agg_t4 t2 + -> Seq Scan on eager_agg_t3 t1 +(6 rows) + +-- Verify correct results (should return 1 row with count = 50) +SELECT t1.id, t1.val1, count(*) + FROM eager_agg_t3 t1 + JOIN eager_agg_t4 t2 ON t1.val1 = t2.val1 COLLATE "C" AND t1.val2 = t2.val2 +GROUP BY t1.id, t1.val1; + id | val1 | count +----+------+------- + 1 | A | 50 +(1 row) + +DROP TABLE eager_agg_t3; +DROP TABLE eager_agg_t4; +-- +-- Test for eager aggregation with explicit COLLATE on grouping expression +-- +CREATE TABLE eager_agg_t5 (id int, val text COLLATE "C"); +CREATE TABLE eager_agg_t6 (val text COLLATE "C"); +INSERT INTO eager_agg_t5 SELECT 1, 'a' FROM generate_series(1, 50); +INSERT INTO eager_agg_t5 SELECT 1, 'A' FROM generate_series(1, 50); +INSERT INTO eager_agg_t6 VALUES ('A'); +ANALYZE eager_agg_t5; +ANALYZE eager_agg_t6; +-- When grouping by an expression with explicit non-deterministic COLLATE, +-- eager aggregation should not be used even if the column's native collation +-- is deterministic. +EXPLAIN (COSTS OFF) +SELECT t1.id, t1.val COLLATE case_insensitive, count(*) + FROM eager_agg_t5 t1 + JOIN eager_agg_t6 t2 ON t1.val = t2.val +GROUP BY t1.id, t1.val COLLATE case_insensitive; + QUERY PLAN +----------------------------------------------- + HashAggregate + Group Key: t1.id, (t1.val)::text + -> Hash Join + Hash Cond: (t1.val = t2.val) + -> Seq Scan on eager_agg_t5 t1 + -> Hash + -> Seq Scan on eager_agg_t6 t2 +(7 rows) + +-- Verify correct results (should return 1 row with count = 100, since 'a' and +-- 'A' are equal under case_insensitive collation) +SELECT t1.id, t1.val COLLATE case_insensitive, count(*) + FROM eager_agg_t5 t1 + JOIN eager_agg_t6 t2 ON t1.val = t2.val +GROUP BY t1.id, t1.val COLLATE case_insensitive; + id | val | count +----+-----+------- + 1 | A | 50 +(1 row) + +DROP TABLE eager_agg_t5; +DROP TABLE eager_agg_t6; -- virtual generated columns CREATE TABLE t5 ( a int, diff --git a/src/test/regress/expected/eager_aggregate.out b/src/test/regress/expected/eager_aggregate.out index d1b86be3a62..2bf983d12cb 100644 --- a/src/test/regress/expected/eager_aggregate.out +++ b/src/test/regress/expected/eager_aggregate.out @@ -448,6 +448,26 @@ GROUP BY t1.a ORDER BY t1.a; -> Seq Scan on eager_agg_t1 t1 (9 rows) +-- Ensure eager aggregation is not applied when FILTER clause contains +-- volatile function +EXPLAIN (COSTS OFF) +SELECT t1.a, avg(t2.c) FILTER (WHERE random() > 0.5) + FROM eager_agg_t1 t1 + JOIN eager_agg_t2 t2 ON t1.b = t2.b +GROUP BY t1.a ORDER BY t1.a; + QUERY PLAN +----------------------------------------------------- + GroupAggregate + Group Key: t1.a + -> Sort + Sort Key: t1.a + -> Hash Join + Hash Cond: (t2.b = t1.b) + -> Seq Scan on eager_agg_t2 t2 + -> Hash + -> Seq Scan on eager_agg_t1 t1 +(9 rows) + DROP TABLE eager_agg_t1; DROP TABLE eager_agg_t2; DROP TABLE eager_agg_t3; diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql index 0e6b76b11b8..93c22b37727 100644 --- a/src/test/regress/sql/collate.icu.utf8.sql +++ b/src/test/regress/sql/collate.icu.utf8.sql @@ -1021,6 +1021,76 @@ GROUP BY t1.id; DROP TABLE eager_agg_t1; DROP TABLE eager_agg_t2; +-- +-- Test for eager aggregation with multiple columns having different collations +-- +CREATE TABLE eager_agg_t3 ( + id int, + val1 text COLLATE case_insensitive, + val2 text COLLATE "C" +); +CREATE TABLE eager_agg_t4 ( + val1 text COLLATE case_insensitive, + val2 text COLLATE "C" +); + +INSERT INTO eager_agg_t3 SELECT 1, 'a', 'x' FROM generate_series(1, 50); +INSERT INTO eager_agg_t3 SELECT 1, 'A', 'x' FROM generate_series(1, 50); +INSERT INTO eager_agg_t4 VALUES ('A', 'x'); + +ANALYZE eager_agg_t3; +ANALYZE eager_agg_t4; + +-- Ensure that eager aggregation is not used when grouping by a column with +-- non-deterministic collation, even when other grouping columns have +-- deterministic collations. +EXPLAIN (COSTS OFF) +SELECT t1.id, t1.val1, count(*) + FROM eager_agg_t3 t1 + JOIN eager_agg_t4 t2 ON t1.val1 = t2.val1 COLLATE "C" AND t1.val2 = t2.val2 +GROUP BY t1.id, t1.val1; + +-- Verify correct results (should return 1 row with count = 50) +SELECT t1.id, t1.val1, count(*) + FROM eager_agg_t3 t1 + JOIN eager_agg_t4 t2 ON t1.val1 = t2.val1 COLLATE "C" AND t1.val2 = t2.val2 +GROUP BY t1.id, t1.val1; + +DROP TABLE eager_agg_t3; +DROP TABLE eager_agg_t4; + +-- +-- Test for eager aggregation with explicit COLLATE on grouping expression +-- +CREATE TABLE eager_agg_t5 (id int, val text COLLATE "C"); +CREATE TABLE eager_agg_t6 (val text COLLATE "C"); + +INSERT INTO eager_agg_t5 SELECT 1, 'a' FROM generate_series(1, 50); +INSERT INTO eager_agg_t5 SELECT 1, 'A' FROM generate_series(1, 50); +INSERT INTO eager_agg_t6 VALUES ('A'); + +ANALYZE eager_agg_t5; +ANALYZE eager_agg_t6; + +-- When grouping by an expression with explicit non-deterministic COLLATE, +-- eager aggregation should not be used even if the column's native collation +-- is deterministic. +EXPLAIN (COSTS OFF) +SELECT t1.id, t1.val COLLATE case_insensitive, count(*) + FROM eager_agg_t5 t1 + JOIN eager_agg_t6 t2 ON t1.val = t2.val +GROUP BY t1.id, t1.val COLLATE case_insensitive; + +-- Verify correct results (should return 1 row with count = 100, since 'a' and +-- 'A' are equal under case_insensitive collation) +SELECT t1.id, t1.val COLLATE case_insensitive, count(*) + FROM eager_agg_t5 t1 + JOIN eager_agg_t6 t2 ON t1.val = t2.val +GROUP BY t1.id, t1.val COLLATE case_insensitive; + +DROP TABLE eager_agg_t5; +DROP TABLE eager_agg_t6; + -- virtual generated columns CREATE TABLE t5 ( a int, diff --git a/src/test/regress/sql/eager_aggregate.sql b/src/test/regress/sql/eager_aggregate.sql index 97e10dd7cf4..9c935ef0633 100644 --- a/src/test/regress/sql/eager_aggregate.sql +++ b/src/test/regress/sql/eager_aggregate.sql @@ -171,6 +171,14 @@ SELECT t1.a, avg(t2.c + random()) JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a ORDER BY t1.a; +-- Ensure eager aggregation is not applied when FILTER clause contains +-- volatile function +EXPLAIN (COSTS OFF) +SELECT t1.a, avg(t2.c) FILTER (WHERE random() > 0.5) + FROM eager_agg_t1 t1 + JOIN eager_agg_t2 t2 ON t1.b = t2.b +GROUP BY t1.a ORDER BY t1.a; + DROP TABLE eager_agg_t1; DROP TABLE eager_agg_t2; DROP TABLE eager_agg_t3;