BUG #15430: partition-wise join only works in combination with pruning on 1 partition

From: PG Bug reporting form <noreply(at)postgresql(dot)org>
To: pgsql-bugs(at)lists(dot)postgresql(dot)org
Cc: agile(dot)data(dot)analytics(at)gmail(dot)com
Subject: BUG #15430: partition-wise join only works in combination with pruning on 1 partition
Date: 2018-10-14 15:20:14
Message-ID: 15430-ee54c4208e54e9b1@postgresql.org
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-bugs

The following bug has been logged on the website:

Bug reference: 15430
Logged by: Bart Debersaques
Email address: agile(dot)data(dot)analytics(at)gmail(dot)com
PostgreSQL version: 11rc1
Operating system: Centos 7
Description:

psql <<-EOF
create schema part_test
;
CREATE TABLE part_test.measurement (
city_id int not null,
logdate date not null,
peaktemp int,
unitsales
int
) PARTITION BY RANGE (logdate)
;
CREATE TABLE part_test.measurement_y2006m02 PARTITION OF
part_test.measurement
FOR VALUES FROM ('2006-02-01') TO ('2006-03-01')
;
CREATE TABLE part_test.measurement_y2006m03 PARTITION OF
part_test.measurement
FOR VALUES FROM ('2006-03-01') TO ('2006-04-01')
;
EOF

psql <<-EOF
CREATE TABLE part_test.measurement2 (
city_id int not null,
logdate date not null,
peaktemp int,
unitsales
int
) PARTITION BY RANGE (logdate)
;
CREATE TABLE part_test.measurement2_y2006m02 PARTITION OF
part_test.measurement2
FOR VALUES FROM ('2006-02-01') TO ('2006-03-01')
;
CREATE TABLE part_test.measurement2_y2006m03 PARTITION OF
part_test.measurement2
FOR VALUES FROM ('2006-03-01') TO ('2006-04-01')
;
EOF

psql <<-EOF
explain
select *
from
part_test.measurement m1
inner join part_test.measurement2 m2
on m1.city_id = m2.city_id and m1.logdate = m2.logdate
where m1.logdate = '2006-02-01'::date
EOF
# Hash Join (cost=33.28..66.58 rows=9 width=32)
# Hash Cond: (m1.city_id = m2.city_id)
# -> Append (cost=0.00..33.17 rows=9 width=16)
# -> Seq Scan on measurement_y2006m02 m1 (cost=0.00..33.12 rows=9
width=16)
# Filter: (logdate = '2006-02-01'::date)
# -> Hash (cost=33.17..33.17 rows=9 width=16)
# -> Append (cost=0.00..33.17 rows=9 width=16)
# -> Seq Scan on measurement2_y2006m02 m2 (cost=0.00..33.12
rows=9 width=16)
# Filter: (logdate = '2006-02-01'::date)

# conclusion: pruning applied to both tables = OK

psql <<-EOF
explain
select *
from
part_test.measurement m1
inner join part_test.measurement2 m2
on m1.city_id = m2.city_id and m1.logdate = m2.logdate
where m1.logdate in ('2006-02-01'::date, '2006-03-01'::date)
EOF
# Hash Join (cost=66.97..170.25 rows=3 width=32)
# Hash Cond: ((m2.city_id = m1.city_id) AND (m2.logdate = m1.logdate))
# -> Append (cost=0.00..75.50 rows=3700 width=16)
# -> Seq Scan on measurement2_y2006m02 m2 (cost=0.00..28.50
rows=1850 width=16)
# -> Seq Scan on measurement2_y2006m03 m2_1 (cost=0.00..28.50
rows=1850 width=16)
# -> Hash (cost=66.43..66.43 rows=36 width=16)
# -> Append (cost=0.00..66.43 rows=36 width=16)
# -> Seq Scan on measurement_y2006m02 m1 (cost=0.00..33.12
rows=18 width=16)
# Filter: (logdate = ANY
('{2006-02-01,2006-03-01}'::date[]))
# -> Seq Scan on measurement_y2006m03 m1_1 (cost=0.00..33.12
rows=18 width=16)
# Filter: (logdate = ANY
('{2006-02-01,2006-03-01}'::date[]))

# conclusion: join performed on full tab scans, join not performed on a
per-partion basis

psql <<-EOF
explain
select *
from
part_test.measurement m1
inner join part_test.measurement2 m2
on m1.city_id = m2.city_id and m1.logdate = m2.logdate
EOF
# Merge Join (cost=589.57..648.49 rows=342 width=32)
# Merge Cond: ((m1.city_id = m2.city_id) AND (m1.logdate = m2.logdate))
# -> Sort (cost=294.79..304.04 rows=3700 width=16)
# Sort Key: m1.city_id, m1.logdate
# -> Append (cost=0.00..75.50 rows=3700 width=16)
# -> Seq Scan on measurement_y2006m02 m1 (cost=0.00..28.50
rows=1850 width=16)
# -> Seq Scan on measurement_y2006m03 m1_1 (cost=0.00..28.50
rows=1850 width=16)
# -> Sort (cost=294.79..304.04 rows=3700 width=16)
# Sort Key: m2.city_id, m2.logdate
# -> Append (cost=0.00..75.50 rows=3700 width=16)
# -> Seq Scan on measurement2_y2006m02 m2 (cost=0.00..28.50
rows=1850 width=16)
# -> Seq Scan on measurement2_y2006m03 m2_1
(cost=0.00..28.50 rows=1850 width=16)

# conclusion: join performed on full tab scans, join not performed on a
per-partion basis

# desired behaviour:

# in serial execution and for an equi join: partitions in m1 should be
iterated and joined with the equivaluent in m2
# parallel execution: the above should be done with several partitions in
parallel

# please assign to Ashutosh Bapat as described in
https://www.enterprisedb.com/blog/partition-wise-joins-%E2%80%9Cdivide-and-conquer-joins-between-partitioned-table

reading

https://www.enterprisedb.com/blog/partition-wise-joins-%E2%80%9Cdivide-and-conquer-joins-between-partitioned-table

Responses

Browse pgsql-bugs by date

  From Date Subject
Next Message Amit Langote 2018-10-15 04:18:39 Re: BUG #15430: partition-wise join only works in combination with pruning on 1 partition
Previous Message Ryan Murphy 2018-10-14 05:03:51 Re: BUG #15429: psql requires '-h localhost' to run basic commands