Re: WIP: BRIN multi-range indexes

From: John Naylor <john(dot)naylor(at)2ndquadrant(dot)com>
To: Tomas Vondra <tomas(dot)vondra(at)2ndquadrant(dot)com>
Cc: Alvaro Herrera <alvherre(at)2ndquadrant(dot)com>, pgsql-hackers <pgsql-hackers(at)postgresql(dot)org>
Subject: Re: WIP: BRIN multi-range indexes
Date: 2020-09-10 20:42:46
Message-ID: CACPNZCv6LTVkF1VKsECLFv+9ohA9dGDrZsUYJXjNYVutGvy2Aw@mail.gmail.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-hackers

Ok, here's an attempt at a somewhat more natural test, to see what
happens after bulk updates and deletes, followed by more inserts. The
short version is that multi-minmax is resilient to a change that
causes a 4x degradation for simple minmax.

shared_buffers = 1GB
random_page_cost = 1.1
effective_cache_size = 4GB
work_mem = 64MB
maintenance_work_mem = 512MB

create unlogged table iot (
id bigint generated by default as identity primary key,
num double precision not null,
create_dt timestamptz not null,
stuff text generated always as (md5(id::text)) stored
)
with (fillfactor = 95);

insert into iot (num, create_dt)
select random(), x
from generate_series(
'2020-01-01 0:00'::timestamptz,
'2020-01-01 0:00'::timestamptz +'49000999 seconds'::interval,
'2 seconds'::interval) x;

INSERT 0 24500500

(01:18s, 2279 MB)

-- done in separate tests so the planner can choose each in turn
create index cd_single on iot using brin(create_dt);
6.7s
create index cd_multi on iot using brin(create_dt timestamptz_minmax_multi_ops);
34s

vacuum analyze;

-- aggregate February
-- single minmax and multi-minmax same plan and same Heap Blocks
below, so only one plan shown
-- query times between the opclasses within noise of variation

explain analyze select date_trunc('day', create_dt), avg(num)
from iot
where create_dt >= '2020-02-01 0:00' and create_dt < '2020-03-01 0:00'
group by 1;

QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------
HashAggregate (cost=357664.79..388181.83 rows=1232234 width=16)
(actual time=559.805..561.649 rows=29 loops=1)
Group Key: date_trunc('day'::text, create_dt)
Planned Partitions: 4 Batches: 1 Memory Usage: 24601kB
-> Bitmap Heap Scan on iot (cost=323.74..313622.05 rows=1232234
width=16) (actual time=1.787..368.256 rows=1252800 loops=1)
Recheck Cond: ((create_dt >= '2020-02-01
00:00:00-04'::timestamp with time zone) AND (create_dt < '2020-03-01
00:00:00-04'::timestamp with time zone))
Rows Removed by Index Recheck: 15936
Heap Blocks: lossy=15104
-> Bitmap Index Scan on cd_single (cost=0.00..15.68
rows=1236315 width=0) (actual time=0.933..0.934 rows=151040 loops=1)
Index Cond: ((create_dt >= '2020-02-01
00:00:00-04'::timestamp with time zone) AND (create_dt < '2020-03-01
00:00:00-04'::timestamp with time zone))
Planning Time: 0.118 ms
Execution Time: 568.653 ms
(11 rows)

-- delete first month and hi/lo values to create some holes in the table
delete from iot
where create_dt < '2020-02-01 0:00'::timestamptz;

DELETE 1339200

delete from iot
where num < 0.05
or num > 0.95;

DELETE 2316036

vacuum analyze iot;

-- add add back first month, but with double density (1s step rather
than 2s) so it spills over into other parts of the table, causing more
block ranges to have a lower bound with this month.

insert into iot (num, create_dt)
select random(), x
from generate_series(
'2020-01-01 0:00'::timestamptz,
'2020-01-31 23:59'::timestamptz,
'1 second'::interval) x;

INSERT 0 2678341

vacuum analyze;

-- aggregate February again

explain analyze select date_trunc('day', create_dt), avg(num)
from iot
where create_dt >= '2020-02-01 0:00' and create_dt < '2020-03-01 0:00'
group by 1;

-- simple minmax:

QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------
HashAggregate (cost=354453.63..383192.38 rows=1160429 width=16)
(actual time=2375.075..2376.982 rows=29 loops=1)
Group Key: date_trunc('day'::text, create_dt)
Planned Partitions: 4 Batches: 1 Memory Usage: 24601kB
-> Bitmap Heap Scan on iot (cost=305.85..312977.36 rows=1160429
width=16) (actual time=8.162..2201.547 rows=1127668 loops=1)
Recheck Cond: ((create_dt >= '2020-02-01
00:00:00-04'::timestamp with time zone) AND (create_dt < '2020-03-01
00:00:00-04'::timestamp with time zone))
Rows Removed by Index Recheck: 12278985
Heap Blocks: lossy=159616
-> Bitmap Index Scan on cd_single (cost=0.00..15.74
rows=1206496 width=0) (actual time=7.177..7.178 rows=1596160 loops=1)
Index Cond: ((create_dt >= '2020-02-01
00:00:00-04'::timestamp with time zone) AND (create_dt < '2020-03-01
00:00:00-04'::timestamp with time zone))
Planning Time: 0.117 ms
Execution Time: 2383.685 ms
(11 rows)

-- multi minmax:

QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------
HashAggregate (cost=354089.57..382932.46 rows=1164634 width=16)
(actual time=535.773..537.731 rows=29 loops=1)
Group Key: date_trunc('day'::text, create_dt)
Planned Partitions: 4 Batches: 1 Memory Usage: 24601kB
-> Bitmap Heap Scan on iot (cost=376.07..312463.00 rows=1164634
width=16) (actual time=3.731..363.116 rows=1127117 loops=1)
Recheck Cond: ((create_dt >= '2020-02-01
00:00:00-04'::timestamp with time zone) AND (create_dt < '2020-03-01
00:00:00-04'::timestamp with time zone))
Rows Removed by Index Recheck: 141619
Heap Blocks: lossy=15104
-> Bitmap Index Scan on cd_multi (cost=0.00..84.92
rows=1166823 width=0) (actual time=3.048..3.048 rows=151040 loops=1)
Index Cond: ((create_dt >= '2020-02-01
00:00:00-04'::timestamp with time zone) AND (create_dt < '2020-03-01
00:00:00-04'::timestamp with time zone))
Planning Time: 0.117 ms
Execution Time: 545.246 ms
(11 rows)

--
John Naylor https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

In response to

Browse pgsql-hackers by date

  From Date Subject
Next Message Alvaro Herrera 2020-09-10 20:54:24 Re: ALTER TABLE .. DETACH PARTITION CONCURRENTLY
Previous Message Tom Lane 2020-09-10 20:31:54 Re: PG 13 release notes, first draft