Re: Increasing GROUP BY CHAR columns speed

From: "Andrus" <kobruleht2(at)hot(dot)ee>
To: "Scott Carey" <scott(at)richrelevance(dot)com>, "Scott Marlowe" <scott(dot)marlowe(at)gmail(dot)com>
Cc: <pgsql-performance(at)postgresql(dot)org>
Subject: Re: Increasing GROUP BY CHAR columns speed
Date: 2008-11-28 20:30:53
Message-ID: A1FAC979BB14437FA9E0290F05D54D0B@andrusnotebook
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-performance

I it seems that slowness is caused by grouping by column

exchrate numeric(13,8)

if this column is excluded, query takes 12 seconds
if this column in present, query takes 27 (!) seconds.
How to fix this ?

Andrus.

set search_path to firma2,public;
SET work_mem = 2097151 ;
explain analyze SELECT
CASE WHEN bilkaib.raha='EEK' THEN 0 ELSE bilkaib.id END,
bilkaib.DB,
bilkaib.CR,
bilkaib.RAHA, -- 12 sek
bilkaib.EXCHRATE, -- 27 sec
SUM(bilkaib.summa)::numeric(14,2) AS summa
from BILKAIB join KONTO CRKONTO ON bilkaib.cr=crkonto.kontonr AND
crkonto.iseloom='A'
join KONTO DBKONTO ON bilkaib.db=dbkonto.kontonr AND
dbkonto.iseloom='A'
left join klient on bilkaib.klient=klient.kood
WHERE --(bilkaib.cr LIKE ''||'%' OR bilkaib.db LIKE ''||'%')
bilkaib.kuupaev BETWEEN '2008-01-01' AND '2008-12-31'
GROUP BY 1,2,3,4,5

"GroupAggregate (cost=71338.72..79761.05 rows=240638 width=58) (actual
time=24570.085..27382.022 rows=217 loops=1)"
" -> Sort (cost=71338.72..71940.31 rows=240638 width=58) (actual
time=24566.700..25744.006 rows=322202 loops=1)"
" Sort Key: CASE WHEN (bilkaib.raha = 'EEK'::bpchar) THEN 0 ELSE
bilkaib.id END, bilkaib.db, bilkaib.cr, bilkaib.raha, bilkaib.exchrate"
" -> Hash Left Join (cost=193.31..49829.89 rows=240638 width=58)
(actual time=17.072..9901.578 rows=322202 loops=1)"
" Hash Cond: ("outer".klient = "inner".kood)"
" -> Hash Join (cost=42.18..45624.00 rows=240638 width=74)
(actual time=4.715..7151.111 rows=322202 loops=1)"
" Hash Cond: ("outer".cr = "inner".kontonr)"
" -> Hash Join (cost=21.09..41803.63 rows=278581
width=74) (actual time=2.306..4598.703 rows=322202 loops=1)"
" Hash Cond: ("outer".db = "inner".kontonr)"
" -> Seq Scan on bilkaib (cost=0.00..37384.19
rows=322507 width=74) (actual time=0.075..1895.027 rows=322202 loops=1)"
" Filter: ((kuupaev >= '2008-01-01'::date)
AND (kuupaev <= '2008-12-31'::date))"
" -> Hash (cost=20.49..20.49 rows=241 width=14)
(actual time=2.193..2.193 rows=241 loops=1)"
" -> Seq Scan on konto dbkonto
(cost=0.00..20.49 rows=241 width=14) (actual time=0.011..1.189 rows=241
loops=1)"
" Filter: (iseloom = 'A'::bpchar)"
" -> Hash (cost=20.49..20.49 rows=241 width=14) (actual
time=2.386..2.386 rows=241 loops=1)"
" -> Seq Scan on konto crkonto (cost=0.00..20.49
rows=241 width=14) (actual time=0.020..1.394 rows=241 loops=1)"
" Filter: (iseloom = 'A'::bpchar)"
" -> Hash (cost=147.90..147.90 rows=1290 width=16) (actual
time=12.319..12.319 rows=1290 loops=1)"
" -> Seq Scan on klient (cost=0.00..147.90 rows=1290
width=16) (actual time=0.032..6.979 rows=1290 loops=1)"
"Total runtime: 27434.724 ms"

set search_path to firma2,public;
SET work_mem = 2097151 ;
explain analyze SELECT
CASE WHEN bilkaib.raha='EEK' THEN 0 ELSE bilkaib.id END,
bilkaib.DB,
bilkaib.CR,
bilkaib.RAHA,
SUM(bilkaib.summa)::numeric(14,2) AS summa
from BILKAIB join KONTO CRKONTO ON bilkaib.cr=crkonto.kontonr AND
crkonto.iseloom='A'
join KONTO DBKONTO ON bilkaib.db=dbkonto.kontonr AND
dbkonto.iseloom='A'
left join klient on bilkaib.klient=klient.kood
WHERE
bilkaib.kuupaev BETWEEN '2008-01-01' AND '2008-12-31'
GROUP BY 1,2,3,4

"HashAggregate (cost=52837.86..57049.03 rows=240638 width=50) (actual
time=11744.137..11745.578 rows=215 loops=1)"
" -> Hash Left Join (cost=193.31..49829.89 rows=240638 width=50) (actual
time=17.330..9826.549 rows=322202 loops=1)"
" Hash Cond: ("outer".klient = "inner".kood)"
" -> Hash Join (cost=42.18..45624.00 rows=240638 width=66) (actual
time=4.804..7141.983 rows=322202 loops=1)"
" Hash Cond: ("outer".cr = "inner".kontonr)"
" -> Hash Join (cost=21.09..41803.63 rows=278581 width=66)
(actual time=2.343..4600.683 rows=322202 loops=1)"
" Hash Cond: ("outer".db = "inner".kontonr)"
" -> Seq Scan on bilkaib (cost=0.00..37384.19
rows=322507 width=66) (actual time=0.081..1939.376 rows=322202 loops=1)"
" Filter: ((kuupaev >= '2008-01-01'::date) AND
(kuupaev <= '2008-12-31'::date))"
" -> Hash (cost=20.49..20.49 rows=241 width=14) (actual
time=2.207..2.207 rows=241 loops=1)"
" -> Seq Scan on konto dbkonto (cost=0.00..20.49
rows=241 width=14) (actual time=0.014..1.179 rows=241 loops=1)"
" Filter: (iseloom = 'A'::bpchar)"
" -> Hash (cost=20.49..20.49 rows=241 width=14) (actual
time=2.426..2.426 rows=241 loops=1)"
" -> Seq Scan on konto crkonto (cost=0.00..20.49
rows=241 width=14) (actual time=0.029..1.444 rows=241 loops=1)"
" Filter: (iseloom = 'A'::bpchar)"
" -> Hash (cost=147.90..147.90 rows=1290 width=16) (actual
time=12.477..12.477 rows=1290 loops=1)"
" -> Seq Scan on klient (cost=0.00..147.90 rows=1290
width=16) (actual time=0.034..7.081 rows=1290 loops=1)"
"Total runtime: 11748.066 ms"

In response to

Browse pgsql-performance by date

  From Date Subject
Next Message Scott Marlowe 2008-11-28 20:34:29 Re: Increasing GROUP BY CHAR columns speed
Previous Message Andrus 2008-11-28 19:57:43 Re: Increasing GROUP BY CHAR columns speed