Skip site navigation (1) Skip section navigation (2)

Increasing GROUP BY CHAR columns speed

From: "Andrus" <kobruleht2(at)hot(dot)ee>
To: pgsql-performance(at)postgresql(dot)org
Subject: Increasing GROUP BY CHAR columns speed
Date: 2008-11-28 15:04:50
Message-ID: ggp1au$1iq2$1@news.hub.org (view raw or flat)
Thread:
Lists: pgsql-performance
Group by using CHAR columns takes abnormally big time.

How to speed it ?

Andrus.

8.1.4, cluster locale is en-us, db encoding is utf-8

set search_path to firma2,public;
explain analyze SELECT
 CASE WHEN bilkaib.raha='EEK' THEN 0 ELSE bilkaib.id END,
 bilkaib.DB,
 CASE WHEN dbkonto.objekt1='+' THEN bilkaib.DBOBJEKT ELSE null END:: 
CHAR(10) AS dbobjekt,
 CASE WHEN dbkonto.objekt2='+' THEN bilkaib.DB2OBJEKT ELSE null END:: 
CHAR(10) AS db2objekt,
 CASE WHEN dbkonto.objekt3='+' THEN bilkaib.DB3OBJEKT ELSE null END:: 
CHAR(10) AS db3objekt,
 CASE WHEN dbkonto.objekt4='+' THEN bilkaib.DB4OBJEKT ELSE null END:: 
CHAR(10) AS db4objekt,
 CASE WHEN dbkonto.objekt5='+' THEN bilkaib.DB5OBJEKT ELSE null END:: 
CHAR(10) AS db5objekt,
 CASE WHEN dbkonto.objekt6='+' THEN bilkaib.DB6OBJEKT ELSE null END:: 
CHAR(10) AS db6objekt,
 CASE WHEN dbkonto.objekt7='+' THEN bilkaib.DB7OBJEKT ELSE null END:: 
CHAR(10) AS db7objekt,
 CASE WHEN dbkonto.objekt8='+' THEN bilkaib.DB8OBJEKT ELSE null END:: 
CHAR(10) AS db8objekt,
 CASE WHEN dbkonto.objekt9='+' THEN bilkaib.DB9OBJEKT ELSE null END:: 
CHAR(10) AS db9objekt,
 bilkaib.CR,
 CASE WHEN crkonto.objekt1='+' THEN bilkaib.crOBJEKT ELSE null END:: 
CHAR(10) AS crobjekt,
 CASE WHEN crkonto.objekt2='+' THEN bilkaib.cr2OBJEKT ELSE null END:: 
CHAR(10) AS cr2objekt,
 CASE WHEN crkonto.objekt3='+' THEN bilkaib.cr3OBJEKT ELSE null END:: 
CHAR(10) AS cr3objekt,
 CASE WHEN crkonto.objekt4='+' THEN bilkaib.cr4OBJEKT ELSE null END:: 
CHAR(10) AS cr4objekt,
 CASE WHEN crkonto.objekt5='+' THEN bilkaib.cr5OBJEKT ELSE null END:: 
CHAR(10) AS cr5objekt,
 CASE WHEN crkonto.objekt6='+' THEN bilkaib.cr6OBJEKT ELSE null END:: 
CHAR(10) AS cr6objekt,
 CASE WHEN crkonto.objekt7='+' THEN bilkaib.cr7OBJEKT ELSE null END:: 
CHAR(10) AS cr7objekt,
 CASE WHEN crkonto.objekt8='+' THEN bilkaib.cr8OBJEKT ELSE null END:: 
CHAR(10) AS cr8objekt,
 CASE WHEN crkonto.objekt9='+' THEN bilkaib.cr9OBJEKT ELSE null END:: 
CHAR(10) AS cr9objekt,
 bilkaib.RAHA,
 CASE WHEN crkonto.klienkaupa OR dbkonto.klienkaupa OR crkonto.tyyp IN 
('K','I') OR dbkonto.tyyp IN ('K','I')
 THEN  bilkaib.KLIENT ELSE NULL END AS klient,

 bilkaib.EXCHRATE,

 CASE WHEN crkonto.klienkaupa OR dbkonto.klienkaupa
   OR crkonto.tyyp IN ('K','I') OR dbkonto.tyyp IN ('K','I')
 THEN
   klient.nimi ELSE NULL END AS kliendinim,  -- 24.

 CAST(CASE WHEN crkonto.arvekaupa OR dbkonto.arvekaupa
   OR (bilkaib.cr<>'00' AND crkonto.tyyp='K')
   OR (bilkaib.db<>'00' AND dbkonto.tyyp='K')
 THEN bilkaib.doknr ELSE NULL END AS CHAR(25)) AS doknr

 ,bilkaib.ratediffer
 ,CASE WHEN bilkaib.raha='EEK' THEN DATE'20070101' ELSE bilkaib.kuupaev END 
AS kuupaev

 ,SUM(bilkaib.summa)::numeric(14,2) AS summa
   from BILKAIB join KONTO CRKONTO ON bilkaib.cr=crkonto.kontonr AND
       crkonto.iseloom='A'
     join KONTO DBKONTO ON bilkaib.db=dbkonto.kontonr AND
       dbkonto.iseloom='A'
     left join klient on bilkaib.klient=klient.kood
   where  ( bilkaib.cr LIKE '112'||'%' OR bilkaib.db LIKE '112'||'%' ) AND 
bilkaib.kuupaev BETWEEN '2007-01-01' AND '2008-11-26'
  GROUP BY 
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28

"GroupAggregate  (cost=52316.23..61434.48 rows=41923 width=838) (actual 
time=10771.337..11372.135 rows=577 loops=1)"
"  ->  Sort  (cost=52316.23..52421.03 rows=41923 width=838) (actual 
time=10770.529..11012.651 rows=52156 loops=1)"
"        Sort Key: CASE WHEN (bilkaib.raha = 'EEK'::bpchar) THEN 0 ELSE 
bilkaib.id END, bilkaib.db, (CASE WHEN (dbkonto.objekt1 = '+'::bpchar) THEN 
bilkaib.dbobjekt ELSE NULL::bpchar END)::character(10), (CASE WHEN 
(dbkonto.objekt2 = '+'::bpchar) THEN bilkaib.db2objekt ELSE NULL::bpchar 
END)::character(10), (CASE WHEN (dbkonto.objekt3 = '+'::bpchar) THEN 
bilkaib.db3objekt ELSE NULL::bpchar END)::character(10), (CASE WHEN 
(dbkonto.objekt4 = '+'::bpchar) THEN bilkaib.db4objekt ELSE NULL::bpchar 
END)::character(10), (CASE WHEN (dbkonto.objekt5 = '+'::bpchar) THEN 
bilkaib.db5objekt ELSE NULL::bpchar END)::character(10), (CASE WHEN 
(dbkonto.objekt6 = '+'::bpchar) THEN bilkaib.db6objekt ELSE NULL::bpchar 
END)::character(10), (CASE WHEN (dbkonto.objekt7 = '+'::bpchar) THEN 
bilkaib.db7objekt ELSE NULL::bpchar END)::character(10), (CASE WHEN 
(dbkonto.objekt8 = '+'::bpchar) THEN bilkaib.db8objekt ELSE NULL::bpchar 
END)::character(10), (CASE WHEN (dbkonto.objekt9 = '+'::bpchar) THEN 
bilkaib.db9objekt ELSE NULL::bpchar END)::character(10), bilkaib.cr, (CASE 
WHEN (crkonto.objekt1 = '+'::bpchar) THEN bilkaib.crobjekt ELSE NULL::bpchar 
END)::character(10), (CASE WHEN (crkonto.objekt2 = '+'::bpchar) THEN 
bilkaib.cr2objekt ELSE NULL::bpchar END)::character(10), (CASE WHEN 
(crkonto.objekt3 = '+'::bpchar) THEN bilkaib.cr3objekt ELSE NULL::bpchar 
END)::character(10), (CASE WHEN (crkonto.objekt4 = '+'::bpchar) THEN 
bilkaib.cr4objekt ELSE NULL::bpchar END)::character(10), (CASE WHEN 
(crkonto.objekt5 = '+'::bpchar) THEN bilkaib.cr5objekt ELSE NULL::bpchar 
END)::character(10), (CASE WHEN (crkonto.objekt6 = '+'::bpchar) THEN 
bilkaib.cr6objekt ELSE NULL::bpchar END)::character(10), (CASE WHEN 
(crkonto.objekt7 = '+'::bpchar) THEN bilkaib.cr7objekt ELSE NULL::bpchar 
END)::character(10), (CASE WHEN (crkonto.objekt8 = '+'::bpchar) THEN 
bilkaib.cr8objekt ELSE NULL::bpchar END)::character(10), (CASE WHEN 
(crkonto.objekt9 = '+'::bpchar) THEN bilkaib.cr9objekt ELSE NULL::bpchar 
END)::character(10), bilkaib.raha, CASE WHEN ((crkonto.klienkaupa)::boolean 
OR (dbkonto.klienkaupa)::boolean OR (crkonto.tyyp = 'K'::bpchar) OR 
(crkonto.tyyp = 'I'::bpchar) OR (dbkonto.tyyp = 'K'::bpchar) OR 
(dbkonto.tyyp = 'I'::bpchar)) THEN bilkaib.klient ELSE NULL::bpchar END, 
bilkaib.exchrate, CASE WHEN ((crkonto.klienkaupa)::boolean OR 
(dbkonto.klienkaupa)::boolean OR (crkonto.tyyp = 'K'::bpchar) OR 
(crkonto.tyyp = 'I'::bpchar) OR (dbkonto.tyyp = 'K'::bpchar) OR 
(dbkonto.tyyp = 'I'::bpchar)) THEN klient.nimi ELSE NULL::bpchar END, (CASE 
WHEN ((crkonto.arvekaupa)::boolean OR (dbkonto.arvekaupa)::boolean OR 
((bilkaib.cr <> '00'::bpchar) AND (crkonto.tyyp = 'K'::bpchar)) OR 
((bilkaib.db <> '00'::bpchar) AND (dbkonto.tyyp = 'K'::bpchar))) THEN 
bilkaib.doknr ELSE NULL::bpchar END)::character(25), bilkaib.ratediffer, 
CASE WHEN (bilkaib.raha = 'EEK'::bpchar) THEN '2007-01-01'::date ELSE 
bilkaib.kuupaev END"
"        ->  Hash Left Join  (cost=936.48..40184.64 rows=41923 width=838) 
(actual time=46.000..2820.944 rows=52156 loops=1)"
"              Hash Cond: ("outer".klient = "inner".kood)"
"              ->  Hash Join  (cost=785.35..34086.74 rows=41923 width=764) 
(actual time=34.547..1563.790 rows=52156 loops=1)"
"                    Hash Cond: ("outer".cr = "inner".kontonr)"
"                    ->  Hash Join  (cost=764.26..33403.76 rows=48533 
width=712) (actual time=32.069..1082.505 rows=52156 loops=1)"
"                          Hash Cond: ("outer".db = "inner".kontonr)"
"                          ->  Bitmap Heap Scan on bilkaib 
(cost=743.17..32616.41 rows=56185 width=660) (actual time=29.652..518.289 
rows=52156 loops=1)"
"                                Recheck Cond: ((cr ~~ '112%'::text) OR (db 
~~ '112%'::text))"
"                                Filter: (((cr ~~ '112%'::text) OR (db ~~ 
'112%'::text)) AND (kuupaev >= '2007-01-01'::date) AND (kuupaev <= 
'2008-11-26'::date))"
"                                ->  BitmapOr  (cost=743.17..743.17 
rows=65862 width=0) (actual time=26.539..26.539 rows=0 loops=1)"
"                                      ->  Bitmap Index Scan on 
bilkaib_cr_pattern_idx  (cost=0.00..236.63 rows=20939 width=0) (actual 
time=8.510..8.510 rows=21028 loops=1)"
"                                            Index Cond: ((cr ~>=~ 
'112'::bpchar) AND (cr ~<~ '113'::bpchar))"
"                                      ->  Bitmap Index Scan on 
bilkaib_db_pattern_idx  (cost=0.00..506.54 rows=44923 width=0) (actual 
time=18.013..18.013 rows=45426 loops=1)"
"                                            Index Cond: ((db ~>=~ 
'112'::bpchar) AND (db ~<~ '113'::bpchar))"
"                          ->  Hash  (cost=20.49..20.49 rows=241 width=66) 
(actual time=2.375..2.375 rows=241 loops=1)"
"                                ->  Seq Scan on konto dbkonto 
(cost=0.00..20.49 rows=241 width=66) (actual time=0.011..1.207 rows=241 
loops=1)"
"                                      Filter: (iseloom = 'A'::bpchar)"
"                    ->  Hash  (cost=20.49..20.49 rows=241 width=66) (actual 
time=2.451..2.451 rows=241 loops=1)"
"                          ->  Seq Scan on konto crkonto  (cost=0.00..20.49 
rows=241 width=66) (actual time=0.022..1.259 rows=241 loops=1)"
"                                Filter: (iseloom = 'A'::bpchar)"
"              ->  Hash  (cost=147.90..147.90 rows=1290 width=90) (actual 
time=11.371..11.371 rows=1290 loops=1)"
"                    ->  Seq Scan on klient  (cost=0.00..147.90 rows=1290 
width=90) (actual time=0.009..5.587 rows=1290 loops=1)"
"Total runtime: 11380.437 ms"


If group by is removed same query runs 8 times (!) faster:

set search_path to firma2,public;
explain analyze SELECT
 SUM(bilkaib.summa)::numeric(14,2) AS summa
   from BILKAIB join KONTO CRKONTO ON bilkaib.cr=crkonto.kontonr AND
       crkonto.iseloom='A'
     join KONTO DBKONTO ON bilkaib.db=dbkonto.kontonr AND
       dbkonto.iseloom='A'
     left join klient on bilkaib.klient=klient.kood
   where  ( bilkaib.cr LIKE '112'||'%' OR bilkaib.db LIKE '112'||'%' ) AND 
bilkaib.kuupaev BETWEEN '2007-01-01' AND '2008-11-26'

"Aggregate  (cost=34944.27..34944.28 rows=1 width=11) (actual 
time=1781.456..1781.460 rows=1 loops=1)"
"  ->  Hash Left Join  (cost=936.48..34839.46 rows=41923 width=11) (actual 
time=41.194..1545.105 rows=52156 loops=1)"
"        Hash Cond: ("outer".klient = "inner".kood)"
"        ->  Hash Join  (cost=785.35..34086.74 rows=41923 width=27) (actual 
time=30.372..1120.431 rows=52156 loops=1)"
"              Hash Cond: ("outer".cr = "inner".kontonr)"
"              ->  Hash Join  (cost=764.26..33403.76 rows=48533 width=41) 
(actual time=28.168..710.336 rows=52156 loops=1)"
"                    Hash Cond: ("outer".db = "inner".kontonr)"
"                    ->  Bitmap Heap Scan on bilkaib  (cost=743.17..32616.41 
rows=56185 width=55) (actual time=25.970..294.638 rows=52156 loops=1)"
"                          Recheck Cond: ((cr ~~ '112%'::text) OR (db ~~ 
'112%'::text))"
"                          Filter: (((cr ~~ '112%'::text) OR (db ~~ 
'112%'::text)) AND (kuupaev >= '2007-01-01'::date) AND (kuupaev <= 
'2008-11-26'::date))"
"                          ->  BitmapOr  (cost=743.17..743.17 rows=65862 
width=0) (actual time=23.056..23.056 rows=0 loops=1)"
"                                ->  Bitmap Index Scan on 
bilkaib_cr_pattern_idx  (cost=0.00..236.63 rows=20939 width=0) (actual 
time=7.414..7.414 rows=21028 loops=1)"
"                                      Index Cond: ((cr ~>=~ '112'::bpchar) 
AND (cr ~<~ '113'::bpchar))"
"                                ->  Bitmap Index Scan on 
bilkaib_db_pattern_idx  (cost=0.00..506.54 rows=44923 width=0) (actual 
time=15.627..15.627 rows=45426 loops=1)"
"                                      Index Cond: ((db ~>=~ '112'::bpchar) 
AND (db ~<~ '113'::bpchar))"
"                    ->  Hash  (cost=20.49..20.49 rows=241 width=14) (actual 
time=2.164..2.164 rows=241 loops=1)"
"                          ->  Seq Scan on konto dbkonto  (cost=0.00..20.49 
rows=241 width=14) (actual time=0.012..1.205 rows=241 loops=1)"
"                                Filter: (iseloom = 'A'::bpchar)"
"              ->  Hash  (cost=20.49..20.49 rows=241 width=14) (actual 
time=2.177..2.177 rows=241 loops=1)"
"                    ->  Seq Scan on konto crkonto  (cost=0.00..20.49 
rows=241 width=14) (actual time=0.019..1.203 rows=241 loops=1)"
"                          Filter: (iseloom = 'A'::bpchar)"
"        ->  Hash  (cost=147.90..147.90 rows=1290 width=16) (actual 
time=10.782..10.782 rows=1290 loops=1)"
"              ->  Seq Scan on klient  (cost=0.00..147.90 rows=1290 
width=16) (actual time=0.009..5.597 rows=1290 loops=1)"
"Total runtime: 1781.673 ms"


Responses

pgsql-performance by date

Next:From: Vegard B√łnesDate: 2008-11-28 15:32:24
Subject: Re: Deteriorating performance when loading large objects
Previous:From: AndrusDate: 2008-11-28 14:58:15
Subject: Re: Increasing pattern index query speed

Privacy Policy | About PostgreSQL
Copyright © 1996-2014 The PostgreSQL Global Development Group