BUG #14437: BTREE Index is not used for queries on citext columns

From: pawel(dot)kania(at)moneyhouse(dot)de
To: pgsql-bugs(at)postgresql(dot)org
Subject: BUG #14437: BTREE Index is not used for queries on citext columns
Date: 2016-11-28 07:17:55
Message-ID: 20161128071755.6530.8905@wrigleys.postgresql.org
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-bugs

The following bug has been logged on the website:

Bug reference: 14437
Logged by: Paweł Kania
Email address: pawel(dot)kania(at)moneyhouse(dot)de
PostgreSQL version: 9.6.1
Operating system: Ubuntu 16.04 LTS
Description:

Hi,
I was trying to eliminate "WHERE lower(column_name) = lower(search_string)"
from queries by changing column type from text to citext but in some cases
indexes are not used for new queries. After setting ENABLE_SEQSCAN=off;
indexes are still not used. I prepared SQL queries to visualize problem:

-- create and fill table with dummy data
CREATE TABLE test_citext AS SELECT
id,
md5(random() :: TEXT) :: citext AS
name_citext,
(random() * 10) :: INT AS flag
FROM generate_Series(1, 9500000) id;

-- create indexes - first for citext and second to use with lower()
CREATE INDEX idx_test_citext_name_citext
ON test_citext
USING BTREE
(name_citext text_pattern_ops);

CREATE INDEX idx_test_citext_name_citext_lower
ON test_citext
USING BTREE
(lower(name_citext) text_pattern_ops);

ANALYZE test_citext;

SET ENABLE_SEQSCAN = ON;

-- index "idx_test_citext_name_citext" is not used
-- Aggregate (cost=231324.56..231324.57 rows=1 width=8) (actual
time=9469.793..9469.793 rows=1 loops=1)
-- -> Seq Scan on test_citext (cost=0.00..231284.98 rows=15833 width=0)
(actual time=9469.790..9469.790 rows=0 loops=1)
-- Filter: ((name_citext ~~ 'kantwe%'::citext) AND (COALESCE(flag,
0) < 2))
-- Rows Removed by Filter: 9500000
-- Planning time: 0.058 ms
-- Execution time: 9469.817 ms
EXPLAIN ANALYZE SELECT COUNT(*)
FROM test_citext
WHERE (name_citext LIKE ('kantwe' || '%') :: citext) AND
COALESCE(flag, 0) < 2;

-- index "idx_test_citext_name_citext_lower" is used as expected
-- Aggregate (cost=5.38..5.39 rows=1 width=8) (actual time=0.024..0.024
rows=1 loops=1)
-- -> Index Scan using idx_test_citext_name_citext_lower on test_citext
(cost=0.56..4.59 rows=317 width=0) (actual time=0.013..0.013 rows=0
loops=1)
-- Index Cond: ((lower((name_citext)::text) ~>=~ 'kantwe'::text) AND
(lower((name_citext)::text) ~<~ 'kantwf'::text))
-- Filter: ((COALESCE(flag, 0) < 2) AND (lower((name_citext)::text)
~~ 'kantwe%'::text))
-- Planning time: 0.135 ms
-- Execution time: 0.049 ms
EXPLAIN ANALYZE SELECT COUNT(*)
FROM test_citext
WHERE (lower(name_citext) LIKE ('kantwe' || '%')) AND
COALESCE(flag, 0) < 2;

-- index "idx_test_citext_name_citext" is not used
-- Aggregate (cost=207653.90..207653.91 rows=1 width=8) (actual
time=9154.633..9154.634 rows=1 loops=1)
-- -> Seq Scan on test_citext (cost=0.00..207535.15 rows=47500 width=0)
(actual time=9154.629..9154.629 rows=0 loops=1)
-- Filter: (name_citext ~~ 'kantwe%'::text)
-- Rows Removed by Filter: 9500000
-- Planning time: 0.054 ms
-- Execution time: 9154.655 ms
EXPLAIN ANALYZE SELECT COUNT(*)
FROM test_citext
WHERE (name_citext LIKE ('kantwe' || '%'));

-- index "using idx_test_citext_name_citext_lower" is used as expected
-- Aggregate (cost=6.96..6.97 rows=1 width=8) (actual time=0.021..0.021
rows=1 loops=1)
-- -> Index Scan using idx_test_citext_name_citext_lower on test_citext
(cost=0.56..4.58 rows=950 width=0) (actual time=0.018..0.018 rows=0
loops=1)
-- Index Cond: ((lower((name_citext)::text) ~>=~ 'kantwe'::text) AND
(lower((name_citext)::text) ~<~ 'kantwf'::text))
-- Filter: (lower((name_citext)::text) ~~ 'kantwe%'::text)
-- Planning time: 0.176 ms
-- Execution time: 0.065 ms
EXPLAIN ANALYZE SELECT COUNT(*)
FROM test_citext
WHERE (lower(name_citext) LIKE ('kantwe' || '%'));

-- ENABLE_SEQSCAN = off to show that qyery planner not even consider to use
index in first query
SET ENABLE_SEQSCAN = OFF;

-- index "idx_test_citext_name_citext" still is not used even if
ENABLE_SEQSCAN = off;
-- Aggregate (cost=10000231324.56..10000231324.57 rows=1 width=8) (actual
time=9711.291..9711.291 rows=1 loops=1)
-- -> Seq Scan on test_citext (cost=10000000000.00..10000231284.98
rows=15833 width=0) (actual time=9711.289..9711.289 rows=0 loops=1)
-- Filter: ((name_citext ~~ 'kantwe%'::citext) AND (COALESCE(flag,
0) < 2))
-- Rows Removed by Filter: 9500000
-- Planning time: 0.088 ms
-- Execution time: 9711.325 ms
EXPLAIN ANALYZE SELECT COUNT(*)
FROM test_citext
WHERE (name_citext LIKE ('kantwe' || '%') :: citext) AND
COALESCE(flag, 0) < 2;

-- index "using idx_test_citext_name_citext_lower" is used as expected
-- Aggregate (cost=5.38..5.39 rows=1 width=8) (actual time=0.012..0.012
rows=1 loops=1)
-- -> Index Scan using idx_test_citext_name_citext_lower on test_citext
(cost=0.56..4.59 rows=317 width=0) (actual time=0.011..0.011 rows=0
loops=1)
-- Index Cond: ((lower((name_citext)::text) ~>=~ 'kantwe'::text) AND
(lower((name_citext)::text) ~<~ 'kantwf'::text))
-- Filter: ((COALESCE(flag, 0) < 2) AND (lower((name_citext)::text)
~~ 'kantwe%'::text))
-- Planning time: 0.109 ms
-- Execution time: 0.038 ms
EXPLAIN ANALYZE SELECT COUNT(*)
FROM test_citext
WHERE (lower(name_citext) LIKE ('kantwe' || '%')) AND
COALESCE(flag, 0) < 2;

-- index "idx_test_citext_name_citext" is used but execution in unexpected
way
-- Aggregate (cost=392131.99..392132.01 rows=1 width=8) (actual
time=11495.535..11495.535 rows=1 loops=1)
-- -> Bitmap Heap Scan on test_citext (cost=184478.10..392013.24
rows=47500 width=0) (actual time=11495.532..11495.532 rows=0 loops=1)
-- Filter: (name_citext ~~ 'kantwe%'::text)
-- Rows Removed by Filter: 9500000
-- Heap Blocks: exact=35874 lossy=52912
-- -> Bitmap Index Scan on idx_test_citext_name_citext
(cost=0.00..184466.22 rows=9499932 width=0) (actual time=1628.607..1628.607
rows=9500000 loops=1)
-- Planning time: 0.059 ms
-- Execution time: 11495.561 ms
EXPLAIN ANALYZE SELECT COUNT(*)
FROM test_citext
WHERE (name_citext LIKE ('kantwe' || '%'));

-- index "idx_test_citext_name_citext_lower" is used as expected
-- Aggregate (cost=6.96..6.97 rows=1 width=8) (actual time=0.054..0.054
rows=1 loops=1)
-- -> Index Scan using idx_test_citext_name_citext_lower on test_citext
(cost=0.56..4.58 rows=950 width=0) (actual time=0.052..0.052 rows=0
loops=1)
-- Index Cond: ((lower((name_citext)::text) ~>=~ 'kantwe'::text) AND
(lower((name_citext)::text) ~<~ 'kantwf'::text))
-- Filter: (lower((name_citext)::text) ~~ 'kantwe%'::text)
-- Planning time: 0.105 ms
-- Execution time: 0.079 ms
EXPLAIN ANALYZE SELECT COUNT(*)
FROM test_citext
WHERE (lower(name_citext) LIKE ('kantwe' || '%'));

-- index "idx_test_citext_name_citext" is used
-- Index Only Scan using idx_test_citext_name_citext on test_citext
(cost=0.56..4.58 rows=1 width=33) (actual time=0.020..0.020 rows=0
loops=1)
-- Index Cond: (name_citext = 'kantwe'::text)
-- Heap Fetches: 0
-- Planning time: 0.100 ms
-- Execution time: 0.052 ms
EXPLAIN ANALYZE SELECT name_citext
FROM test_citext
WHERE name_citext = 'kantwe' :: TEXT;

-- index "idx_test_citext_name_citext" is used but in unexpected way
-- Bitmap Heap Scan on test_citext (cost=184466.22..392001.37 rows=1
width=33) (actual time=10822.321..10822.321 rows=0 loops=1)
-- Filter: (name_citext = 'kantwe'::citext)
-- Rows Removed by Filter: 9500000
-- Heap Blocks: exact=35874 lossy=52912
-- -> Bitmap Index Scan on idx_test_citext_name_citext
(cost=0.00..184466.22 rows=9499932 width=0) (actual time=1595.715..1595.715
rows=9500000 loops=1)
-- Planning time: 0.098 ms
-- Execution time: 10822.359 ms
EXPLAIN ANALYZE SELECT name_citext
FROM test_citext
WHERE name_citext = 'kantwe';

Responses

Browse pgsql-bugs by date

  From Date Subject
Next Message Bruce Momjian 2016-11-28 14:56:25 Re: BUG #14406: Statement fails after upgrade to 9.6.1
Previous Message Robert Lebel 2016-11-27 19:54:22 Re: BUG #14406: Statement fails after upgrade to 9.6.1