BUG #16583: merge join on tables with different DB collation behind postgres_fdw fails

From: PG Bug reporting form <noreply(at)postgresql(dot)org>
To: pgsql-bugs(at)lists(dot)postgresql(dot)org
Cc: jurafejfar(at)gmail(dot)com
Subject: BUG #16583: merge join on tables with different DB collation behind postgres_fdw fails
Date: 2020-08-17 12:02:41
Message-ID: 16583-4dc8f6c3b8ed24fa@postgresql.org
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-bugs pgsql-hackers

The following bug has been logged on the website:

Bug reference: 16583
Logged by: Jiří Fejfar
Email address: jurafejfar(at)gmail(dot)com
PostgreSQL version: 12.4
Operating system: debian 10.5
Description:

Joining two identical tables placed on separate DBs with different collation
accessed through postgres_fdw failed when joined with merge join. Some
records are missing (7 vs. 16 rows in example) in output. See this snippet
https://gitlab.com/-/snippets/2004522 (or code pasted below) for psql script
reproducing error also with expected output (working fine on alpine linux).
The same behavior is also observed on postgres v13.

Regards, Jiří Fejfar.

--------------------------------system---------------------
debian
cat /etc/debian_version
10.5

ldd --version
ldd (Debian GLIBC 2.28-10) 2.28
Copyright © 2018 Free Software Foundation, Inc.

--------
alpine
cat /etc/alpine-release
3.12.0

ldd --version
musl libc (x86_64)
Version 1.1.24
Dynamic Program Loader
Usage: /lib/ld-musl-x86_64.so.1 [options] [--] pathname

------------------------psql script--------------------
DROP DATABASE IF EXISTS db_en; DROP DATABASE IF EXISTS db_cz; DROP DATABASE
IF EXISTS db_join;
DROP USER IF EXISTS fdw_user_en; DROP USER IF EXISTS fdw_user_cz;

CREATE DATABASE db_en encoding UTF8 LC_COLLATE 'en_US.UTF-8' LC_CTYPE
'en_US.UTF-8' TEMPLATE template0;
CREATE DATABASE db_cz encoding UTF8 LC_COLLATE 'cs_CZ.UTF-8' LC_CTYPE
'cs_CZ.UTF-8' TEMPLATE template0;
CREATE DATABASE db_join encoding UTF8 LC_COLLATE 'en_US.UTF-8' LC_CTYPE
'en_US.UTF-8' TEMPLATE template0;

\c db_en

CREATE TABLE t_nuts (
id INT PRIMARY KEY,
label text
);

WITH w_labels AS (
VALUES ('CZ0100'), ('CZ0201'), ('CZ0202'), ('CZ0203'), ('CZ0204'),
('CZ0205'),
('CZ0206'), ('CZ0207'), ('CZ0208'), ('CZ0209'), ('CZ020A'), ('CZ020B'),
('CZ020C'),
('CZ0311'), ('CZ0312'), ('CZ0313')
)
INSERT INTO t_nuts (id, label)
SELECT
row_number() OVER() AS id,
w_labels.column1 as label FROM w_labels--, generate_series(1, 500)
;

VACUUM (FULL, ANALYZE) t_nuts;

SELECT label, count(*) from t_nuts GROUP BY label ORDER BY label;

\c db_cz

CREATE TABLE t_nuts (
id INT PRIMARY KEY,
label text
);

WITH w_labels AS (
VALUES ('CZ0100'), ('CZ0201'), ('CZ0202'), ('CZ0203'), ('CZ0204'),
('CZ0205'),
('CZ0206'), ('CZ0207'), ('CZ0208'), ('CZ0209'), ('CZ020A'), ('CZ020B'),
('CZ020C'),
('CZ0311'), ('CZ0312'), ('CZ0313')
)
INSERT INTO t_nuts (id, label)
SELECT
row_number() OVER() AS id,
w_labels.column1 as label FROM w_labels--, generate_series(1, 1000)
;

VACUUM (FULL, ANALYZE) t_nuts;

SELECT label, count(*) from t_nuts GROUP BY label ORDER BY label;

\c db_en
CREATE USER fdw_user_en WITH PASSWORD 'fdw_pass_en';
GRANT SELECT ON TABLE t_nuts TO fdw_user_en;

\c db_join

CREATE EXTENSION postgres_fdw ;
CREATE SERVER db_en_serv FOREIGN DATA WRAPPER postgres_fdw OPTIONS ( host
'localhost', port '5432', dbname 'db_en', use_remote_estimate 'True');
CREATE USER MAPPING FOR CURRENT_USER SERVER db_en_serv OPTIONS ( user
'fdw_user_en', password 'fdw_pass_en');
CREATE SCHEMA en;
IMPORT FOREIGN SCHEMA public LIMIT TO (t_nuts) FROM SERVER db_en_serv INTO
en;

SELECT label, count(*) FROM en.t_nuts GROUP BY label ORDER BY label;

\c db_cz
CREATE USER fdw_user_cz WITH PASSWORD 'fdw_pass_cz';
GRANT SELECT ON TABLE t_nuts TO fdw_user_cz;

\c db_join

CREATE SERVER db_cz_serv FOREIGN DATA WRAPPER postgres_fdw OPTIONS ( host
'localhost', port '5432', dbname 'db_cz', use_remote_estimate 'True');
CREATE USER MAPPING FOR CURRENT_USER SERVER db_cz_serv OPTIONS ( user
'fdw_user_cz', password 'fdw_pass_cz');
CREATE SCHEMA cz;
IMPORT FOREIGN SCHEMA public LIMIT TO (t_nuts) FROM SERVER db_cz_serv INTO
cz;

SELECT label, count(*) FROM cz.t_nuts GROUP BY label ORDER BY label;

EXPLAIN (VERBOSE)
SELECT cz__t_nuts.label, count(*)
FROM cz.t_nuts AS cz__t_nuts
INNER JOIN en.t_nuts AS en__t_nuts ON (cz__t_nuts.label =
en__t_nuts.label)
GROUP BY cz__t_nuts.label;

SELECT cz__t_nuts.label, count(*)
FROM cz.t_nuts AS cz__t_nuts
INNER JOIN en.t_nuts AS en__t_nuts ON (cz__t_nuts.label =
en__t_nuts.label)
GROUP BY cz__t_nuts.label;

select version();

------------------------wrong output (Debian, GLIBC 2.28)----
DROP DATABASE
DROP DATABASE
DROP DATABASE
DROP ROLE
DROP ROLE
CREATE DATABASE
CREATE DATABASE
CREATE DATABASE
Nyní jste připojeni k databázi "db_en" jako uživatel "postgres".
CREATE TABLE
INSERT 0 16
VACUUM
label | count
--------+-------
CZ0100 | 1
CZ0201 | 1
CZ0202 | 1
CZ0203 | 1
CZ0204 | 1
CZ0205 | 1
CZ0206 | 1
CZ0207 | 1
CZ0208 | 1
CZ0209 | 1
CZ020A | 1
CZ020B | 1
CZ020C | 1
CZ0311 | 1
CZ0312 | 1
CZ0313 | 1
(16 řádek)

Nyní jste připojeni k databázi "db_cz" jako uživatel "postgres".
CREATE TABLE
INSERT 0 16
VACUUM
label | count
--------+-------
CZ0100 | 1
CZ020A | 1
CZ020B | 1
CZ020C | 1
CZ0201 | 1
CZ0202 | 1
CZ0203 | 1
CZ0204 | 1
CZ0205 | 1
CZ0206 | 1
CZ0207 | 1
CZ0208 | 1
CZ0209 | 1
CZ0311 | 1
CZ0312 | 1
CZ0313 | 1
(16 řádek)

Nyní jste připojeni k databázi "db_en" jako uživatel "postgres".
CREATE ROLE
GRANT
Nyní jste připojeni k databázi "db_join" jako uživatel "postgres".
CREATE EXTENSION
CREATE SERVER
CREATE USER MAPPING
CREATE SCHEMA
IMPORT FOREIGN SCHEMA
label | count
--------+-------
CZ0100 | 1
CZ0201 | 1
CZ0202 | 1
CZ0203 | 1
CZ0204 | 1
CZ0205 | 1
CZ0206 | 1
CZ0207 | 1
CZ0208 | 1
CZ0209 | 1
CZ020A | 1
CZ020B | 1
CZ020C | 1
CZ0311 | 1
CZ0312 | 1
CZ0313 | 1
(16 řádek)

Nyní jste připojeni k databázi "db_cz" jako uživatel "postgres".
CREATE ROLE
GRANT
Nyní jste připojeni k databázi "db_join" jako uživatel "postgres".
CREATE SERVER
CREATE USER MAPPING
CREATE SCHEMA
IMPORT FOREIGN SCHEMA
label | count
--------+-------
CZ0100 | 1
CZ020A | 1
CZ020B | 1
CZ020C | 1
CZ0201 | 1
CZ0202 | 1
CZ0203 | 1
CZ0204 | 1
CZ0205 | 1
CZ0206 | 1
CZ0207 | 1
CZ0208 | 1
CZ0209 | 1
CZ0311 | 1
CZ0312 | 1
CZ0313 | 1
(16 řádek)

QUERY PLAN

-----------------------------------------------------------------------------------------------
GroupAggregate (cost=203.28..204.16 rows=16 width=40)
Output: cz__t_nuts.label, count(*)
Group Key: cz__t_nuts.label
-> Merge Join (cost=203.28..203.92 rows=16 width=32)
Output: cz__t_nuts.label
Merge Cond: (cz__t_nuts.label = en__t_nuts.label)
-> Foreign Scan on cz.t_nuts cz__t_nuts (cost=101.48..101.84
rows=16 width=7)
Output: cz__t_nuts.id, cz__t_nuts.label
Remote SQL: SELECT label FROM public.t_nuts ORDER BY label
ASC NULLS LAST
-> Sort (cost=101.80..101.84 rows=16 width=7)
Output: en__t_nuts.label
Sort Key: en__t_nuts.label
-> Foreign Scan on en.t_nuts en__t_nuts
(cost=100.00..101.48 rows=16 width=7)
Output: en__t_nuts.label
Remote SQL: SELECT label FROM public.t_nuts
(15 řádek)

label | count
--------+-------
CZ0100 | 1
CZ020A | 1
CZ020B | 1
CZ020C | 1
CZ0311 | 1
CZ0312 | 1
CZ0313 | 1
(7 řádek)

version

------------------------------------------------------------------------------------------------------------------
PostgreSQL 12.4 (Debian 12.4-1.pgdg100+1) on x86_64-pc-linux-gnu, compiled
by gcc (Debian 8.3.0-6) 8.3.0, 64-bit
(1 řádka)

------------------------correct output (Alpine, musl libc)----

DROP DATABASE
DROP DATABASE
DROP DATABASE
DROP ROLE
DROP ROLE
CREATE DATABASE
CREATE DATABASE
CREATE DATABASE
You are now connected to database "db_en" as user "postgres".
CREATE TABLE
INSERT 0 16
VACUUM
label | count
--------+-------
CZ0100 | 1
CZ0201 | 1
CZ0202 | 1
CZ0203 | 1
CZ0204 | 1
CZ0205 | 1
CZ0206 | 1
CZ0207 | 1
CZ0208 | 1
CZ0209 | 1
CZ020A | 1
CZ020B | 1
CZ020C | 1
CZ0311 | 1
CZ0312 | 1
CZ0313 | 1
(16 rows)

You are now connected to database "db_cz" as user "postgres".
CREATE TABLE
INSERT 0 16
VACUUM
label | count
--------+-------
CZ0100 | 1
CZ0201 | 1
CZ0202 | 1
CZ0203 | 1
CZ0204 | 1
CZ0205 | 1
CZ0206 | 1
CZ0207 | 1
CZ0208 | 1
CZ0209 | 1
CZ020A | 1
CZ020B | 1
CZ020C | 1
CZ0311 | 1
CZ0312 | 1
CZ0313 | 1
(16 rows)

You are now connected to database "db_en" as user "postgres".
CREATE ROLE
GRANT
You are now connected to database "db_join" as user "postgres".
CREATE EXTENSION
CREATE SERVER
CREATE USER MAPPING
CREATE SCHEMA
IMPORT FOREIGN SCHEMA
label | count
--------+-------
CZ0100 | 1
CZ0201 | 1
CZ0202 | 1
CZ0203 | 1
CZ0204 | 1
CZ0205 | 1
CZ0206 | 1
CZ0207 | 1
CZ0208 | 1
CZ0209 | 1
CZ020A | 1
CZ020B | 1
CZ020C | 1
CZ0311 | 1
CZ0312 | 1
CZ0313 | 1
(16 rows)

You are now connected to database "db_cz" as user "postgres".
CREATE ROLE
GRANT
You are now connected to database "db_join" as user "postgres".
CREATE SERVER
CREATE USER MAPPING
CREATE SCHEMA
IMPORT FOREIGN SCHEMA
label | count
--------+-------
CZ0100 | 1
CZ0201 | 1
CZ0202 | 1
CZ0203 | 1
CZ0204 | 1
CZ0205 | 1
CZ0206 | 1
CZ0207 | 1
CZ0208 | 1
CZ0209 | 1
CZ020A | 1
CZ020B | 1
CZ020C | 1
CZ0311 | 1
CZ0312 | 1
CZ0313 | 1
(16 rows)

QUERY PLAN

-----------------------------------------------------------------------------------------------
GroupAggregate (cost=203.28..204.16 rows=16 width=40)
Output: cz__t_nuts.label, count(*)
Group Key: cz__t_nuts.label
-> Merge Join (cost=203.28..203.92 rows=16 width=32)
Output: cz__t_nuts.label
Merge Cond: (cz__t_nuts.label = en__t_nuts.label)
-> Foreign Scan on cz.t_nuts cz__t_nuts (cost=101.48..101.84
rows=16 width=7)
Output: cz__t_nuts.id, cz__t_nuts.label
Remote SQL: SELECT label FROM public.t_nuts ORDER BY label
ASC NULLS LAST
-> Sort (cost=101.80..101.84 rows=16 width=7)
Output: en__t_nuts.label
Sort Key: en__t_nuts.label
-> Foreign Scan on en.t_nuts en__t_nuts
(cost=100.00..101.48 rows=16 width=7)
Output: en__t_nuts.label
Remote SQL: SELECT label FROM public.t_nuts
(15 rows)

label | count
--------+-------
CZ0100 | 1
CZ0201 | 1
CZ0202 | 1
CZ0203 | 1
CZ0204 | 1
CZ0205 | 1
CZ0206 | 1
CZ0207 | 1
CZ0208 | 1
CZ0209 | 1
CZ020A | 1
CZ020B | 1
CZ020C | 1
CZ0311 | 1
CZ0312 | 1
CZ0313 | 1
(16 rows)

version

---------------------------------------------------------------------------------------
PostgreSQL 12.4 on x86_64-pc-linux-musl, compiled by gcc (Alpine 9.3.0)
9.3.0, 64-bit
(1 row)

Responses

Browse pgsql-bugs by date

  From Date Subject
Next Message Tom Lane 2020-08-17 15:26:48 Re: BUG #16583: merge join on tables with different DB collation behind postgres_fdw fails
Previous Message Mukesh Chhatani 2020-08-15 22:17:44 Logical replication stalling for large tables with heavy write activity - Pg11

Browse pgsql-hackers by date

  From Date Subject
Next Message Fujii Masao 2020-08-17 12:14:22 Re: Creating a function for exposing memory usage of backend process
Previous Message Dave Page 2020-08-17 11:44:43 Re: EDB builds Postgres 13 with an obsolete ICU version