Re: Logical replication existing data copy

From: Erik Rijkers <er(at)xs4all(dot)nl>
To: Petr Jelinek <petr(dot)jelinek(at)2ndquadrant(dot)com>
Cc: Peter Eisentraut <peter(dot)eisentraut(at)2ndquadrant(dot)com>, PostgreSQL-development <pgsql-hackers(at)postgresql(dot)org>
Subject: Re: Logical replication existing data copy
Date: 2017-02-26 09:53:21
Message-ID: 2370b8600b167e96af7e6b9748347599@xs4all.nl
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-hackers

On 2017-02-26 01:45, Petr Jelinek wrote:

Again, much better... :

-- out_20170226_0724.txt
25 -- pgbench -c 1 -j 8 -T 10 -P 5 -n
25 -- All is well.
-- out_20170226_0751.txt
25 -- pgbench -c 4 -j 8 -T 10 -P 5 -n
25 -- All is well.
-- out_20170226_0819.txt
25 -- pgbench -c 8 -j 8 -T 10 -P 5 -n
25 -- All is well.
-- out_20170226_0844.txt
25 -- pgbench -c 16 -j 8 -T 10 -P 5 -n
25 -- All is well.
-- out_20170226_0912.txt
25 -- pgbench -c 32 -j 8 -T 10 -P 5 -n
25 -- All is well.
-- out_20170226_0944.txt
25 -- scale 5 clients 1 INIT_WAIT 0 CLEAN_ONLY=
25 -- pgbench -c 1 -j 8 -T 10 -P 5 -n
25 -- All is well.

.... but not perfect: with the next scale up (pgbench scale 25) I got:

-- out_20170226_1001.txt
3 -- scale 25 clients 1 INIT_WAIT 0 CLEAN_ONLY=
3 -- pgbench -c 1 -j 8 -T 10 -P 5 -n
2 -- All is well.
1 -- Not good, but breaking out of wait (waited more than 60s)

It looks like something got stuck at DROP SUBSCRIPTION again which, I
think, derives from this line:
echo "drop subscription if exists sub1" | psql -qXp $port2

I don't know exactly what is useful/useless to report; below is the
state of some tables/views (note that this is from 31 minutes after the
fact (see 'duration' in the first query)), and a backtrace :

$ ./view.sh
select current_setting('port') as port;
port
------
6973
(1 row)

select
rpad(now()::text,19) as now
, pid as pid
, application_name as app
, state as state
, wait_event as wt_evt
, wait_event_type as wt_evt_type
, date_trunc('second', query_start::timestamp) as query_start
, substring((now() - query_start)::text, 1, position('.' in (now() -
query_start)::text)-1) as duration
, query
from pg_stat_activity
where query !~ 'pg_stat_activity'
;
now | pid | app
| state | wt_evt | wt_evt_type | query_start
| duration | query
---------------------+-------+---------------------------------------------+--------+------------------------+-------------+---------------------+----------+----------------------------------
2017-02-26 10:42:43 | 28232 | logical replication worker 31929
| active | relation | Lock |
| |
2017-02-26 10:42:43 | 28237 | logical replication worker 31929 sync
31906 | | LogicalSyncStateChange | IPC |
| |
2017-02-26 10:42:43 | 28242 | logical replication worker 31929 sync
31909 | | transactionid | Lock |
| |
2017-02-26 10:42:43 | 32023 | psql
| active | BgWorkerShutdown | IPC | 2017-02-26 10:10:52
| 00:31:51 | drop subscription if exists sub1
(4 rows)

select * from pg_stat_replication;
pid | usesysid | usename | application_name | client_addr |
client_hostname | client_port | backend_start | backend_xmin | state |
sent_location | write_location | flush_location | replay_location |
sync_priority | sync_state
-----+----------+---------+------------------+-------------+-----------------+-------------+---------------+--------------+-------+---------------+----------------+----------------+-----------------+---------------+------------
(0 rows)

select * from pg_stat_subscription;
subid | subname | pid | relid | received_lsn |
last_msg_send_time | last_msg_receipt_time |
latest_end_lsn | latest_end_time
-------+---------+-------+-------+--------------+-------------------------------+-------------------------------+----------------+-------------------------------
31929 | sub1 | 28242 | 31909 | | 2017-02-26
10:07:05.723093+01 | 2017-02-26 10:07:05.723093+01 | |
2017-02-26 10:07:05.723093+01
31929 | sub1 | 28237 | 31906 | | 2017-02-26
10:07:04.721229+01 | 2017-02-26 10:07:04.721229+01 | |
2017-02-26 10:07:04.721229+01
31929 | sub1 | 28232 | | 1/73497468 |
| 2017-02-26 10:07:47.781883+01 | 1/59A73EF8 | 2017-02-26
10:07:04.720595+01
(3 rows)

select * from pg_subscription;
subdbid | subname | subowner | subenabled | subconninfo | subslotname |
subpublications
---------+---------+----------+------------+-------------+-------------+-----------------
16384 | sub1 | 10 | t | port=6972 | sub1 |
{pub1}
(1 row)

select * from pg_subscription_rel;
srsubid | srrelid | srsubstate | srsublsn
---------+---------+------------+------------
31929 | 31912 | i |
31929 | 31917 | i |
31929 | 31909 | d |
31929 | 31906 | w | 1/73498F90
(4 rows)

Dunno if a backtrace is is useful

$ gdb -pid 32023 (from the DROP SUBSCRIPTION line of the first query)
[...]
(gdb) bt
#0 0x00000030fcee8f13 in __epoll_wait_nocancel () from /lib64/libc.so.6
#1 0x00000000006fba86 in WaitEventSetWaitBlock (nevents=1,
occurred_events=0x7fff66e74dc0, cur_timeout=1000, set=0x2d15560) at
latch.c:997
#2 WaitEventSetWait (set=set(at)entry=0x2d15560,
timeout=timeout(at)entry=1000,
occurred_events=occurred_events(at)entry=0x7fff66e74dc0,
nevents=nevents(at)entry=1,
wait_event_info=wait_event_info(at)entry=134217728)
at latch.c:949
#3 0x00000000006fbefc in WaitLatchOrSocket (latch=0x7f74ec2d5cb4,
wakeEvents=wakeEvents(at)entry=25, sock=sock(at)entry=-1,
timeout=timeout(at)entry=1000,
wait_event_info=wait_event_info(at)entry=134217728)
at latch.c:349
#4 0x00000000006fbfd0 in WaitLatch (latch=<optimized out>,
wakeEvents=wakeEvents(at)entry=25, timeout=timeout(at)entry=1000,
wait_event_info=wait_event_info(at)entry=134217728) at latch.c:303
#5 0x00000000006c764f in logicalrep_worker_stop
(subid=subid(at)entry=31929, relid=relid(at)entry=0) at launcher.c:404
#6 0x00000000005c44fa in DropSubscription (stmt=stmt(at)entry=0x2d6fec0)
at subscriptioncmds.c:757
#7 0x0000000000724ab9 in ProcessUtilitySlow
(pstate=pstate(at)entry=0x2d0d978, pstmt=pstmt(at)entry=0x2d70220,
queryString=queryString(at)entry=0x2d6f488 "drop subscription if exists
sub1",
context=context(at)entry=PROCESS_UTILITY_TOPLEVEL,
params=params(at)entry=0x0,
completionTag=completionTag(at)entry=0x7fff66e757f0 "", dest=0x2d70318) at
utility.c:1620
#8 0x0000000000723f69 in standard_ProcessUtility (pstmt=0x2d70220,
queryString=0x2d6f488 "drop subscription if exists sub1",
context=PROCESS_UTILITY_TOPLEVEL, params=0x0, dest=0x2d70318,
completionTag=0x7fff66e757f0 "") at utility.c:922
#9 0x00007f74ecd6dd92 in pgss_ProcessUtility (pstmt=0x2d70220,
queryString=0x2d6f488 "drop subscription if exists sub1",
context=PROCESS_UTILITY_TOPLEVEL, params=0x0, dest=0x2d70318,
completionTag=0x7fff66e757f0 "") at pg_stat_statements.c:994
#10 0x0000000000721867 in PortalRunUtility (portal=0x2d05538,
pstmt=0x2d70220, isTopLevel=<optimized out>, setHoldSnapshot=<optimized
out>, dest=<optimized out>, completionTag=0x7fff66e757f0 "")
at pquery.c:1165
#11 0x0000000000722301 in PortalRunMulti (portal=portal(at)entry=0x2d05538,
isTopLevel=isTopLevel(at)entry=1 '\001',
setHoldSnapshot=setHoldSnapshot(at)entry=0 '\000',
dest=dest(at)entry=0x2d70318,
altdest=altdest(at)entry=0x2d70318,
completionTag=completionTag(at)entry=0x7fff66e757f0 "") at pquery.c:1315
#12 0x0000000000722fb8 in PortalRun (portal=portal(at)entry=0x2d05538,
count=count(at)entry=9223372036854775807, isTopLevel=isTopLevel(at)entry=1
'\001', dest=dest(at)entry=0x2d70318, altdest=altdest(at)entry=0x2d70318,
completionTag=completionTag(at)entry=0x7fff66e757f0 "") at pquery.c:788
#13 0x000000000071fefa in exec_simple_query (query_string=0x2d6f488
"drop subscription if exists sub1") at postgres.c:1101
#14 PostgresMain (argc=<optimized out>, argv=argv(at)entry=0x2d18ab8,
dbname=<optimized out>, username=<optimized out>) at postgres.c:4067
#15 0x000000000047571f in BackendRun (port=0x2d0cb40) at
postmaster.c:4310
#16 BackendStartup (port=0x2d0cb40) at postmaster.c:3982
#17 ServerLoop () at postmaster.c:1722
#18 0x00000000006b42a9 in PostmasterMain (argc=argc(at)entry=21,
argv=argv(at)entry=0x2ce5a50) at postmaster.c:1330
#19 0x0000000000636dbc in main (argc=21, argv=0x2ce5a50) at main.c:228

Not yet perfect, but we're getting there...

In response to

Responses

Browse pgsql-hackers by date

  From Date Subject
Next Message Robert Haas 2017-02-26 10:05:01 Re: bytea_output output of base64
Previous Message Robert Haas 2017-02-26 09:46:08 Re: Automatic cleanup of oldest WAL segments with pg_receivexlog