Re: Deadlock in XLogInsert at AIX

From: Konstantin Knizhnik <k(dot)knizhnik(at)postgrespro(dot)ru>
To: Heikki Linnakangas <hlinnaka(at)iki(dot)fi>, PostgreSQL Hackers <pgsql-hackers(at)postgresql(dot)org>
Subject: Re: Deadlock in XLogInsert at AIX
Date: 2017-01-31 15:03:46
Message-ID: 2c4037f1-b35d-4d71-0d32-40a97523b31c@postgrespro.ru
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-hackers

One more assertion failure:

ExceptionalCondition(conditionName = "!(OldPageRqstPtr <=
XLogCtl->InitializedUpTo)", errorType = "FailedAssertion", fileName =
"xlog.c", lineNumber = 1887), line 54 in "assert.c"

(dbx) p OldPageRqstPtr
153551667200
(dbx) p XLogCtl->InitializedUpTo
153551667200
(dbx) p InitializedUpTo
153551659008

I slightly modified the xlog.c code to store the value of
XLogCtl->InitializedUpTo in a local variable:

1870 LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE);
1871
1872 /*
1873 * Now that we have the lock, check if someone
initialized the page
1874 * already.
1875 */
1876 while (upto >= XLogCtl->InitializedUpTo || opportunistic)
1877 {
1878 XLogRecPtr InitializedUpTo =
XLogCtl->InitializedUpTo;
1879 nextidx = XLogRecPtrToBufIdx(InitializedUpTo);
1880
1881 /*
1882 * Get ending-offset of the buffer page we need
to replace (this may
1883 * be zero if the buffer hasn't been used yet).
Fall through if it's
1884 * already written out.
1885 */
1886 OldPageRqstPtr = XLogCtl->xlblocks[nextidx];
1887 Assert(OldPageRqstPtr <= XLogCtl->InitializedUpTo);

And, as you can see, XLogCtl->InitializedUpTo is not equal to the saved
value InitializedUpTo.
But we are holding WALBufMappingLock in exclusive mode, and
InitializedUpTo is updated only under this lock.
So it means that LWLocks don't work!
I inspected the code of pg_atomic_compare_exchange_u32_impl and didn't
find a sync instruction in the prologue:

(dbx) listi pg_atomic_compare_exchange_u32_impl
0x1000817bc (pg_atomic_compare_exchange_u32_impl+0x1c)
e88100b0 ld r4,0xb0(r1)
0x1000817c0 (pg_atomic_compare_exchange_u32_impl+0x20)
e86100b8 ld r3,0xb8(r1)
0x1000817c4 (pg_atomic_compare_exchange_u32_impl+0x24)
800100c0 lwz r0,0xc0(r1)
0x1000817c8 (pg_atomic_compare_exchange_u32_impl+0x28) 7c0007b4
extsw r0,r0
0x1000817cc (pg_atomic_compare_exchange_u32_impl+0x2c)
e8a30002 lwa r5,0x0(r3)
0x1000817d0 (pg_atomic_compare_exchange_u32_impl+0x30) 7cc02028
lwarx r6,r0,r4,0x0
0x1000817d4 (pg_atomic_compare_exchange_u32_impl+0x34)
7c053040 cmpl cr0,0x0,r5,r6
0x1000817d8 (pg_atomic_compare_exchange_u32_impl+0x38)
4082000c bne 0x1000817e4
(pg_atomic_compare_exchange_u32_impl+0x44)
0x1000817dc (pg_atomic_compare_exchange_u32_impl+0x3c) 7c00212d
stwcx. r0,r0,r4
0x1000817e0 (pg_atomic_compare_exchange_u32_impl+0x40)
40e2fff0 bne+ 0x1000817d0
(pg_atomic_compare_exchange_u32_impl+0x30)
0x1000817e4 (pg_atomic_compare_exchange_u32_impl+0x44)
60c00000 ori r0,r6,0x0
0x1000817e8 (pg_atomic_compare_exchange_u32_impl+0x48)
90030000 stw r0,0x0(r3)
0x1000817ec (pg_atomic_compare_exchange_u32_impl+0x4c)
7c000026 mfcr r0
0x1000817f0 (pg_atomic_compare_exchange_u32_impl+0x50) 54001ffe
rlwinm r0,r0,0x3,0x1f,0x1f
0x1000817f4 (pg_atomic_compare_exchange_u32_impl+0x54) 78000620
rldicl r0,r0,0x0,0x19
0x1000817f8 (pg_atomic_compare_exchange_u32_impl+0x58)
98010070 stb r0,0x70(r1)
0x1000817fc (pg_atomic_compare_exchange_u32_impl+0x5c) 4c00012c
isync
0x100081800 (pg_atomic_compare_exchange_u32_impl+0x60)
88610070 lbz r3,0x70(r1)
0x100081804 (pg_atomic_compare_exchange_u32_impl+0x64)
48000004 b 0x100081808
(pg_atomic_compare_exchange_u32_impl+0x68)
0x100081808 (pg_atomic_compare_exchange_u32_impl+0x68)
38210080 addi r1,0x80(r1)
0x10008180c (pg_atomic_compare_exchange_u32_impl+0x6c)
4e800020 blr

Source code of pg_atomic_compare_exchange_u32_impl is the following:

static inline bool
pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
uint32 *expected, uint32 newval)
{
bool ret;

/*
* atomics.h specifies sequential consistency ("full barrier
semantics")
* for this interface. Since "lwsync" provides acquire/release
* consistency only, do not use it here. GCC atomics observe the same
* restriction; see its rs6000_pre_atomic_barrier().
*/
__asm__ __volatile__ (" sync \n" ::: "memory");

/*
* XXX: __compare_and_swap is defined to take signed parameters,
but that
* shouldn't matter since we don't perform any arithmetic operations.
*/
ret = __compare_and_swap((volatile int*)&ptr->value,
(int *)expected, (int)newval);

/*
* xlc's documentation tells us:
* "If __compare_and_swap is used as a locking primitive, insert a
call to
* the __isync built-in function at the start of any critical
sections."
*
* The critical section begins immediately after __compare_and_swap().
*/
__isync();

return ret;
}

and if I compile this function standalone, I get the following assembler
code:

.pg_atomic_compare_exchange_u32_impl: # 0x0000000000000000 (H.4.NO_SYMBOL)
stdu SP,-128(SP)
std r3,176(SP)
std r4,184(SP)
std r5,192(SP)
ld r0,192(SP)
stw r0,192(SP)
sync
ld r4,176(SP)
ld r3,184(SP)
lwz r0,192(SP)
extsw r0,r0
lwa r5,0(r3)
__L30: # 0x0000000000000030
(H.4.NO_SYMBOL+0x030)
lwarx r6,r0,r4
cmpl 0,0,r5,r6
bc BO_IF_NOT,CR0_EQ,__L44
stwcx. r0,r0,r4
.machine "any"
bc BO_IF_NOT_3,CR0_EQ,__L30
__L44: # 0x0000000000000044
(H.4.NO_SYMBOL+0x044)
ori r0,r6,0x0000
stw r0,0(r3)
mfcr r0
rlwinm r0,r0,3,31,31
rldicl r0,r0,0,56
stb r0,112(SP)
isync
lbz r3,112(SP)
addi SP,SP,128
bclr BO_ALWAYS,CR0_LT

sync is here!

--
Konstantin Knizhnik
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

In response to

Responses

Browse pgsql-hackers by date

  From Date Subject
Next Message Pavel Stehule 2017-01-31 15:32:57 Re: patch: function xmltable
Previous Message Tom Lane 2017-01-31 15:03:36 Re: An issue in remote query optimization