Re: Windows vs recovery tests

From: Andres Freund <andres(at)anarazel(dot)de>
To: Andrew Dunstan <andrew(at)dunslane(dot)net>, Fabien COELHO <coelho(at)cri(dot)ensmp(dot)fr>
Cc: PostgreSQL-development <pgsql-hackers(at)postgresql(dot)org>
Subject: Re: Windows vs recovery tests
Date: 2022-01-13 05:41:23
Message-ID: 20220113054123.ib4khtafgq34lv4z@alap3.anarazel.de
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-hackers

Hi,

On 2022-01-12 20:03:14 -0800, Andres Freund wrote:
> So it looks like psql is hanging somewhere after that. I assume with an error
> popup that nobody can click on :/.

Not quite. Psql is actually just logging output in an endless loop. I
connected with cdb.exe.

kP:

00000000`007fd3c8 00007ffc`0d00f13a ntdll!NtWriteFile+0x14
00000000`007fd3d0 00007ffc`03978ec3 KERNELBASE!WriteFile+0x7a
00000000`007fd440 00007ffc`03979d21 ucrtbased!write_text_ansi_nolock(
int fh = 0n2,
char * buffer = 0x00000000`007febb0 " ???",
unsigned int buffer_size = 1)+0x183
00000000`007fe8f0 00007ffc`039798a7 ucrtbased!_write_nolock(
int fh = 0n2,
void * buffer = 0x00000000`007febb0,
unsigned int buffer_size = 1,
class __crt_cached_ptd_host * ptd = 0x00000000`007fef40)+0x451
00000000`007fea80 00007ffc`03920e1d ucrtbased!_write_internal(
int fh = 0n2,
void * buffer = 0x00000000`007febb0,
unsigned int size = 1,
class __crt_cached_ptd_host * ptd = 0x00000000`007fef40)+0x377
00000000`007feb20 00007ffc`0392090e ucrtbased!write_buffer_nolock<char>(
char c = 0n32 ' ',
class __crt_stdio_stream stream = class __crt_stdio_stream,
class __crt_cached_ptd_host * ptd = 0x00000000`007fef40)+0x27d
00000000`007febb0 00007ffc`03921242 ucrtbased!common_flush_and_write_nolock<char>(
int c = 0n32,
class __crt_stdio_stream stream = class __crt_stdio_stream,
class __crt_cached_ptd_host * ptd = 0x00000000`007fef40)+0x22e
00000000`007fec20 00007ffc`038ddf5a ucrtbased!__acrt_stdio_flush_and_write_narrow_nolock(
int c = 0n32,
struct _iobuf * stream = 0x00007ffc`03a27ce0,
class __crt_cached_ptd_host * ptd = 0x00000000`007fef40)+0x32
00000000`007fec60 00007ffc`038dd5a3 ucrtbased!_fwrite_nolock_internal(
void * buffer = 0x00000000`007ff020,
unsigned int64 element_size = 1,
unsigned int64 element_count = 7,
struct _iobuf * public_stream = 0x00007ffc`03a27ce0,
class __crt_cached_ptd_host * ptd = 0x00000000`007fef40)+0x79a
00000000`007fed60 00007ffc`038dd426 ucrtbased!<lambda_26974eb511f701c600fccfa2a97a8e1b>::operator()(void)+0x73
00000000`007fedd0 00007ffc`038dd4a8 ucrtbased!__crt_seh_guarded_call<unsigned __int64>::operator()<<lambda_a2589f19c
515cac03caf6db9c38355e9>,<lambda_26974eb511f701c600fccfa2a97a8e1b> &,<lambda_ad9ce2f38261e34e8a422b9cc35dfe8d> >(
class __acrt_lock_stream_and_call::__l2::<lambda_a2589f19c515cac03caf6db9c38355e9> * setup = 0x0
0000000`007fee58,
class _fwrite_internal::__l2::<lambda_26974eb511f701c600fccfa2a97a8e1b> * action = 0x00000000`00
7feec0,
class __acrt_lock_stream_and_call::__l2::<lambda_ad9ce2f38261e34e8a422b9cc35dfe8d> * cleanup = 0
x00000000`007fee50)+0x36
00000000`007fee10 00007ffc`038dd72d ucrtbased!__acrt_lock_stream_and_call<<lambda_26974eb511f701c600fccfa2a97a8e1b>
>(
struct _iobuf * stream = 0x00007ffc`03a27ce0,
class _fwrite_internal::__l2::<lambda_26974eb511f701c600fccfa2a97a8e1b> * action = 0x00000000`00
7feec0)+0x58
00000000`007fee70 00007ffc`038de046 ucrtbased!_fwrite_internal(
void * buffer = 0x00000000`007ff020,
unsigned int64 size = 1,
unsigned int64 count = 7,
struct _iobuf * stream = 0x00007ffc`03a27ce0,
class __crt_cached_ptd_host * ptd = 0x00000000`007fef40)+0x15d
00000000`007fef00 00000001`4004a639 ucrtbased!fwrite(
void * buffer = 0x00000000`007ff020,
unsigned int64 size = 1,
unsigned int64 count = 7,
struct _iobuf * stream = 0x00007ffc`03a27ce0)+0x56
00000000`007fef90 00000001`4004a165 psql!flushbuffer(
struct PrintfTarget * target = 0x00000000`007feff8)+0x59
00000000`007fefd0 00000001`4004a1e6 psql!pg_vfprintf(
struct _iobuf * stream = 0x00007ffc`03a27ce0,
char * fmt = 0x00000001`40094268 "error: ",
char * args = 0x00000000`007ff4a0 "@???")+0xa5
00000000`007ff450 00000001`40045962 psql!pg_fprintf(
struct _iobuf * stream = 0x00007ffc`03a27ce0,
char * fmt = 0x00000001`40094268 "error: ")+0x36
00000000`007ff490 00000001`40045644 psql!pg_log_generic_v(
pg_log_level level = PG_LOG_ERROR (0n4),
char * fmt = 0x00000001`40062e90 "unexpected PQresultStatus: %d",
char * ap = 0x00000000`007ff540 "???")+0x302
00000000`007ff4f0 00000001`4000ef1f psql!pg_log_generic(
pg_log_level level = PG_LOG_ERROR (0n4),
char * fmt = 0x00000001`40062e90 "unexpected PQresultStatus: %d")+0x34
00000000`007ff530 00000001`4000e794 psql!AcceptResult(
struct pg_result * result = 0x00000000`0015af90,
bool show_error = false)+0x9f
00000000`007ff580 00000001`4000c8fe psql!SendQueryAndProcessResults(
char * query = 0x00000000`00107570 "START_REPLICATION 0/0",
double * pelapsed_msec = 0x00000000`007ff718,
bool is_watch = false,
struct printQueryOpt * opt = 0x00000000`00000000,
struct _iobuf * printQueryFout = 0x00000000`00000000,
bool * tx_ended = 0x00000000`007ff6b3)+0x1a4
00000000`007ff680 00000001`40024104 psql!SendQuery(
char * query = 0x00000000`00107570 "START_REPLICATION 0/0")+0x42e
00000000`007ff750 00000001`40001524 psql!MainLoop(
struct _iobuf * source = 0x00007ffc`03a27c30)+0xf84
00000000`007ff890 00000001`40032903 psql!process_file(
char * filename = 0x00000001`400618e0 "<stdin>",
bool use_relative_path = false)+0x274
00000000`007ffcf0 00000001`400503f9 psql!main(
int argc = 0n8,
char ** argv = 0x00000000`0012a750)+0xc43
00000000`007ffe10 00000001`4005034e psql!invoke_main(void)+0x39
00000000`007ffe60 00000001`4005020e psql!__scrt_common_main_seh(void)+0x12e
00000000`007ffed0 00000001`4005046e psql!__scrt_common_main(void)+0xe
00000000`007fff00 00007ffc`0d2a7974 psql!mainCRTStartup(
void * __formal = 0x00000000`002e6000)+0xe
00000000`007fff30 00007ffc`0fe1a2f1 KERNEL32!BaseThreadInitThunk+0x14
00000000`007fff60 00000000`00000000 ntdll!RtlUserThreadStart+0x21

bp psql!pg_log_generic

0:000> k3
Child-SP RetAddr Call Site
00000000`007ff528 00000001`4000ef1f psql!pg_log_generic [C:\cirrus\src\common\logging.c @ 198]
00000000`007ff530 00000001`4000e794 psql!AcceptResult+0x9f [C:\cirrus\src\bin\psql\common.c @ 385]
00000000`007ff580 00000001`4000c8fe psql!SendQueryAndProcessResults+0x1a4 [C:\cirrus\src\bin\psql\common.c @ 1163]
0:000> g
Breakpoint 0 hit
psql!pg_log_generic:
00000001`40045610 4889542410 mov qword ptr [rsp+10h],rdx ss:00000000`007ff538=0000000100000006
0:000> k3
Child-SP RetAddr Call Site
00000000`007ff528 00000001`4000ef1f psql!pg_log_generic [C:\cirrus\src\common\logging.c @ 198]
00000000`007ff530 00000001`4000e794 psql!AcceptResult+0x9f [C:\cirrus\src\bin\psql\common.c @ 385]
00000000`007ff580 00000001`4000c8fe psql!SendQueryAndProcessResults+0x1a4 [C:\cirrus\src\bin\psql\common.c @ 1163]

Ah, I see the bug. It's a use-after-free introduced in the patch:

SendQueryAndProcessResults(const char *query, double *pelapsed_msec,
bool is_watch, const printQueryOpt *opt, FILE *printQueryFout, bool *tx_ended)

...
/* first result */
result = PQgetResult(pset.db);

while (result != NULL)

...
if (!AcceptResult(result, false))
{
...
ClearOrSaveResult(result);
success = false;

/* and switch to next result */
result_status = PQresultStatus(result);
if (result_status == PGRES_COPY_BOTH ||
result_status == PGRES_COPY_OUT ||
result_status == PGRES_COPY_IN)

So we called ClearOrSaveResult() with did a PQclear(), and then we go and call
PQresultStatus().

So this really is unrelated to CI. I'll mention this message in the other
thread.

Greetings,

Andres Freund

In response to

Browse pgsql-hackers by date

  From Date Subject
Next Message Andres Freund 2022-01-13 05:44:33 Re: psql - add SHOW_ALL_RESULTS option
Previous Message Amit Langote 2022-01-13 05:05:07 Re: a misbehavior of partition row movement (?)