pg11.5: ExecHashJoinNewBatch: glibc detected...double free or corruption (!prev)

From: Justin Pryzby <pryzby(at)telsasoft(dot)com>
To: pgsql-hackers(at)postgresql(dot)org
Cc: Merlin Moncure <mmoncure(at)gmailcom>, Thomas Munro <thomas(dot)munro(at)gmail(dot)com>
Subject: pg11.5: ExecHashJoinNewBatch: glibc detected...double free or corruption (!prev)
Date: 2019-08-25 02:25:01
Message-ID: 20190825022501.GV15332@telsasoft.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-hackers

Core was generated by `postgres: telsasoft ts 10.100.2.162(33500) SELECT '.
Program terminated with signal 6, Aborted.
#0 0x00000039ff6325e5 in raise (sig=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:64
64 return INLINE_SYSCALL (tgkill, 3, pid, selftid, sig);
Missing separate debuginfos, use: debuginfo-install audit-libs-2.4.5-3.el6.x86_64 cyrus-sasl-lib-2.1.23-15.el6_6.2.x86_64 geos36-3.6.3-1.rhel6.1.x86_64 json-c-0.11-12.el6.x86_64 keyutils-libs-1.4-5.el6.x86_64 libcom_err-1.41.12-22.el6.x86_64 libgcc-4.4.7-17.el6.x86_64 libicu-4.2.1-14.el6.x86_64 libselinux-2.0.94-7.el6.x86_64 libstdc++-4.4.7-17.el6.x86_64 nspr-4.11.0-1.el6.x86_64 nss-3.21.0-8.el6.x86_64 nss-softokn-freebl-3.14.3-23.3.el6_8.x86_64 nss-util-3.21.0-2.el6.x86_64 postgis24_11-2.4.5-1.rhel6.1.x86_64 proj49-4.9.3-3.rhel6.1.x86_64 zlib-1.2.3-29.el6.x86_64
(gdb) bt
#0 0x00000039ff6325e5 in raise (sig=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:64
#1 0x00000039ff633dc5 in abort () at abort.c:92
#2 0x00000039ff6704f7 in __libc_message (do_abort=2, fmt=0x39ff758a60 "*** glibc detected *** %s: %s: 0x%s ***\n") at ../sysdeps/unix/sysv/linux/libc_fatal.c:198
#3 0x00000039ff675f3e in malloc_printerr (action=3, str=0x39ff758df0 "double free or corruption (!prev)", ptr=<value optimized out>, ar_ptr=<value optimized out>) at malloc.c:6360
#4 0x00000039ff678dd0 in _int_free (av=0x39ff98e120, p=0x1d40b090, have_lock=0) at malloc.c:4846
#5 0x00000000006269e5 in ExecHashJoinNewBatch (pstate=0x2771218) at nodeHashjoin.c:1058
#6 ExecHashJoinImpl (pstate=0x2771218) at nodeHashjoin.c:539
#7 ExecHashJoin (pstate=0x2771218) at nodeHashjoin.c:565
#8 0x00000000006131d0 in ExecProcNodeInstr (node=0x2771218) at execProcnode.c:461
#9 0x0000000000633286 in ExecProcNode (pstate=0x2771108) at ../../../src/include/executor/executor.h:247
#10 ExecSort (pstate=0x2771108) at nodeSort.c:107
#11 0x00000000006131d0 in ExecProcNodeInstr (node=0x2771108) at execProcnode.c:461
#12 0x000000000061d51e in ExecProcNode (aggstate=0x2770d30) at ../../../src/include/executor/executor.h:247
#13 fetch_input_tuple (aggstate=0x2770d30) at nodeAgg.c:406
#14 0x000000000061ee70 in agg_retrieve_direct (pstate=0x2770d30) at nodeAgg.c:1755
#15 ExecAgg (pstate=0x2770d30) at nodeAgg.c:1570
#16 0x00000000006131d0 in ExecProcNodeInstr (node=0x2770d30) at execProcnode.c:461
#17 0x000000000060f4b7 in ExecProcNode (queryDesc=0x2940950, direction=<value optimized out>, count=0, execute_once=48) at ../../../src/include/executor/executor.h:247
#18 ExecutePlan (queryDesc=0x2940950, direction=<value optimized out>, count=0, execute_once=48) at execMain.c:1723
#19 standard_ExecutorRun (queryDesc=0x2940950, direction=<value optimized out>, count=0, execute_once=48) at execMain.c:364
#20 0x00007ff08f74f618 in pgss_ExecutorRun (queryDesc=0x2940950, direction=ForwardScanDirection, count=0, execute_once=true) at pg_stat_statements.c:892
#21 0x00007ff08f2cc81d in explain_ExecutorRun (queryDesc=0x2940950, direction=ForwardScanDirection, count=0, execute_once=true) at auto_explain.c:281
#22 0x000000000075444b in PortalRunSelect (portal=0x2571f00, forward=<value optimized out>, count=0, dest=<value optimized out>) at pquery.c:932
#23 0x0000000000755631 in PortalRun (portal=0x2571f00, count=9223372036854775807, isTopLevel=true, run_once=true, dest=0x928e7a8, altdest=0x928e7a8, completionTag=0x7ffcaa940670 "") at pquery.c:773
#24 0x0000000000752267 in exec_simple_query (
query_string=0x2753e08 "--BEGIN SQL\nSELECT * FROM\n\n(SELECT site_office AS site_gran,\n \tsite_location AS cell,\n\tdisplay_site_name (sect_name, site_name, sect_mscid) AS sitename,\n\tperiod, data_cell.bh_day,\n\n", '-' <repeats 17 times>, "Ac"...) at postgres.c:1145
#25 0x0000000000753211 in PostgresMain (argc=<value optimized out>, argv=<value optimized out>, dbname=0x252b1f8 "ts", username=<value optimized out>) at postgres.c:4182
#26 0x00000000006e2587 in BackendRun (argc=<value optimized out>, argv=<value optimized out>) at postmaster.c:4358
#27 BackendStartup (argc=<value optimized out>, argv=<value optimized out>) at postmaster.c:4030
#28 ServerLoop (argc=<value optimized out>, argv=<value optimized out>) at postmaster.c:1707
#29 PostmasterMain (argc=<value optimized out>, argv=<value optimized out>) at postmaster.c:1380
#30 0x0000000000656a80 in main (argc=3, argv=0x24f7570) at main.c:228

I found this report from 9.6 last month; copying those parties.
https://www.postgresql.org/message-id/CAHyXU0xzwzgnUTyK62ZkG0_1CQsBHwnTVT2TSX7iwTEv1ve9ag%40mail.gmail.com

Merlin: what OS are you running on and what postgres package, or was it
compiled locally ?

We're running:
postgresql11-server-11.5-1PGDG.rhel6.x86_64
CentOS release 6.8
glibc-2.12-1.192.el6.x86_64
linux 2.6.32-754.3.5.el6.x86_64
DMI: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 09/21/2015
Model name: Intel(R) Xeon(R) CPU E5-2683 v4 @ 2.10GHz

I see this is available but not installed: 2.12-1.212.el6_10.3

The VM has no known issues.

ts=# SELECT * FROM pg_config();
..
| CONFIGURE | '--enable-rpath' '--prefix=/usr/pgsql-11' '--includedir=/usr/pgsql-11/include' '--mandir=/usr/pgsql-11/share/man' '--datadir=/usr/pgsql-11/share' '--libdir=/usr/pgsql-11/lib' '--with-icu' '--with-perl' '--with-python' '--with-tcl' '--with-tclconfig=/usr/lib64' '--with-openssl' '--with-pam' '--with-gssapi' '--with-includes=/usr/include' '--with-libraries=/usr/lib64' '--enable-nls' '--with-uuid=e2fs' '--with-libxml' '--with-libxslt' '--with-ldap' '--with-system-tzdata=/usr/share/zoneinfo' '--sysconfdir=/etc/sysconfig/pgsql' '--docdir=/usr/pgsql-11/doc' '--htmldir=/usr/pgsql-11/doc/html' 'CFLAGS=-O2 -g -pipe -Wall -Wp,-D_FORTIFY_SOURCE=2 -fexceptions -fstack-protector --param=ssp-buffer-size=4 -m64 -mtune=generic' 'LDFLAGS=-Wl,--as-needed' 'PKG_CONFIG_PATH=:/usr/lib64/pkgconfig:/usr/share/pkgconfig' 'ICU_CFLAGS=-I/usr/include' 'ICU_LIBS=-L/usr/lib64 -licui18n -licuuc -licudata'
| CC | gcc
| CPPFLAGS | -I/usr/include -D_GNU_SOURCE -I/usr/include/libxml2 -I/usr/include
| CFLAGS | -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -O2 -g -pipe -Wall -Wp,-D_FORTIFY_SOURCE=2 -fexceptions -fstack-protector --param=ssp-buffer-size=4 -m64 -mtune=generic
| CFLAGS_SL | -fPIC
| LDFLAGS | -Wl,--as-needed -L/usr/lib64 -Wl,--as-needed -Wl,-rpath,'/usr/pgsql-11/lib',--enable-new-dtags
| LDFLAGS_EX |
| LDFLAGS_SL |
| LIBS | -lpgcommon -lpgport -lpthread -lxslt -lxml2 -lpam -lssl -lcrypto -lgssapi_krb5 -lz -lreadline -lrt -lcrypt -ldl -lm

#0 0x00000039ff6325e5 in raise (sig=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:64
resultvar = 0
pid = <value optimized out>
selftid = <value optimized out>
#1 0x00000039ff633dc5 in abort () at abort.c:92
save_stage = 2
act = {__sigaction_handler = {sa_handler = 0x7ffcaa93f5c8, sa_sigaction = 0x7ffcaa93f5c8}, sa_mask = {__val = {140723170309552, 140723170319605, 49, 249099024494, 3, 140723170309562, 6,
249099024498, 2, 140723170309550, 2, 249099017763, 1, 249099024494, 3, 140723170309558}}, sa_flags = 10, sa_restorer = 0x39ff757872}
sigs = {__val = {32, 0 <repeats 15 times>}}
#2 0x00000039ff6704f7 in __libc_message (do_abort=2, fmt=0x39ff758a60 "*** glibc detected *** %s: %s: 0x%s ***\n") at ../sysdeps/unix/sysv/linux/libc_fatal.c:198
ap = {{gp_offset = 40, fp_offset = 48, overflow_arg_area = 0x7ffcaa93ff30, reg_save_area = 0x7ffcaa93fe40}}
ap_copy = {{gp_offset = 16, fp_offset = 48, overflow_arg_area = 0x7ffcaa93ff30, reg_save_area = 0x7ffcaa93fe40}}
fd = 2
on_2 = <value optimized out>
list = <value optimized out>
nlist = <value optimized out>
cp = <value optimized out>
written = <value optimized out>
#3 0x00000039ff675f3e in malloc_printerr (action=3, str=0x39ff758df0 "double free or corruption (!prev)", ptr=<value optimized out>, ar_ptr=<value optimized out>) at malloc.c:6360
buf = "000000001d40b0a0"
cp = <value optimized out>
#4 0x00000039ff678dd0 in _int_free (av=0x39ff98e120, p=0x1d40b090, have_lock=0) at malloc.c:4846
size = <value optimized out>
fb = <value optimized out>
nextchunk = <value optimized out>
nextsize = <value optimized out>
nextinuse = <value optimized out>
prevsize = <value optimized out>
bck = <value optimized out>
fwd = <value optimized out>
errstr = <value optimized out>
locked = <value optimized out>
#5 0x00000000006269e5 in ExecHashJoinNewBatch (pstate=0x2771218) at nodeHashjoin.c:1058
nbatch = <value optimized out>
curbatch = <value optimized out>
slot = <value optimized out>
hashvalue = 1756558929
hashtable = 0x3c4ed48
innerFile = 0x1d40b0d8
#6 ExecHashJoinImpl (pstate=0x2771218) at nodeHashjoin.c:539
outerNode = 0x27714d8
hashNode = 0x2772348
econtext = <value optimized out>
node = 0x2771218
joinqual = 0x0
otherqual = 0x0
---Type <return> to continue, or q <return> to quit---
hashtable = 0xc8
hashvalue = 0
batchno = 41357576
parallel_state = <value optimized out>
#7 ExecHashJoin (pstate=0x2771218) at nodeHashjoin.c:565
No locals.
#8 0x00000000006131d0 in ExecProcNodeInstr (node=0x2771218) at execProcnode.c:461
result = <value optimized out>
#9 0x0000000000633286 in ExecProcNode (pstate=0x2771108) at ../../../src/include/executor/executor.h:247
No locals.
#10 ExecSort (pstate=0x2771108) at nodeSort.c:107
plannode = <value optimized out>
outerNode = 0x2771218
tupDesc = <value optimized out>
node = 0x2771108
estate = 0x2770a40
dir = ForwardScanDirection
tuplesortstate = 0x3c87160
slot = <value optimized out>
...

If GDB isn't lying and I'm not being confuse by git, that's:
BufFileClose(innerFile);

Which does (at least):

pfree(file->files);
pfree(file->offsets);
pfree(file);

(gdb) p *innerFile
$2 = {numFiles = 1, files = 0xa421328, offsets = 0xa421310, isInterXact = false, dirty = false, readOnly = false, fileset = 0x0, name = 0x0, resowner = 0x24f93e0, curFile = 0, curOffset = 73016512,
pos = 0, nbytes = 0, buffer = {
data = '\000' <repeats 44 times>, "Q\366\262h\220\004\000\000\000\000\000\000\000\000L\000\003\000(\377\377\377\377\377\177\372\377\377\017\000\000\000\000\000\000\000\000\257\321\345\333\063\002\000\003", '\000' <repeats 23 times>"\204, \003\000\000\000\000\000\000K\000\000\000\000\000\000\000K\000\000\000\000\000\000\000K\000\000\000\000\000\000\000K\000\000\000\000\000\000\000b\000\000\000\000\000\000\000c\000\000\000\000\000\000\000c\000\000\000\000\000\000\000d", '\000' <repeats 15 times>, "q\002\000\000\000\000\000\000d", '\000' <repeats 23 times>, "I\000\000\000\000\000\000\000\344H\a\000\000\000\000\000\017\000\210\026\000\310\024\000\024H\000\000\000\000\000\000\017\000\210\035\000H!\027\000\210\216\000T\vB\017\304\t\027\000\210\a\000\206\b\033\030.\"", '\000' <repeats 11 times>..., force_align_d = 0, force_align_i64 = 0}}

(gdb) p innerFile->files[0]
$8 = 2397

Responses

Browse pgsql-hackers by date

  From Date Subject
Next Message Thomas Munro 2019-08-25 02:56:55 Re: LLVM breakage on seawasp
Previous Message Floris Van Nee 2019-08-24 21:59:31 Re: Optimize single tuple fetch from nbtree index