BUG #15821: Parallel Workers with functions and auto_explain: ERROR: could not find key 3 in shm TOC

From: PG Bug reporting form <noreply(at)postgresql(dot)org>
To: pgsql-bugs(at)lists(dot)postgresql(dot)org
Cc: ch+pg(at)zeha(dot)at
Subject: BUG #15821: Parallel Workers with functions and auto_explain: ERROR: could not find key 3 in shm TOC
Date: 2019-05-27 16:11:37
Message-ID: 15821-5eb422e980594075@postgresql.org
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-bugs pgsql-hackers

The following bug has been logged on the website:

Bug reference: 15821
Logged by: Christian Hofstaedtler
Email address: ch+pg(at)zeha(dot)at
PostgreSQL version: 11.3
Operating system: Debian stretch amd64
Description:

We have enabled auto_explain and see errors on PostgreSQL 11.3 when
SELECTing from a user defined function. No such crashes have been
observed on 10.7.

Maybe relevant config settings:
log_min_duration_statement = 0
auto_explain.log_min_duration = 150ms
auto_explain.log_analyze = on
shared_preload_libraries = 'pg_stat_statements'
pg_stat_statements.track = all
pg_stat_statements.max = 10000

I can trigger the error on our database using:

CREATE TABLE reprotable(id serial, val int);
INSERT INTO reprotable(val) SELECT * FROM generate_series(1, 10000000);
CREATE OR REPLACE FUNCTION public.crashrepro4() RETURNS TABLE(foo integer,
foo2 integer) LANGUAGE sql AS $function$
SELECT c.id, v.val FROM reprotable c JOIN reprotable v ON v.id = c.id
$function$;

LOAD 'auto_explain';
SET max_parallel_workers_per_gather TO 8;
SELECT * FROM crashrepro4();

Sometimes this works on the first few tries; running
EXPLAIN ANALYZE SELECT * FROM crashrepro4();
appears to help with the reproduction.

Output:
ERROR: could not find key 3 in shm TOC at 0x7f45a0334000
CONTEXT: parallel worker
SQL function "crashrepro4" statement 1

Backtrace:

Program received signal SIGUSR1, User defined signal 1.
0x00007f459dc94bc6 in posix_fallocate64 () from
/lib/x86_64-linux-gnu/libc.so.6
(gdb) bt full
#0 0x00007f459dc94bc6 in posix_fallocate64 () from
/lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.
#1 0x00005636d9cc675b in dsm_impl_posix_resize (size=134483968, fd=7) at
./build/../src/backend/storage/ipc/dsm_impl.c:441
rc = <optimized out>
#2 dsm_impl_posix (impl_private=0x8041000, elevel=20,
mapped_size=0x5636dc084348, mapped_address=0x5636dc084340,
request_size=134483968, handle=<optimized out>, op=DSM_OP_CREATE) at
./build/../src/backend/storage/ipc/dsm_impl.c:326
flags = <optimized out>
fd = 7
name =
"/PostgreSQL(dot)1895625775\000\000X\274ʝE\177\000\000(at)\354\322\357\377\177\000\000\020\337\003\334\066V\000\000@\000\000\000\000\000\000\000\000\200\000\000\000\000\000"
address = <optimized out>
#3 dsm_impl_op (op=op(at)entry=DSM_OP_CREATE, handle=<optimized out>,
request_size=request_size(at)entry=134483968,
impl_private=impl_private(at)entry=0x5636dc084338,
mapped_address=mapped_address(at)entry=0x5636dc084340,
mapped_size=mapped_size(at)entry=0x5636dc084348, elevel=20) at
./build/../src/backend/storage/ipc/dsm_impl.c:177
__func__ = "dsm_impl_op"
#4 0x00005636d9cc7877 in dsm_create (size=size(at)entry=134483968,
flags=flags(at)entry=0) at ./build/../src/backend/storage/ipc/dsm.c:474
seg = 0x5636dc084318
i = <optimized out>
nitems = <optimized out>
__func__ = "dsm_create"
#5 0x00005636d9e2b317 in make_new_segment (area=area(at)entry=0x5636dc17fd08,
requested_pages=requested_pages(at)entry=32768) at
./build/../src/backend/utils/mmgr/dsa.c:2155
new_index = 2
metadata_bytes = 266240
total_size = 134483968
total_pages = <optimized out>
usable_pages = 32768
segment_map = <optimized out>
segment = <optimized out>
#6 0x00005636d9e2cbce in dsa_allocate_extended (area=0x5636dc17fd08,
size=size(at)entry=134217728, flags=flags(at)entry=0) at
./build/../src/backend/utils/mmgr/dsa.c:712
npages = 32768
first_page = 139937017269144
pool = 0x7f45a02a25f0
size_class = <optimized out>
start_pointer = <optimized out>
segment_map = <optimized out>
__func__ = "dsa_allocate_extended"
#7 0x00005636d9bb3e95 in ExecParallelHashTableAlloc
(hashtable=hashtable(at)entry=0x5636dc173a70, batchno=batchno(at)entry=0) at
./build/../src/backend/executor/nodeHash.c:3047
batch = 0x7f417ccc0000
buckets = <optimized out>
nbuckets = 16777216
i = <optimized out>
#8 0x00005636d9bb42e3 in ExecHashTableCreate
(state=state(at)entry=0x5636dc146280, hashOperators=<optimized out>,
keepNulls=<optimized out>) at
./build/../src/backend/executor/nodeHash.c:615
pstate = <optimized out>
build_barrier = 0x7f45a02a1450
node = 0x5636dc140568
hashtable = 0x5636dc173a70
outerNode = <optimized out>
space_allowed = 64424509440
nbuckets = 16777216
nbatch = 1
rows = <optimized out>
num_skew_mcvs = 10391049
log2_nbuckets = <optimized out>
i = 0
ho = <optimized out>
__func__ = "ExecHashTableCreate"
#9 0x00005636d9bb7085 in ExecHashJoinImpl (parallel=true, pstate=<optimized
out>) at ./build/../src/backend/executor/nodeHashjoin.c:279
outerNode = <optimized out>
hashNode = <optimized out>
econtext = <optimized out>
node = <optimized out>
joinqual = <optimized out>
otherqual = <optimized out>
hashtable = 0x0
hashvalue = 32767
batchno = -641936805
parallel_state = <optimized out>
#10 ExecParallelHashJoin (pstate=<optimized out>) at
./build/../src/backend/executor/nodeHashjoin.c:581
No locals.
#11 0x00005636d9bb2194 in ExecProcNode (node=0x5636dc145b60) at
./build/../src/include/executor/executor.h:247
No locals.
#12 gather_getnext (gatherstate=0x5636dc145970) at
./build/../src/backend/executor/nodeGather.c:276
estate = 0x5636dc145730
outerPlan = 0x5636dc145b60
fslot = 0x5636dc0743a8
#13 ExecGather (pstate=0x5636dc145970) at
./build/../src/backend/executor/nodeGather.c:207
node = 0x5636dc145970
econtext = 0x5636dc145a80
#14 0x00005636d9b9b4c3 in ExecProcNode (node=0x5636dc145970) at
./build/../src/include/executor/executor.h:247
No locals.
#15 ExecutePlan (execute_once=<optimized out>, dest=0x5636dc141648,
direction=<optimized out>, numberTuples=0, sendTuples=<optimized out>,
operation=CMD_SELECT, use_parallel_mode=<optimized out>,
planstate=0x5636dc145970,
estate=0x5636dc145730) at
./build/../src/backend/executor/execMain.c:1723
slot = <optimized out>
current_tuple_count = 0
#16 standard_ExecutorRun (queryDesc=0x5636dc141698, direction=<optimized
out>, count=0, execute_once=<optimized out>) at
./build/../src/backend/executor/execMain.c:364
estate = 0x5636dc145730
operation = CMD_SELECT
dest = 0x5636dc141648
sendTuples = <optimized out>
__func__ = "standard_ExecutorRun"
#17 0x00007f4597d08e15 in pgss_ExecutorRun (queryDesc=0x5636dc141698,
direction=ForwardScanDirection, count=0, execute_once=<optimized out>) at
./build/../contrib/pg_stat_statements/pg_stat_statements.c:892
save_exception_stack = 0x7fffefd2f0a0
save_context_stack = 0x7fffefd2f1d0
local_sigjmp_buf = {{__jmpbuf = {94793620402592, -81779228135919821,
1, 0, 94793620523992, 94793620526040, -81779228572127437,
-24340724960596173}, __mask_was_saved = 0, __saved_mask = {__val =
{94793619686312,
140737216966656, 94793581381244, 94793619686312,
94793620543280, 140737216966688, 94793581092645, 94793620543856,
94793620543280, 140737216966736, 94793581143132, 94793620522984, 16, 0,
94793620543280,
140737216966784}}}}
#18 0x00007f417cdbf4dd in explain_ExecutorRun (queryDesc=0x5636dc141698,
direction=ForwardScanDirection, count=0, execute_once=<optimized out>) at
./build/../contrib/auto_explain/auto_explain.c:268
save_exception_stack = 0x7fffefd2f8a0
save_context_stack = 0x7fffefd2f1d0
local_sigjmp_buf = {{__jmpbuf = {94793620402592, -81779228104462541,
1, 0, 94793620523992, 94793620526040, -81779228138016973,
-26933449381777613}, __mask_was_saved = 0, __saved_mask = {__val =
{94793581107904, 94793620526744,
94793588700640, 140737216966960, 139936876507429,
140737216967008, 94793620526744, 1, 0, 140737216967024, 139919244391853, 1,
0, 94793620523992, 94793620402592, 1}}}}
#19 0x00005636d9ba8f6a in postquel_getnext (es=0x5636dc140bd8,
es=0x5636dc140bd8, fcache=0x5636dc1231a0, fcache=0x5636dc1231a0) at
./build/../src/backend/executor/functions.c:867
count = 0
#20 fmgr_sql (fcinfo=0x7fffefd2f340) at
./build/../src/backend/executor/functions.c:1164
fcache = 0x5636dc1231a0
sqlerrcontext = {previous = 0x0, callback = 0x5636d9ba77a0
<sql_exec_error_callback>, arg = 0x5636dc06a230}
randomAccess = false
lazyEvalOK = <optimized out>
is_first = <optimized out>
pushed_snapshot = true
es = 0x5636dc140bd8
slot = <optimized out>
result = <optimized out>
eslist = <optimized out>
eslc = 0x5636dc1413d8
__func__ = "fmgr_sql"
#21 0x00005636d9ba4bac in ExecMakeTableFunctionResult
(setexpr=0x5636dc06a210, econtext=0x5636dc06a0e0, argContext=<optimized
out>, expectedDesc=0x5636dc06ac68, randomAccess=false) at
./build/../src/backend/executor/execSRF.c:231
result = 94793580413993
tupstore = 0x0
tupdesc = 0x0
funcrettype = 2249
returnsTuple = <optimized out>
returnsSet = true
fcinfo = {flinfo = 0x5636dc06a230, context = 0x0, resultinfo =
0x7fffefd2f300, fncollation = 0, isnull = false, nargs = 0, arg =
{94793619488832, 140733193388046, 0, 94793580418128, 140737216967520, 6,
3432, 140733193388035,
94793585267779, 1, 59588104878912, 94793585267784, 32,
139936976061625, 94793584837154, 1, 140737216968608, 3, 140737216967832,
206158430224, 140737216969520, 94793620609964, 94793620609964,
139936977225618,
139936979540000, 8, 0, 139936976486110, 3688503315210960912,
8187, 0, 94793620609936, 94793620618144, 139936979544832, 0, 160, 142,
139936976269588, 140737216967904, 94793583475689, 140737216968048,
94793620609952,
94793620609936, 252098579, 140737216968464, 139937017242608,
139937017242584, 139937017242608, 139937017242608, 94793619743656,
140737216967952, 94793583769478, 139937017242608, 7074228167464498944,
94793620438296,
94793581565312, 140737216968032, 94793581372751,
140737216968032, 94793620438296, 140737216968464, 140737216968464,
94793584972684, 0, 140737216968144, 94793581565681, 140737216968080,
7074228167464498944, 94793620438216,
94793581565312, 140737216968144, 94793581372751, 1,
7074228167464498944, 140737216968464, 94793620438216, 140737216968464,
140737216968464, 140737216968256, 94793581565681, 4294967295,
139937017247848, 139937017247824,
139937017247848, 139937017247848, 94793619743656,
140737216968240, 7074228167464498944, 139937017247848, 94793620438376,
94793581565312, 140737216968464, 140737216968320, 94793581372827,
140737216968304, 94793620438408,
140737216968464, 94793620436824, 0, 0}, argnull = {240, 246,
210, 239, 255, 127, false, false, 241, 150, 193, 217, 54, 86, false, false,
16, 247, 210, 239, 255, 127, false, false, 169, 156, 193, 217, 54, 86,
false, false,
240, 246, 210, 239, 255, 127, false, false, 233, 187, 222, 217,
54, 86, false, false, 24, false, false, false, false, false, false, false,
175, 189, 160, 217, 54, 86, false, false, 16, 247, 210, 239, 255, 127,
false,
false, 233, 187, 222, 217, 54, 86, false, false, 23, false,
false, false, false, false, false, false, 88, 171, 6, 220, 54, 86, false,
false, 80, 56, 52, 160}}
fcusage = {fs = 0x5636dc1278c0, save_f_total_time = {tv_sec = 0,
tv_nsec = 0}, save_total = {tv_sec = 0, tv_nsec = 0}, f_start = {tv_sec =
469579, tv_nsec = 359852825}}
rsinfo = {type = T_ReturnSetInfo, econtext = 0x5636dc06a0e0,
expectedDesc = 0x5636dc06ac68, allowedModes = 11, returnMode =
SFRM_ValuePerCall, isDone = ExprSingleResult, setResult = 0x0, setDesc =
0x0}
tmptup = {t_len = 0, t_self = {ip_blkid = {bi_hi = 0, bi_lo = 0},
ip_posid = 2}, t_tableOid = 0, t_data = 0x2}
callerContext = 0x5636dc069cb0
first_time = true
__func__ = "ExecMakeTableFunctionResult"
#22 0x00005636d9bb1760 in FunctionNext (node=node(at)entry=0x5636dc069fd0) at
./build/../src/backend/executor/nodeFunctionscan.c:94
tstore = 0x0
estate = <optimized out>
direction = ForwardScanDirection
scanslot = 0x5636dc06ae88
alldone = <optimized out>
oldpos = <optimized out>
funcno = <optimized out>
att = <optimized out>
#23 0x00005636d9ba40f9 in ExecScanFetch (recheckMtd=0x5636d9bb14a0
<FunctionRecheck>, accessMtd=0x5636d9bb14d0 <FunctionNext>,
node=0x5636dc069fd0) at ./build/../src/backend/executor/execScan.c:95
estate = 0x5636dc069dc0
#24 ExecScan (node=0x5636dc069fd0, accessMtd=0x5636d9bb14d0 <FunctionNext>,
recheckMtd=0x5636d9bb14a0 <FunctionRecheck>) at
./build/../src/backend/executor/execScan.c:145
econtext = <optimized out>
qual = 0x0
projInfo = 0x0
#25 0x00005636d9ba25c9 in ExecProcNodeInstr (node=0x5636dc069fd0) at
./build/../src/backend/executor/execProcnode.c:461
result = <optimized out>
#26 0x00005636d9b9b4c3 in ExecProcNode (node=0x5636dc069fd0) at
./build/../src/include/executor/executor.h:247
No locals.
#27 ExecutePlan (execute_once=<optimized out>, dest=0x5636dc12ccb8,
direction=<optimized out>, numberTuples=0, sendTuples=<optimized out>,
operation=CMD_SELECT, use_parallel_mode=<optimized out>,
planstate=0x5636dc069fd0,
estate=0x5636dc069dc0) at
./build/../src/backend/executor/execMain.c:1723
slot = <optimized out>
current_tuple_count = 0
#28 standard_ExecutorRun (queryDesc=0x5636dc068590, direction=<optimized
out>, count=0, execute_once=<optimized out>) at
./build/../src/backend/executor/execMain.c:364
estate = 0x5636dc069dc0
operation = CMD_SELECT
dest = 0x5636dc12ccb8
sendTuples = <optimized out>
__func__ = "standard_ExecutorRun"
#29 0x00007f4597d08e15 in pgss_ExecutorRun (queryDesc=0x5636dc068590,
direction=ForwardScanDirection, count=0, execute_once=<optimized out>) at
./build/../contrib/pg_stat_statements/pg_stat_statements.c:892
save_exception_stack = 0x7fffefd2f9b0
save_context_stack = 0x0
local_sigjmp_buf = {{__jmpbuf = {94793620008672, -81779227836027085,
0, 94793619637648, 0, 1, -81779227869581517, -24340724960596173},
__mask_was_saved = 0, __saved_mask = {__val = {94793583733915,
140737216969008,
7074228167464498944, 140737216969008, 94793619643840,
94793620438296, 94793619644864, 2, 94793619644368, 140737216969168,
94793581140788, 7987184768, 94793620441608, 94793619643840, 336, 1}}}}
#30 0x00007f417cdbf4dd in explain_ExecutorRun (queryDesc=0x5636dc068590,
direction=ForwardScanDirection, count=0, execute_once=<optimized out>) at
./build/../contrib/auto_explain/auto_explain.c:268
save_exception_stack = 0x7fffefd2fb50
save_context_stack = 0x0
local_sigjmp_buf = {{__jmpbuf = {94793620008672, -81779227930398925,
0, 94793619637648, 0, 1, -81779227829735629, -26933449381777613},
__mask_was_saved = 0, __saved_mask = {__val = {94793620441784,
140737216969344, 336, 1,
94793620155408, 24, 94793619637648, 0, 1, 140737216969312,
94793583755684, 94793620008672, 94793620155728, 140737216969344,
94793583827107, 94793620008672}}}}
#31 0x00005636d9cee59b in PortalRunSelect
(portal=portal(at)entry=0x5636dc0c2ee0, forward=forward(at)entry=true, count=0,
count(at)entry=9223372036854775807, dest=dest(at)entry=0x5636dc12ccb8) at
./build/../src/backend/tcop/pquery.c:932
queryDesc = 0x5636dc068590
direction = <optimized out>
nprocessed = <optimized out>
__func__ = "PortalRunSelect"
#32 0x00005636d9cefb20 in PortalRun (portal=portal(at)entry=0x5636dc0c2ee0,
count=count(at)entry=9223372036854775807, isTopLevel=isTopLevel(at)entry=true,
run_once=run_once(at)entry=true, dest=dest(at)entry=0x5636dc12ccb8,
altdest=altdest(at)entry=0x5636dc12ccb8, completionTag=0x7fffefd2fca0 "")
at ./build/../src/backend/tcop/pquery.c:773
save_exception_stack = 0x7fffefd2fec0
save_context_stack = 0x0
local_sigjmp_buf = {{__jmpbuf = {1, -81779228012187853,
94793619488704, 94793619488752, 94793620008672, 2, -81779227917816013,
-5958515187191648461}, __mask_was_saved = 0, __saved_mask = {__val =
{94793619463952, 0,
2817148525, 94793620020232, 94793619637376, 4, 112,
94793619488704, 94793619484688, 94793584837140, 2, 140737216969760,
94793583757029, 140737216969792, 2, 94793619488704}}}}
result = <optimized out>
nprocessed = <optimized out>
saveTopTransactionResourceOwner = 0x5636dc0823a8
saveTopTransactionContext = 0x5636dc0e6c10
saveActivePortal = 0x0
saveResourceOwner = 0x5636dc0823a8
savePortalContext = 0x0
saveMemoryContext = 0x5636dc0e6c10
__func__ = "PortalRun"
#33 0x00005636d9ceb7b9 in exec_simple_query (query_string=0x5636dc043120
"SELECT * FROM crashrepro4();") at
./build/../src/backend/tcop/postgres.c:1145
parsetree = 0x5636dc043fc0
portal = 0x5636dc0c2ee0
snapshot_set = <optimized out>
commandTag = <optimized out>
completionTag =
"\000\000\000\000\002\000\000\000\340\374\322\357\377\177\000\000-\r\324\357\377\177\000\000\000\000\000\000\000\000\000\000\360\374\322\357\002\000\000\000Q\000\000\000\000\000\000\000
1\004\334\066V\000\000`\376\322\357\377\177\000"
querytree_list = <optimized out>
plantree_list = <optimized out>
receiver = 0x5636dc12ccb8
format = 0
dest = DestRemote
parsetree_list = 0x5636dc044010
parsetree_item = 0x5636dc043ff0
save_log_statement_stats = false
was_logged = false
use_implicit_block = false
msec_str =
"\000\000\000\000\002\000\000\000\340\374\322\357\377\177\000\000-\r\324\357\377\177\000\000\000\000\000\000\000\000\000"
__func__ = "exec_simple_query"
#34 0x00005636d9ced623 in PostgresMain (argc=<optimized out>,
argv=argv(at)entry=0x5636dc070128, dbname=<optimized out>, username=<optimized
out>) at ./build/../src/backend/tcop/postgres.c:4182
query_string = 0x5636dc043120 "SELECT * FROM crashrepro4();"
input_message = {data = 0x5636dc043120 "SELECT * FROM
crashrepro4();", len = 29, maxlen = 1024, cursor = 29}
local_sigjmp_buf = {{__jmpbuf = {140737216970368,
-81779225453662413, 1, 94793619668968, 94793619668968, 94793619632848,
-81779227978633421, -5958515189572833485}, __mask_was_saved = 1,
__saved_mask = {__val = {0,
140737216970544, 140737216970540, 140737216970640,
8589934592, 94793588696280, 94793585032905, 140737216970864,
140737216972320, 140737216971248, 94793619668968, 94793619632848,
139936976780376, 5, 206158430256,
140737216970848}}}}
send_ready_for_query = false
disable_idle_in_transaction_timeout = false
__func__ = "PostgresMain"
#35 0x00005636d99fb097 in BackendRun (port=0x5636dc0672d0) at
./build/../src/backend/postmaster/postmaster.c:4358
ac = 1
secs = 612288553
usecs = 224860
i = 1
av = 0x5636dc070128
maxac = <optimized out>
#36 BackendStartup (port=0x5636dc0672d0) at
./build/../src/backend/postmaster/postmaster.c:4030
bn = <optimized out>
pid = <optimized out>
#37 ServerLoop () at ./build/../src/backend/postmaster/postmaster.c:1707
rmask = {fds_bits = {128, 0 <repeats 15 times>}}
selres = <optimized out>
now = <optimized out>
readmask = {fds_bits = {200, 0 <repeats 15 times>}}
last_lockfile_recheck_time = 1558973336
last_touch_time = 1558973276
__func__ = "ServerLoop"
#38 0x00005636d9c78221 in PostmasterMain (argc=7, argv=0x5636dc03de10) at
./build/../src/backend/postmaster/postmaster.c:1380
opt = <optimized out>
status = <optimized out>
userDoption = <optimized out>
listen_addr_saved = true
i = <optimized out>
output_config_variable = <optimized out>
__func__ = "PostmasterMain"
#39 0x00005636d99fc594 in main (argc=7, argv=0x5636dc03de10) at
./build/../src/backend/main/main.c:228
No locals.
(gdb)

Responses

Browse pgsql-bugs by date

  From Date Subject
Next Message Juan José Santamaría Flecha 2019-05-28 05:31:55 Re: BUG #15789: libpq compilation with OpenSSL 1.1.1b fails on Windows with Visual Studio 2017
Previous Message Tom Lane 2019-05-27 13:09:34 Re: BUG #15820: Commuting two column make a SubqueryScan node to appear in the plan

Browse pgsql-hackers by date

  From Date Subject
Next Message Akim Demaille 2019-05-27 16:49:20 Re: Remove useless associativity/precedence from parsers
Previous Message Andres Freund 2019-05-27 16:05:38 Re: pgsql: tableam: introduce table AM infrastructure.