Index: src/backend/access/transam/multixact.c =================================================================== RCS file: /home/alvherre/cvs/pgsql/src/backend/access/transam/multixact.c,v retrieving revision 1.9 diff -c -r1.9 multixact.c *** src/backend/access/transam/multixact.c 15 Oct 2005 02:49:09 -0000 1.9 --- src/backend/access/transam/multixact.c 27 Oct 2005 21:39:04 -0000 *************** *** 798,804 **** --- 798,807 ---- ExtendMultiXactMember(*offset, nxids); + /* Advance the offset counter, but don't leave it at 0. */ MultiXactState->nextOffset += nxids; + if (MultiXactState->nextOffset == 0) + MultiXactState->nextOffset = 1; LWLockRelease(MultiXactGenLock); *************** *** 829,834 **** --- 832,838 ---- MultiXactId tmpMXact; MultiXactOffset nextOffset; TransactionId *ptr; + int j = 0; debug_elog3(DEBUG2, "GetMembers: asked for %u", multi); *************** *** 922,932 **** --- 926,975 ---- pageno = MultiXactIdToOffsetPage(tmpMXact); entryno = MultiXactIdToOffsetEntry(tmpMXact); + retry: if (pageno != prev_pageno) slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, tmpMXact); offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno]; offptr += entryno; + + /* + * Note a possible race condition: when we create a new MultiXact and + * store its info in MultiXactState, we release the MultiXactGenLock + * before storing the offset in the SLRU area. It's thus possible that + * we just got the offset that some other backend has been assigned, + * but hasn't written on the SLRU page yet. + * + * One way to close this hole is to make the creating backend hold + * MultiXactGenLock until the offset is stored. That would be too bad + * a performance hit, however, so instead we choose to check for this + * situation here: if we read a zero offset, sleep and retry, until the + * other backend has had a chance to write the true offset. + * + * Because of this, we have to make sure offset 0 is never used. + */ + if (*offptr == 0) + { + LWLockRelease(MultiXactOffsetControlLock); + pg_usleep(1000); + + /* + * Note that since we released the OffsetControlLock, we cannot be + * sure that the page we read is still on the buffer, so we must + * force it to be read again. + */ + prev_pageno = -1; + /* + * We are not sure that there aren't other bugs in this code, so + * we refuse to iterate more than a minute's worth. + */ + #define MAX_OFFSET_ITERATIONS (60 * 1000) + if (j++ > MAX_OFFSET_ITERATIONS) + elog(PANIC, "too many GetMultiXactIdMembers iterations"); + + LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE); + goto retry; + } length = *offptr - offset; } *************** *** 1200,1205 **** --- 1243,1254 ---- /* Make sure we zero out the per-backend state */ MemSet(MultiXactState, 0, SHARED_MULTIXACT_STATE_SIZE); + + /* + * zero is not a valid offset, so skip it. See notes in + * GetMultiXactIdMembers. + */ + MultiXactState->nextOffset = 1; } else Assert(found);