From 934d649a18c66f8b448463e57375865b33ce53e7 Mon Sep 17 00:00:00 2001 From: nkey Date: Fri, 1 Apr 2022 02:17:55 +0300 Subject: [PATCH v5] Optimize KnownAssignedXidsGetAndSetXmin by maintaining offset to next valid xid. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently KnownAssignedXidsGetAndSetXmin requires an iterative loop through KnownAssignedXids array, including xids marked as invalid. Performance impact is especially noticeable in the presence of long (few seconds) transactions on primary, high value (few thousands) of max_connections and high read workload on standby. Most of the cpu spent on looping throw KnownAssignedXids while almost all xid are invalid anyway. KnownAssignedXidsCompress removes invalid xids from time to time, but performance is still affected. To increase performance we lazily maintain an offset in the KnownAssignedXidsNext array to skip known to be invalid xids. KnownAssignedXidsNext does not always point to “next” valid xid, it is just some offset safe to skip (known to be filled by only invalid xids). It helps to skip the gaps and could significantly increase performance - up to 10% more TPS on standby. Thread: https://www.postgresql.org/message-id/flat/CALdSSPgahNUD_%3DpB_j%3D1zSnDBaiOtqVfzo8Ejt5J_k7qZiU1Tw%40mail.gmail.com Benchmark: https://www.postgresql.org/message-id/flat/CANtu0ohzBFTYwdLtcanWo4%2B794WWUi7LY2rnbHyorJdE8_ZnGg%40mail.gmail.com#379c1be7b8134ada5a574078d51b64c6 --- src/backend/storage/ipc/procarray.c | 56 ++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 9 deletions(-) diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 13d192ec2b..b5cb3423fb 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -267,6 +267,7 @@ static PGPROC *allProcs; */ static TransactionId *KnownAssignedXids; static bool *KnownAssignedXidsValid; +static int32 *KnownAssignedXidsNext; static TransactionId latestObservedXid = InvalidTransactionId; /* @@ -455,6 +456,12 @@ CreateSharedProcArray(void) ShmemInitStruct("KnownAssignedXidsValid", mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS), &found); + KnownAssignedXidsNext = (int32 *) + ShmemInitStruct("KnownAssignedXidsNext", + mul_size(sizeof(int32), TOTAL_MAX_CACHED_SUBXIDS), + &found); + for (int i = 0; i < TOTAL_MAX_CACHED_SUBXIDS; i++) + KnownAssignedXidsNext[i] = 1; } } @@ -4544,7 +4551,13 @@ ExpireOldKnownAssignedTransactionIds(TransactionId xid) * XID entry itself. This preserves the property that the XID entries are * sorted, so we can do binary searches easily. Periodically we compress * out the unused entries; that's much cheaper than having to compress the - * array immediately on every deletion. + * array immediately on every deletion. Also, we lazily maintain an offset + * in KnownAssignedXidsNext[] array to skip known to be invalid xids. It + * helps to skip the gaps; it could significantly increase performance in + * the case of long transactions on the primary. KnownAssignedXidsNext[] is + * updating while taking the snapshot. In general case KnownAssignedXidsNext + * contains not an offset to the next valid xid but a number which tends to + * the offset to next valid xid. * * The actually valid items in KnownAssignedXids[] and KnownAssignedXidsValid[] * are those with indexes tail <= i < head; items outside this subscript range @@ -4582,7 +4595,7 @@ ExpireOldKnownAssignedTransactionIds(TransactionId xid) * must happen) * * Compressing the array is O(S) and requires exclusive lock * * Removing an XID is O(logS) and requires exclusive lock - * * Taking a snapshot is O(S) and requires shared lock + * * Taking a snapshot is O(S), O(N) next call; requires shared lock * * Checking for an XID is O(logS) and requires shared lock * * In comparison, using a hash table for KnownAssignedXids would mean that @@ -4642,12 +4655,13 @@ KnownAssignedXidsCompress(bool force) * re-aligning data to 0th element. */ compress_index = 0; - for (i = tail; i < head; i++) + for (i = tail; i < head; i += KnownAssignedXidsNext[i]) { if (KnownAssignedXidsValid[i]) { KnownAssignedXids[compress_index] = KnownAssignedXids[i]; KnownAssignedXidsValid[compress_index] = true; + KnownAssignedXidsNext[compress_index] = 1; compress_index++; } } @@ -4750,6 +4764,7 @@ KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid, { KnownAssignedXids[head] = next_xid; KnownAssignedXidsValid[head] = true; + KnownAssignedXidsNext[head] = 1; TransactionIdAdvance(next_xid); head++; } @@ -4965,7 +4980,7 @@ KnownAssignedXidsRemovePreceding(TransactionId removeXid) tail = pArray->tailKnownAssignedXids; head = pArray->headKnownAssignedXids; - for (i = tail; i < head; i++) + for (i = tail; i < head; i += KnownAssignedXidsNext[i]) { if (KnownAssignedXidsValid[i]) { @@ -4988,7 +5003,7 @@ KnownAssignedXidsRemovePreceding(TransactionId removeXid) /* * Advance the tail pointer if we've marked the tail item invalid. */ - for (i = tail; i < head; i++) + for (i = tail; i < head; i += KnownAssignedXidsNext[i]) { if (KnownAssignedXidsValid[i]) break; @@ -5038,7 +5053,9 @@ KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin, int count = 0; int head, tail; - int i; + int i, + prev, + prevOffset; /* * Fetch head just once, since it may change while we loop. We can stop @@ -5052,9 +5069,12 @@ KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin, SpinLockAcquire(&procArray->known_assigned_xids_lck); tail = procArray->tailKnownAssignedXids; head = procArray->headKnownAssignedXids; + prev = tail; + prevOffset = KnownAssignedXidsNext[prev]; SpinLockRelease(&procArray->known_assigned_xids_lck); - for (i = tail; i < head; i++) + + for (i = tail; i < head; i += KnownAssignedXidsNext[i]) { /* Skip any gaps in the array */ if (KnownAssignedXidsValid[i]) @@ -5079,6 +5099,24 @@ KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin, /* Add knownXid into output array */ xarray[count++] = knownXid; + + if (prev != i) + { + int32 n = i - prev; + /* + * Do not touch the cache if value is unchanged. This way we + * can avoid additional cache miss. + */ + if (n != prevOffset) + KnownAssignedXidsNext[prev] = n; + /* + * Remember this xid as previous valid. Also, manually store + * prevOffset from current fetched value to avoid additional + * atomic read. + */ + prev = i; + prevOffset = KnownAssignedXidsNext[i]; + } } } @@ -5104,7 +5142,7 @@ KnownAssignedXidsGetOldestXmin(void) head = procArray->headKnownAssignedXids; SpinLockRelease(&procArray->known_assigned_xids_lck); - for (i = tail; i < head; i++) + for (i = tail; i < head; i += KnownAssignedXidsNext[i]) { /* Skip any gaps in the array */ if (KnownAssignedXidsValid[i]) @@ -5139,7 +5177,7 @@ KnownAssignedXidsDisplay(int trace_level) initStringInfo(&buf); - for (i = tail; i < head; i++) + for (i = tail; i < head; i += KnownAssignedXidsNext[i]) { if (KnownAssignedXidsValid[i]) { -- 2.17.1