From 2c58a32ca89867e7d244e7037a38aa9ccc2b92c2 Mon Sep 17 00:00:00 2001
From: Sokolov Yura <funny.falcon@postgrespro.ru>
Date: Thu, 25 May 2017 14:54:45 +0300
Subject: [PATCH] Fix performance of Atomics generic implementation

pg_atomic_compare_exchange_*_impl writes the current value back through
the `old` pointer, so there is no need to re-read the value in the loop body.

Also add gcc-specific __sync_fetch_and_(or|and), because it looks like the
compiler may optimize code around an intrinsic better than around assembler,
although the intrinsic is compiled to almost the same CAS loop.
---
 src/include/port/atomics/generic-gcc.h | 36 +++++++++++++++++++
 src/include/port/atomics/generic.h     | 64 +++++++++-------------------------
 2 files changed, 52 insertions(+), 48 deletions(-)

diff --git a/src/include/port/atomics/generic-gcc.h b/src/include/port/atomics/generic-gcc.h
index 7efc0861e7..79ada94047 100644
--- a/src/include/port/atomics/generic-gcc.h
+++ b/src/include/port/atomics/generic-gcc.h
@@ -185,6 +185,24 @@ pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
 }
 #endif
 
+#if !defined(PG_HAVE_ATOMIC_FETCH_AND_U32) && defined(HAVE_GCC__SYNC_INT32_CAS)
+#define PG_HAVE_ATOMIC_FETCH_AND_U32	/* marks u32 fetch-and as provided; the guard above tests this */
+static inline uint32
+pg_atomic_fetch_and_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 and_)
+{
+	return __sync_fetch_and_and(&ptr->value, and_);	/* atomic ptr->value &= and_; the GCC builtin returns the prior value */
+}
+#endif
+
+#if !defined(PG_HAVE_ATOMIC_FETCH_OR_U32) && defined(HAVE_GCC__SYNC_INT32_CAS)
+#define PG_HAVE_ATOMIC_FETCH_OR_U32	/* marks u32 fetch-or as provided; the guard above tests this */
+static inline uint32
+pg_atomic_fetch_or_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 or_)
+{
+	return __sync_fetch_and_or(&ptr->value, or_);	/* atomic ptr->value |= or_; the GCC builtin returns the prior value */
+}
+#endif
+
 
 #if !defined(PG_DISABLE_64_BIT_ATOMICS)
 
@@ -223,6 +241,24 @@ pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
 }
 #endif
 
+#if !defined(PG_HAVE_ATOMIC_FETCH_AND_U64) && defined(HAVE_GCC__SYNC_INT64_CAS)
+#define PG_HAVE_ATOMIC_FETCH_AND_U64	/* marks u64 fetch-and as provided; the guard above tests this */
+static inline uint64
+pg_atomic_fetch_and_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 and_)
+{
+	return __sync_fetch_and_and(&ptr->value, and_);	/* atomic ptr->value &= and_; the GCC builtin returns the prior value */
+}
+#endif
+
+#if !defined(PG_HAVE_ATOMIC_FETCH_OR_U64) && defined(HAVE_GCC__SYNC_INT64_CAS)
+#define PG_HAVE_ATOMIC_FETCH_OR_U64	/* marks u64 fetch-or as provided; the guard above tests this */
+static inline uint64
+pg_atomic_fetch_or_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 or_)
+{
+	return __sync_fetch_and_or(&ptr->value, or_);	/* atomic ptr->value |= or_; the GCC builtin returns the prior value */
+}
+#endif
+
 #endif /* !defined(PG_DISABLE_64_BIT_ATOMICS) */
 
 #endif /* defined(HAVE_ATOMICS) */
diff --git a/src/include/port/atomics/generic.h b/src/include/port/atomics/generic.h
index 424543604a..6d2671beab 100644
--- a/src/include/port/atomics/generic.h
+++ b/src/include/port/atomics/generic.h
@@ -170,12 +170,8 @@ static inline uint32
 pg_atomic_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 xchg_)
 {
 	uint32 old;
-	while (true)
-	{
-		old = pg_atomic_read_u32_impl(ptr);
-		if (pg_atomic_compare_exchange_u32_impl(ptr, &old, xchg_))
-			break;
-	}
+	old = pg_atomic_read_u32_impl(ptr);	/* read once up front; a failed CAS rewrites old for us */
+	while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, xchg_));	/* empty body: retry until xchg_ is stored */
 	return old;
 }
 #endif
@@ -186,12 +182,8 @@ static inline uint32
 pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
 {
 	uint32 old;
-	while (true)
-	{
-		old = pg_atomic_read_u32_impl(ptr);
-		if (pg_atomic_compare_exchange_u32_impl(ptr, &old, old + add_))
-			break;
-	}
+	old = pg_atomic_read_u32_impl(ptr);	/* read once up front; a failed CAS rewrites old for us */
+	while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, old + add_));	/* empty body: old + add_ is recomputed from the refreshed old on each retry */
 	return old;
 }
 #endif
@@ -211,12 +203,8 @@ static inline uint32
 pg_atomic_fetch_and_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 and_)
 {
 	uint32 old;
-	while (true)
-	{
-		old = pg_atomic_read_u32_impl(ptr);
-		if (pg_atomic_compare_exchange_u32_impl(ptr, &old, old & and_))
-			break;
-	}
+	old = pg_atomic_read_u32_impl(ptr);	/* read once up front; a failed CAS rewrites old for us */
+	while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, old & and_));	/* empty body: old & and_ is recomputed from the refreshed old on each retry */
 	return old;
 }
 #endif
@@ -227,12 +215,8 @@ static inline uint32
 pg_atomic_fetch_or_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 or_)
 {
 	uint32 old;
-	while (true)
-	{
-		old = pg_atomic_read_u32_impl(ptr);
-		if (pg_atomic_compare_exchange_u32_impl(ptr, &old, old | or_))
-			break;
-	}
+	old = pg_atomic_read_u32_impl(ptr);	/* read once up front; a failed CAS rewrites old for us */
+	while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, old | or_));	/* empty body: old | or_ is recomputed from the refreshed old on each retry */
 	return old;
 }
 #endif
@@ -261,12 +245,8 @@ static inline uint64
 pg_atomic_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 xchg_)
 {
 	uint64 old;
-	while (true)
-	{
-		old = ptr->value;
-		if (pg_atomic_compare_exchange_u64_impl(ptr, &old, xchg_))
-			break;
-	}
+	old = ptr->value;	/* plain load as the initial guess; the CAS below validates it and rewrites old on failure */
+	while (!pg_atomic_compare_exchange_u64_impl(ptr, &old, xchg_));	/* empty body: retry until xchg_ is stored */
 	return old;
 }
 #endif
@@ -357,12 +337,8 @@ static inline uint64
 pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
 {
 	uint64 old;
-	while (true)
-	{
-		old = pg_atomic_read_u64_impl(ptr);
-		if (pg_atomic_compare_exchange_u64_impl(ptr, &old, old + add_))
-			break;
-	}
+	old = pg_atomic_read_u64_impl(ptr);	/* read once up front; a failed CAS rewrites old for us */
+	while (!pg_atomic_compare_exchange_u64_impl(ptr, &old, old + add_));	/* empty body: old + add_ is recomputed from the refreshed old on each retry */
 	return old;
 }
 #endif
@@ -382,12 +358,8 @@ static inline uint64
 pg_atomic_fetch_and_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 and_)
 {
 	uint64 old;
-	while (true)
-	{
-		old = pg_atomic_read_u64_impl(ptr);
-		if (pg_atomic_compare_exchange_u64_impl(ptr, &old, old & and_))
-			break;
-	}
+	old = pg_atomic_read_u64_impl(ptr);	/* read once up front; a failed CAS rewrites old for us */
+	while (!pg_atomic_compare_exchange_u64_impl(ptr, &old, old & and_));	/* empty body: old & and_ is recomputed from the refreshed old on each retry */
 	return old;
 }
 #endif
@@ -398,12 +370,8 @@ static inline uint64
 pg_atomic_fetch_or_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 or_)
 {
 	uint64 old;
-	while (true)
-	{
-		old = pg_atomic_read_u64_impl(ptr);
-		if (pg_atomic_compare_exchange_u64_impl(ptr, &old, old | or_))
-			break;
-	}
+	old = pg_atomic_read_u64_impl(ptr);	/* read once up front; a failed CAS rewrites old for us */
+	while (!pg_atomic_compare_exchange_u64_impl(ptr, &old, old | or_));	/* empty body: old | or_ is recomputed from the refreshed old on each retry */
 	return old;
 }
 #endif
-- 
2.11.0

