From d377e36da652ade715b868a6fc7164736f38d1df Mon Sep 17 00:00:00 2001
From: Sokolov Yura <funny.falcon@postgrespro.ru>
Date: Thu, 25 May 2017 14:54:45 +0300
Subject: [PATCH] Fix performance of Atomics generic implementation

pg_atomic_compare_exchange_*_impl stores old value into `old` variable,
so there is no need to reread value in loop body.

Also add gcc specific __sync_fetch_and_(or|and), cause looks like
compiler may optimize code around intrinsic better than around assembler,
although intrinsic is compiled to almost same CAS loop.
---
 src/include/port/atomics/generic-gcc.h | 36 +++++++++++++++++
 src/include/port/atomics/generic.h     | 72 ++++++++++++----------------------
 2 files changed, 60 insertions(+), 48 deletions(-)

diff --git a/src/include/port/atomics/generic-gcc.h b/src/include/port/atomics/generic-gcc.h
index 7efc0861e7..79ada94047 100644
--- a/src/include/port/atomics/generic-gcc.h
+++ b/src/include/port/atomics/generic-gcc.h
@@ -185,6 +185,24 @@ pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
 }
 #endif
 
+#if !defined(PG_HAVE_ATOMIC_FETCH_AND_U32) && defined(HAVE_GCC__SYNC_INT32_CAS)
+#define PG_HAVE_ATOMIC_FETCH_AND_U32
+static inline uint32
+pg_atomic_fetch_and_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 and_)
+{
+	return __sync_fetch_and_and(&ptr->value, and_);
+}
+#endif
+
+#if !defined(PG_HAVE_ATOMIC_FETCH_OR_U32) && defined(HAVE_GCC__SYNC_INT32_CAS)
+#define PG_HAVE_ATOMIC_FETCH_OR_U32
+static inline uint32
+pg_atomic_fetch_or_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 or_)
+{
+	return __sync_fetch_and_or(&ptr->value, or_);
+}
+#endif
+
 
 #if !defined(PG_DISABLE_64_BIT_ATOMICS)
 
@@ -223,6 +241,24 @@ pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
 }
 #endif
 
+#if !defined(PG_HAVE_ATOMIC_FETCH_AND_U64) && defined(HAVE_GCC__SYNC_INT64_CAS)
+#define PG_HAVE_ATOMIC_FETCH_AND_U64
+static inline uint64
+pg_atomic_fetch_and_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 and_)
+{
+	return __sync_fetch_and_and(&ptr->value, and_);
+}
+#endif
+
+#if !defined(PG_HAVE_ATOMIC_FETCH_OR_U64) && defined(HAVE_GCC__SYNC_INT64_CAS)
+#define PG_HAVE_ATOMIC_FETCH_OR_U64
+static inline uint64
+pg_atomic_fetch_or_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 or_)
+{
+	return __sync_fetch_and_or(&ptr->value, or_);
+}
+#endif
+
 #endif /* !defined(PG_DISABLE_64_BIT_ATOMICS) */
 
 #endif /* defined(HAVE_ATOMICS) */
diff --git a/src/include/port/atomics/generic.h b/src/include/port/atomics/generic.h
index 424543604a..75ffaf6e87 100644
--- a/src/include/port/atomics/generic.h
+++ b/src/include/port/atomics/generic.h
@@ -170,12 +170,9 @@ static inline uint32
 pg_atomic_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 xchg_)
 {
 	uint32 old;
-	while (true)
-	{
-		old = pg_atomic_read_u32_impl(ptr);
-		if (pg_atomic_compare_exchange_u32_impl(ptr, &old, xchg_))
-			break;
-	}
+	old = pg_atomic_read_u32_impl(ptr);
+	while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, xchg_))
+		/* skip */;
 	return old;
 }
 #endif
@@ -186,12 +183,9 @@ static inline uint32
 pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
 {
 	uint32 old;
-	while (true)
-	{
-		old = pg_atomic_read_u32_impl(ptr);
-		if (pg_atomic_compare_exchange_u32_impl(ptr, &old, old + add_))
-			break;
-	}
+	old = pg_atomic_read_u32_impl(ptr);
+	while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, old + add_))
+		/* skip */;
 	return old;
 }
 #endif
@@ -211,12 +205,9 @@ static inline uint32
 pg_atomic_fetch_and_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 and_)
 {
 	uint32 old;
-	while (true)
-	{
-		old = pg_atomic_read_u32_impl(ptr);
-		if (pg_atomic_compare_exchange_u32_impl(ptr, &old, old & and_))
-			break;
-	}
+	old = pg_atomic_read_u32_impl(ptr);
+	while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, old & and_))
+		/* skip */;
 	return old;
 }
 #endif
@@ -227,12 +218,9 @@ static inline uint32
 pg_atomic_fetch_or_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 or_)
 {
 	uint32 old;
-	while (true)
-	{
-		old = pg_atomic_read_u32_impl(ptr);
-		if (pg_atomic_compare_exchange_u32_impl(ptr, &old, old | or_))
-			break;
-	}
+	old = pg_atomic_read_u32_impl(ptr);
+	while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, old | or_))
+		/* skip */;
 	return old;
 }
 #endif
@@ -261,12 +249,9 @@ static inline uint64
 pg_atomic_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 xchg_)
 {
 	uint64 old;
-	while (true)
-	{
-		old = ptr->value;
-		if (pg_atomic_compare_exchange_u64_impl(ptr, &old, xchg_))
-			break;
-	}
+	old = ptr->value;
+	while (!pg_atomic_compare_exchange_u64_impl(ptr, &old, xchg_))
+		/* skip */;
 	return old;
 }
 #endif
@@ -357,12 +342,9 @@ static inline uint64
 pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
 {
 	uint64 old;
-	while (true)
-	{
-		old = pg_atomic_read_u64_impl(ptr);
-		if (pg_atomic_compare_exchange_u64_impl(ptr, &old, old + add_))
-			break;
-	}
+	old = pg_atomic_read_u64_impl(ptr);
+	while (!pg_atomic_compare_exchange_u64_impl(ptr, &old, old + add_))
+		/* skip */;
 	return old;
 }
 #endif
@@ -382,12 +364,9 @@ static inline uint64
 pg_atomic_fetch_and_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 and_)
 {
 	uint64 old;
-	while (true)
-	{
-		old = pg_atomic_read_u64_impl(ptr);
-		if (pg_atomic_compare_exchange_u64_impl(ptr, &old, old & and_))
-			break;
-	}
+	old = pg_atomic_read_u64_impl(ptr);
+	while (!pg_atomic_compare_exchange_u64_impl(ptr, &old, old & and_))
+		/* skip */;
 	return old;
 }
 #endif
@@ -398,12 +377,9 @@ static inline uint64
 pg_atomic_fetch_or_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 or_)
 {
 	uint64 old;
-	while (true)
-	{
-		old = pg_atomic_read_u64_impl(ptr);
-		if (pg_atomic_compare_exchange_u64_impl(ptr, &old, old | or_))
-			break;
-	}
+	old = pg_atomic_read_u64_impl(ptr);
+	while (!pg_atomic_compare_exchange_u64_impl(ptr, &old, old | or_))
+		/* skip */;
 	return old;
 }
 #endif
-- 
2.11.0

