From c953b5b7ea97db54c2cba262528b520a6b452462 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Fri, 29 Sep 2017 15:52:55 -0700
Subject: [PATCH 1/3] Extend & revamp pg_bswap.h infrastructure.

Upcoming patches are going to address performance issues that involve
slow system provided ntohs/htons etc. To address that expand
pg_bswap.h to provide pg_ntoh{16,32,64}, pg_hton{16,32,64} and
optimize their respective implementations by using compiler intrinsics
for gcc compatible compilers and msvc. Fall back to manual
implementations using shifts etc otherwise.

Additionally remove multiple evaluation hazards from the existing
BSWAP32/64 macros, by replacing them with inline functions when
necessary. In the course of that the naming scheme is changed to
pg_bswap16/32/64.

Author: Andres Freund
Discussion: https://postgr.es/m/20170927172019.gheidqy6xvlxb325@alap3.anarazel.de
---
 config/c-compiler.m4            |  17 ++++++
 contrib/btree_gist/btree_uuid.c |   4 +-
 src/include/port/pg_bswap.h     | 132 ++++++++++++++++++++++++++++++++--------
 src/include/port/pg_crc32c.h    |   2 +-
 4 files changed, 128 insertions(+), 27 deletions(-)

diff --git a/config/c-compiler.m4 b/config/c-compiler.m4
index 7275ea69fe..6dcc790649 100644
--- a/config/c-compiler.m4
+++ b/config/c-compiler.m4
@@ -224,6 +224,23 @@ AC_DEFINE(HAVE__BUILTIN_TYPES_COMPATIBLE_P, 1,
 fi])# PGAC_C_TYPES_COMPATIBLE
 
 
+# PGAC_C_BUILTIN_BSWAP16
+# -------------------------
+# Check if the C compiler understands __builtin_bswap16(),
+# and define HAVE__BUILTIN_BSWAP16 if so.
+AC_DEFUN([PGAC_C_BUILTIN_BSWAP16],
+[AC_CACHE_CHECK(for __builtin_bswap16, pgac_cv__builtin_bswap16,
+[AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+[static unsigned long int x = __builtin_bswap16(0xaabb);]
+)],
+[pgac_cv__builtin_bswap16=yes],
+[pgac_cv__builtin_bswap16=no])])
+if test x"$pgac_cv__builtin_bswap16" = xyes ; then
+AC_DEFINE(HAVE__BUILTIN_BSWAP16, 1,
+          [Define to 1 if your compiler understands __builtin_bswap16.])
+fi])# PGAC_C_BUILTIN_BSWAP16
+
+
 
 # PGAC_C_BUILTIN_BSWAP32
 # -------------------------
diff --git a/contrib/btree_gist/btree_uuid.c b/contrib/btree_gist/btree_uuid.c
index ecf357d662..9ff421ea55 100644
--- a/contrib/btree_gist/btree_uuid.c
+++ b/contrib/btree_gist/btree_uuid.c
@@ -182,8 +182,8 @@ uuid_2_double(const pg_uuid_t *u)
 	 * machine, byte-swap each half so we can use native uint64 arithmetic.
 	 */
 #ifndef WORDS_BIGENDIAN
-	uu[0] = BSWAP64(uu[0]);
-	uu[1] = BSWAP64(uu[1]);
+	uu[0] = pg_bswap64(uu[0]);
+	uu[1] = pg_bswap64(uu[1]);
 #endif
 
 	/*
diff --git a/src/include/port/pg_bswap.h b/src/include/port/pg_bswap.h
index 50a6bd106b..f67ad4b133 100644
--- a/src/include/port/pg_bswap.h
+++ b/src/include/port/pg_bswap.h
@@ -3,15 +3,13 @@
  * pg_bswap.h
  *	  Byte swapping.
  *
- * Macros for reversing the byte order of 32-bit and 64-bit unsigned integers.
+ * Macros for reversing the byte order of 16, 32 and 64-bit unsigned integers.
  * For example, 0xAABBCCDD becomes 0xDDCCBBAA.  These are just wrappers for
  * built-in functions provided by the compiler where support exists.
- * Elsewhere, beware of multiple evaluations of the arguments!
  *
- * Note that the GCC built-in functions __builtin_bswap32() and
- * __builtin_bswap64() are documented as accepting single arguments of type
- * uint32_t and uint64_t respectively (these are also the respective return
- * types).  Use caution when using these wrapper macros with signed integers.
+ * Note that all of these functions accept unsigned integers as arguments and
+ * return the same.  Use caution when using these wrapper macros with signed
+ * integers.
  *
  * Copyright (c) 2015-2017, PostgreSQL Global Development Group
  *
@@ -22,28 +20,114 @@
 #ifndef PG_BSWAP_H
 #define PG_BSWAP_H
 
-#ifdef HAVE__BUILTIN_BSWAP32
-#define BSWAP32(x) __builtin_bswap32(x)
+
+/* In all supported versions msvc provides _byteswap_* functions in stdlib.h */
+#ifdef _MSC_VER
+#include <stdlib.h>
+#endif
+
+
+/* implementation of uint16 pg_bswap16(uint16) */
+#if defined(HAVE__BUILTIN_BSWAP16)
+
+#define pg_bswap16(x) __builtin_bswap16(x)
+
+#elif defined(_MSC_VER)
+
+#define pg_bswap16(x) _byteswap_ushort(x)
+
 #else
-#define BSWAP32(x) ((((x) << 24) & 0xff000000) | \
-					(((x) << 8)  & 0x00ff0000) | \
-					(((x) >> 8)  & 0x0000ff00) | \
-					(((x) >> 24) & 0x000000ff))
+
+static inline uint16
+pg_bswap16(uint16 x)
+{
+	return
+		((x << 8) & 0xff00) |
+		((x >> 8) & 0x00ff);
+}
+
+#endif							/* HAVE__BUILTIN_BSWAP16 */
+
+
+/* implementation of uint32 pg_bswap32(uint32) */
+#if defined(HAVE__BUILTIN_BSWAP32)
+
+#define pg_bswap32(x) __builtin_bswap32(x)
+
+#elif defined(_MSC_VER)
+
+#define pg_bswap32(x) _byteswap_ulong(x)
+
+#else
+
+static inline uint32
+pg_bswap32(uint32 x)
+{
+	return
+		((x << 24) & 0xff000000) |
+		((x << 8) & 0x00ff0000) |
+		((x >> 8) & 0x0000ff00) |
+		((x >> 24) & 0x000000ff);
+}
+
 #endif							/* HAVE__BUILTIN_BSWAP32 */
 
-#ifdef HAVE__BUILTIN_BSWAP64
-#define BSWAP64(x) __builtin_bswap64(x)
+
+/* implementation of uint64 pg_bswap64(uint64) */
+#if defined(HAVE__BUILTIN_BSWAP64)
+
+#define pg_bswap64(x) __builtin_bswap64(x)
+
+
+#elif defined(_MSC_VER)
+
+#define pg_bswap64(x) _byteswap_uint64(x)
+
 #else
-#define BSWAP64(x) ((((x) << 56) & UINT64CONST(0xff00000000000000)) | \
-					(((x) << 40) & UINT64CONST(0x00ff000000000000)) | \
-					(((x) << 24) & UINT64CONST(0x0000ff0000000000)) | \
-					(((x) << 8)  & UINT64CONST(0x000000ff00000000)) | \
-					(((x) >> 8)  & UINT64CONST(0x00000000ff000000)) | \
-					(((x) >> 24) & UINT64CONST(0x0000000000ff0000)) | \
-					(((x) >> 40) & UINT64CONST(0x000000000000ff00)) | \
-					(((x) >> 56) & UINT64CONST(0x00000000000000ff)))
+
+static inline uint16
+pg_bswap64(uint16 x)
+{
+	return
+		((x << 56) & UINT64CONST(0xff00000000000000)) |
+		((x << 40) & UINT64CONST(0x00ff000000000000)) |
+		((x << 24) & UINT64CONST(0x0000ff0000000000)) |
+		((x << 8) & UINT64CONST(0x000000ff00000000)) |
+		((x >> 8) & UINT64CONST(0x00000000ff000000)) |
+		((x >> 24) & UINT64CONST(0x0000000000ff0000)) |
+		((x >> 40) & UINT64CONST(0x000000000000ff00)) |
+		((x >> 56) & UINT64CONST(0x00000000000000ff));
+}
 #endif							/* HAVE__BUILTIN_BSWAP64 */
 
+
+/*
+ * Portable and fast equivalents for for ntohs, ntohl, htons, htonl,
+ * additionally extended to 64 bits.
+ */
+#ifdef WORDS_BIGENDIAN
+
+#define pg_hton16(x)		(x)
+#define pg_hton32(x)		(x)
+#define pg_hton64(x)		(x)
+
+#define pg_ntoh16(x)		(x)
+#define pg_ntoh32(x)		(x)
+#define pg_ntoh64(x)		(x)
+
+#else
+
+#define pg_hton16(x)		pg_bswap16(x)
+#define pg_hton32(x)		pg_bswap32(x)
+#define pg_hton64(x)		pg_bswap64(x)
+
+#define pg_ntoh16(x)		pg_bswap16(x)
+#define pg_ntoh32(x)		pg_bswap32(x)
+#define pg_ntoh64(x)		pg_bswap64(x)
+
+#endif							/* WORDS_BIGENDIAN */
+
+
 /*
  * Rearrange the bytes of a Datum from big-endian order into the native byte
  * order.  On big-endian machines, this does nothing at all.  Note that the C
@@ -60,9 +144,9 @@
 #define		DatumBigEndianToNative(x)	(x)
 #else							/* !WORDS_BIGENDIAN */
 #if SIZEOF_DATUM == 8
-#define		DatumBigEndianToNative(x)	BSWAP64(x)
+#define		DatumBigEndianToNative(x)	pg_bswap64(x)
 #else							/* SIZEOF_DATUM != 8 */
-#define		DatumBigEndianToNative(x)	BSWAP32(x)
+#define		DatumBigEndianToNative(x)	pg_bswap32(x)
 #endif							/* SIZEOF_DATUM == 8 */
 #endif							/* WORDS_BIGENDIAN */
 
diff --git a/src/include/port/pg_crc32c.h b/src/include/port/pg_crc32c.h
index cd58ecc988..32d7176273 100644
--- a/src/include/port/pg_crc32c.h
+++ b/src/include/port/pg_crc32c.h
@@ -73,7 +73,7 @@ extern pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len)
 #define COMP_CRC32C(crc, data, len) \
 	((crc) = pg_comp_crc32c_sb8((crc), (data), (len)))
 #ifdef WORDS_BIGENDIAN
-#define FIN_CRC32C(crc) ((crc) = BSWAP32(crc) ^ 0xFFFFFFFF)
+#define FIN_CRC32C(crc) ((crc) = pg_bswap32(crc) ^ 0xFFFFFFFF)
 #else
 #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF)
 #endif
-- 
2.14.1.536.g6867272d5b.dirty

