From 7dd6c9a9c898d9592999f6224beed93dbf853f70 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 16 Jun 2026 13:53:28 -0400
Subject: [PATCH v1] Clean up inconsistencies in CPU-detection macros.

In various places we depend on compiler-defined macros like __x86_64__
to guard CPU-type-specific code.  However, those macros aren't very
well standardized; in particular, it emerges that MSVC doesn't define
any of the ones gcc does, but has its own.  We were not coping with
that consistently, with the result that we're missing some useful
CPU-dependent optimizations in MSVC builds.  There are also some
places that check assorted other spellings, which may have been
the only ones defined by some old compilers, but we weren't doing
that consistently either.

Let's standardize on using gcc's long-form spellings (with trailing
underscores), after putting a stanza into c.h that ensures that these
spellings are defined even when the compiler provides some other one.

I put an "#else #error" branch into the c.h addition so that we'll
get an error if the compiler provides none of the symbols we're
expecting.  That might be best removed in the end, since it might
annoy people trying to port to some new CPU type.  But for right
now it seems like a good idea, in case we've missed some common
variant spelling.

In addition to enabling some optimizations we previously missed on
MSVC, this cleans up a thinko.  Several places used "_M_X64" in the
apparent belief that that's MSVC's equivalent to __x86_64__, but
it's not: MSVC also defines it in its ARM64EC environment, which
targets ARM64 hardware rather than x86_64.

Also, guard the x86_feature_available() stuff in pg_cpu.[hc] with
	#if defined(__x86_64__) || defined(__i386__)
which seems like a more natural way of specifying what it applies to.

This builds on some previous work by Thomas Munro, but it requires
much less code churn because it re-uses gcc's names for the CPU-type
macros instead of inventing our own.

Author: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/CA+hUKGL8Hs-phHPugrWM=5dAkcT897rXyazYzLw-Szxnzgx-rA@mail.gmail.com
Discussion: https://postgr.es/m/3035145.1780503430@sss.pgh.pa.us
---
 src/common/d2s.c                     |  2 +-
 src/include/c.h                      | 58 +++++++++++++++++++++++++++-
 src/include/port/atomics.h           |  6 +--
 src/include/port/atomics/arch-x86.h  |  4 +-
 src/include/port/pg_bitutils.h       |  4 +-
 src/include/port/pg_cpu.h            |  4 +-
 src/include/portability/instr_time.h |  2 +-
 src/include/storage/s_lock.h         | 10 ++---
 src/port/pg_cpu_x86.c                |  6 +--
 9 files changed, 76 insertions(+), 20 deletions(-)

diff --git a/src/common/d2s.c b/src/common/d2s.c
index 34e7cd33bf9..0a9cb83fc49 100644
--- a/src/common/d2s.c
+++ b/src/common/d2s.c
@@ -53,7 +53,7 @@
  * alignment concerns that apply elsewhere.
  */
 #if !defined(HAVE_INT128) && defined(_MSC_VER) \
-	&& !defined(RYU_ONLY_64_BIT_OPS) && defined(_M_X64)
+	&& !defined(RYU_ONLY_64_BIT_OPS) && defined(__x86_64__)
 #define HAS_64_BIT_INTRINSICS
 #endif
 
diff --git a/src/include/c.h b/src/include/c.h
index f32989a6331..ef4dc5d1e63 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -106,6 +106,62 @@ extern "C++"
  * ----------------------------------------------------------------
  */
 
+/*
+ * Not all compilers follow gcc's names of macros for particular target
+ * architectures.  Standardize on gcc's long-form names (trailing __): define
+ * them here if not already defined, or fail if the target is unrecognized.
+ *
+ * Note: while this list is alphabetical, it's necessary to check _M_ARM64
+ * before _M_AMD64, because Microsoft's ARM64EC environment defines both.
+ */
+#if defined(__arm__) || defined(__arm)
+#ifndef __arm__
+#define __arm__ 1
+#endif
+#elif defined(__aarch64__) || defined(_M_ARM64)
+#ifndef __aarch64__
+#define __aarch64__ 1
+#endif
+#elif defined(__loongarch64__) || defined(__loongarch64)
+#ifndef __loongarch64__
+#define __loongarch64__ 1
+#endif
+#elif defined(__mips__)
+/* no work */
+#elif defined(__mips64__)
+/* no work */
+#elif defined(__powerpc__) || defined(__ppc__)
+#ifndef __powerpc__
+#define __powerpc__ 1
+#endif
+#elif defined(__powerpc64__) || defined(__ppc64__)
+#ifndef __powerpc64__
+#define __powerpc64__ 1
+#endif
+#elif defined(__riscv__) || defined(__riscv64__) || defined(__riscv)
+#ifndef __riscv__				/* current gcc and clang define only __riscv */
+#define __riscv__ 1
+#endif
+#elif defined(__s390__)
+/* no work */
+#elif defined(__s390x__)
+/* no work */
+#elif defined(__sparc__) || defined(__sparc)
+#ifndef __sparc__
+#define __sparc__ 1
+#endif
+#elif defined(__i386__) || defined(__i386) || defined(_M_IX86)
+#ifndef __i386__
+#define __i386__ 1
+#endif
+#elif defined(__x86_64__) || defined(__x86_64) || defined(__amd64) || defined(_M_AMD64)
+#ifndef __x86_64__
+#define __x86_64__ 1
+#endif
+#else
+#error "cannot identify target architecture"
+#endif
+
 /*
  * Disable "inline" if PG_FORCE_DISABLE_INLINE is defined.
  * This is used to work around compiler bugs and might also be useful for
@@ -1337,7 +1393,7 @@ typedef struct PGAlignedXLogBlock PGAlignedXLogBlock;
  * SSE2 instructions are part of the spec for the 64-bit x86 ISA. We assume
  * that compilers targeting this architecture understand SSE2 intrinsics.
  */
-#if (defined(__x86_64__) || defined(_M_AMD64))
+#if defined(__x86_64__)
 #define USE_SSE2
 
 #else							/* ! x86_64 */
diff --git a/src/include/port/atomics.h b/src/include/port/atomics.h
index d8b1d20fe60..4a26fe0039f 100644
--- a/src/include/port/atomics.h
+++ b/src/include/port/atomics.h
@@ -63,11 +63,11 @@
  * compiler barrier.
  *
  */
-#if defined(__arm__) || defined(__arm) || defined(__aarch64__)
+#if defined(__arm__) || defined(__aarch64__)
 #include "port/atomics/arch-arm.h"
-#elif defined(__i386__) || defined(__i386) || defined(__x86_64__)
+#elif defined(__i386__) || defined(__x86_64__)
 #include "port/atomics/arch-x86.h"
-#elif defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
+#elif defined(__powerpc__) || defined(__powerpc64__)
 #include "port/atomics/arch-ppc.h"
 #endif
 
diff --git a/src/include/port/atomics/arch-x86.h b/src/include/port/atomics/arch-x86.h
index 8cfe402c339..7637bce3e0b 100644
--- a/src/include/port/atomics/arch-x86.h
+++ b/src/include/port/atomics/arch-x86.h
@@ -32,7 +32,7 @@
  */
 
 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
-#if defined(__i386__) || defined(__i386)
+#if defined(__i386__)
 #define pg_memory_barrier_impl()		\
 	__asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory", "cc")
 #elif defined(__x86_64__)
@@ -238,6 +238,6 @@ pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
 /*
  * 8 byte reads / writes have single-copy atomicity on all x86-64 cpus.
  */
-#if defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) /* gcc, msvc */
+#if defined(__x86_64__)
 #define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
 #endif /* 8 byte single-copy atomicity */
diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h
index 7a00d197013..2864ba431db 100644
--- a/src/include/port/pg_bitutils.h
+++ b/src/include/port/pg_bitutils.h
@@ -82,7 +82,7 @@ pg_leftmost_one_pos64(uint64 word)
 #error "cannot find integer type of the same size as uint64_t"
 #endif
 
-#elif defined(_MSC_VER) && (defined(_M_AMD64) || defined(_M_ARM64))
+#elif defined(_MSC_VER) && (defined(__x86_64__) || defined(__aarch64__))
 	unsigned long result;
 	bool		non_zero;
 
@@ -155,7 +155,7 @@ pg_rightmost_one_pos64(uint64 word)
 #error "cannot find integer type of the same size as uint64_t"
 #endif
 
-#elif defined(_MSC_VER) && (defined(_M_AMD64) || defined(_M_ARM64))
+#elif defined(_MSC_VER) && (defined(__x86_64__) || defined(__aarch64__))
 	unsigned long result;
 	bool		non_zero;
 
diff --git a/src/include/port/pg_cpu.h b/src/include/port/pg_cpu.h
index 566ed7a16e3..22aaa1aa5bc 100644
--- a/src/include/port/pg_cpu.h
+++ b/src/include/port/pg_cpu.h
@@ -13,7 +13,7 @@
 #ifndef PG_CPU_H
 #define PG_CPU_H
 
-#if defined(USE_SSE2) || defined(__i386__)
+#if defined(__x86_64__) || defined(__i386__)
 
 typedef enum X86FeatureId
 {
@@ -58,6 +58,6 @@ x86_feature_available(X86FeatureId feature)
 
 extern uint32 x86_tsc_frequency_khz(char *source, size_t source_size);
 
-#endif							/* defined(USE_SSE2) || defined(__i386__) */
+#endif							/* defined(__x86_64__) || defined(__i386__) */
 
 #endif							/* PG_CPU_H */
diff --git a/src/include/portability/instr_time.h b/src/include/portability/instr_time.h
index 655f8737b6f..826cc202847 100644
--- a/src/include/portability/instr_time.h
+++ b/src/include/portability/instr_time.h
@@ -95,7 +95,7 @@ typedef struct instr_time
  * PG_INSTR_TSC_CLOCK controls whether the TSC clock source is compiled in, and
  * potentially used based on timing_tsc_enabled.
  */
-#if defined(__x86_64__) || defined(_M_X64)
+#if defined(__x86_64__)
 #define PG_INSTR_TICKS_TO_NS 1
 #define PG_INSTR_TSC_CLOCK 1
 #elif defined(WIN32)
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index c9e52511990..96fcaaac01e 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -247,7 +247,7 @@ spin_delay(void)
  * We use the int-width variant of the builtin because it works on more chips
  * than other widths.
  */
-#if defined(__arm__) || defined(__arm) || defined(__aarch64__)
+#if defined(__arm__) || defined(__aarch64__)
 #ifdef HAVE_GCC__SYNC_INT32_TAS
 #define HAS_TEST_AND_SET
 
@@ -287,7 +287,7 @@ spin_delay(void)
 
 #endif	 /* __aarch64__ */
 #endif	 /* HAVE_GCC__SYNC_INT32_TAS */
-#endif	 /* __arm__ || __arm || __aarch64__ */
+#endif	 /* __arm__ || __aarch64__ */
 
 
 /* S/390 and S/390x Linux (32- and 64-bit zSeries) */
@@ -391,7 +391,7 @@ do \
 
 
 /* PowerPC */
-#if defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
+#if defined(__powerpc__) || defined(__powerpc64__)
 #define HAS_TEST_AND_SET
 
 typedef unsigned int slock_t;
@@ -602,7 +602,7 @@ typedef LONG slock_t;
 
 #define SPIN_DELAY() spin_delay()
 
-#ifdef _M_ARM64
+#ifdef __aarch64__
 static __forceinline void
 spin_delay(void)
 {
@@ -633,7 +633,7 @@ spin_delay(void)
 
 #include <intrin.h>
 
-#ifdef _M_ARM64
+#ifdef __aarch64__
 
 /* _ReadWriteBarrier() is insufficient on non-TSO architectures. */
 #pragma intrinsic(_InterlockedExchange)
diff --git a/src/port/pg_cpu_x86.c b/src/port/pg_cpu_x86.c
index 0405ba19f6f..b050677f717 100644
--- a/src/port/pg_cpu_x86.c
+++ b/src/port/pg_cpu_x86.c
@@ -19,7 +19,7 @@
 #include "postgres_fe.h"
 #endif
 
-#if defined(USE_SSE2) || defined(__i386__)
+#if defined(__x86_64__) || defined(__i386__)
 
 #ifdef _MSC_VER
 #include <intrin.h>
@@ -287,10 +287,10 @@ x86_hypervisor_tsc_frequency_khz(void)
 	return 0;
 }
 
-#else							/* defined(USE_SSE2) || defined(__i386__) */
+#else							/* defined(__x86_64__) || defined(__i386__) */
 
 /* prevent linker complaints about empty module */
 extern int	pg_cpu_x86_dummy_variable;
 int			pg_cpu_x86_dummy_variable = 0;
 
-#endif							/* ! (USE_SSE2 || __i386__) */
+#endif							/* ! (__x86_64__ || __i386__) */
-- 
2.52.0

