From 7e8055206ce3c3abef611028c1dfdca1d4fde0c0 Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <sawada.mshk@gmail.com>
Date: Thu, 25 Jun 2026 10:03:44 -0700
Subject: [PATCH v2] Optimize UUID parsing using SIMD.

Author:
Reviewed-by:
Discussion: https://postgr.es/m/
---
 src/backend/utils/adt/uuid.c       | 102 +++++++++++++++++++++++++++--
 src/test/regress/expected/uuid.out |  55 ++++++++++++++++
 src/test/regress/sql/uuid.sql      |  16 +++++
 3 files changed, 168 insertions(+), 5 deletions(-)

diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c
index 6ee3752ac78..6e7b841bde4 100644
--- a/src/backend/utils/adt/uuid.c
+++ b/src/backend/utils/adt/uuid.c
@@ -19,7 +19,9 @@
 #include "common/hashfn.h"
 #include "lib/hyperloglog.h"
 #include "libpq/pqformat.h"
+#include "nodes/miscnodes.h"
 #include "port/pg_bswap.h"
+#include "utils/builtins.h"
 #include "utils/fmgrprotos.h"
 #include "utils/guc.h"
 #include "utils/skipsupport.h"
@@ -122,13 +124,10 @@ uuid_out(PG_FUNCTION_ARGS)
 }
 
 /*
- * We allow UUIDs as a series of 32 hexadecimal digits with an optional dash
- * after each group of 4 hexadecimal digits, and optionally surrounded by {}.
- * (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal
- * digits, is the only one used for output.)
+ * General UUID parser.
  */
 static void
-string_to_uuid(const char *source, pg_uuid_t *uuid, Node *escontext)
+string_to_uuid_scalar(const char *source, pg_uuid_t *uuid, Node *escontext)
 {
 	const char *src = source;
 	bool		braces = false;
@@ -177,6 +176,99 @@ syntax_error:
 					"uuid", source)));
 }
 
+/*
+ * Fast path for the common UUID shapes, built on our SIMD-aware hex decoder.
+ *
+ * This handles a bare string of 32 hex digits and the canonical
+ * 8x-4x-4x-4x-12x form (where "nx" means n hex digits), each optionally
+ * wrapped in braces. Any other shape, or any decoding error, is handed off to
+ * string_to_uuid_scalar() so that parsing and error reporting stay identical
+ * to the scalar implementation.
+ */
+#ifndef	USE_NO_SIMD
+static void
+string_to_uuid_fast(const char *source, pg_uuid_t *uuid, Node *escontext)
+{
+	const char *body = source;
+	size_t		len = strlen(source);
+	const char *hexsrc = NULL;
+	char		hexbuf[32];
+	uint64		written;
+	ErrorSaveContext esctx = {T_ErrorSaveContext};
+
+	/* Strip one optional surrounding brace pair */
+	if (len >= 2 && source[0] == '{' && source[len - 1] == '}')
+	{
+		body = source + 1;
+		len -= 2;
+	}
+
+	if (len == 32)
+	{
+		/*
+		 * Body is already 32 contiguous hex digits -- decode straight from
+		 * the input. hex_decode_safe() reads exactly body[0..31], so it never
+		 * touches the trailing NUL or '}'.
+		 */
+		hexsrc = body;
+	}
+	else if (len == 36 && body[8] == '-' && body[13] == '-' &&
+			 body[18] == '-' && body[23] == '-')
+	{
+		/*
+		 * Canonical 8x-4x-4x-4x-12x form; compact the digits into hexbuf with
+		 * fixed-offset copies, dropping the dashes.
+		 */
+		memcpy(&hexbuf[0], &body[0], 8);
+		memcpy(&hexbuf[8], &body[9], 4);
+		memcpy(&hexbuf[12], &body[14], 4);
+		memcpy(&hexbuf[16], &body[19], 4);
+		memcpy(&hexbuf[20], &body[24], 12);
+		hexsrc = hexbuf;
+	}
+
+	if (hexsrc == NULL)
+	{
+		/* Uncommon shape; let the general parser handle it */
+		string_to_uuid_scalar(source, uuid, escontext);
+		return;
+	}
+
+	/*
+	 * Decode the UUID hex data using our hex decoder that is SIMD-aware. We
+	 * give it a private error context so that a decode failure is swallowed
+	 * here and reported by the scalar path instead, keeping the error message
+	 * identical.
+	 */
+	written = hex_decode_safe(hexsrc, 32, (char *) uuid->data, (Node *) &esctx);
+
+	/*
+	 * Fall back to the scalar path on any error. We must also reject a short
+	 * result: hex_decode_safe() skips whitespace, so it can succeed yet write
+	 * fewer than UUID_LEN bytes, whereas the UUID grammar forbids
+	 * whitespace.
+	 */
+	if (esctx.error_occurred || written != UUID_LEN)
+		string_to_uuid_scalar(source, uuid, escontext);
+}
+#endif
+
+/*
+ * We allow UUIDs as a series of 32 hexadecimal digits with an optional dash
+ * after each group of 4 hexadecimal digits, and optionally surrounded by {}.
+ * (The canonical format 8x-4x-4x-4x-12x is the only one used for output.)
+ * Parsing goes through string_to_uuid_fast() unless USE_NO_SIMD is defined.
+ */
+static void
+string_to_uuid(const char *source, pg_uuid_t *uuid, Node *escontext)
+{
+#ifdef USE_NO_SIMD
+	string_to_uuid_scalar(source, uuid, escontext);
+#else
+	string_to_uuid_fast(source, uuid, escontext);
+#endif
+}
+
 Datum
 uuid_recv(PG_FUNCTION_ARGS)
 {
diff --git a/src/test/regress/expected/uuid.out b/src/test/regress/expected/uuid.out
index 9c5dda9e9ab..928e71c7ad3 100644
--- a/src/test/regress/expected/uuid.out
+++ b/src/test/regress/expected/uuid.out
@@ -340,5 +340,60 @@ SELECT v = v::bytea::uuid as matched FROM gen_random_uuid() v;
  t
 (1 row)
 
+-- Test UUID shapes that the parser uses the SIMD path.
+SELECT '5b35380a-7143-4912-9b55-f322699c6770'::uuid;
+                 uuid                 
+--------------------------------------
+ 5b35380a-7143-4912-9b55-f322699c6770
+(1 row)
+
+SELECT '{5b35380a-7143-4912-9b55-f322699c6770}'::uuid;
+                 uuid                 
+--------------------------------------
+ 5b35380a-7143-4912-9b55-f322699c6770
+(1 row)
+
+SELECT '5b35380a714349129b55f322699c6770'::uuid;
+                 uuid                 
+--------------------------------------
+ 5b35380a-7143-4912-9b55-f322699c6770
+(1 row)
+
+SELECT '{5b35380a714349129b55f322699c6770}'::uuid;
+                 uuid                 
+--------------------------------------
+ 5b35380a-7143-4912-9b55-f322699c6770
+(1 row)
+
+-- Test if the UUID parser using SIMD optimization correctly rejects invalid UUID
+-- string format.
+SELECT '5b35380a714349129b55f32  99c6770'::uuid;
+ERROR:  invalid input syntax for type uuid: "5b35380a714349129b55f32  99c6770"
+LINE 1: SELECT '5b35380a714349129b55f32  99c6770'::uuid;
+               ^
+SELECT '5b35380a-7143-4912-9b55-f322699c67  '::uuid;
+ERROR:  invalid input syntax for type uuid: "5b35380a-7143-4912-9b55-f322699c67  "
+LINE 1: SELECT '5b35380a-7143-4912-9b55-f322699c67  '::uuid;
+               ^
+SELECT '  35380a-7143-4912-9b55-f322699c6770'::uuid;
+ERROR:  invalid input syntax for type uuid: "  35380a-7143-4912-9b55-f322699c6770"
+LINE 1: SELECT '  35380a-7143-4912-9b55-f322699c6770'::uuid;
+               ^
+SELECT 'AZ35380a-7143-4912-9b55-f322699c6770'::uuid;
+ERROR:  invalid input syntax for type uuid: "AZ35380a-7143-4912-9b55-f322699c6770"
+LINE 1: SELECT 'AZ35380a-7143-4912-9b55-f322699c6770'::uuid;
+               ^
+SELECT '{AZ35380a-7143-4912-9b55-f322699c6770}'::uuid;
+ERROR:  invalid input syntax for type uuid: "{AZ35380a-7143-4912-9b55-f322699c6770}"
+LINE 1: SELECT '{AZ35380a-7143-4912-9b55-f322699c6770}'::uuid;
+               ^
+SELECT '{AZ35380a714349129b55f322699c6770}'::uuid;
+ERROR:  invalid input syntax for type uuid: "{AZ35380a714349129b55f322699c6770}"
+LINE 1: SELECT '{AZ35380a714349129b55f322699c6770}'::uuid;
+               ^
+SELECT '{AZ35380a714349129b55f322699c67  }'::uuid;
+ERROR:  invalid input syntax for type uuid: "{AZ35380a714349129b55f322699c67  }"
+LINE 1: SELECT '{AZ35380a714349129b55f322699c67  }'::uuid;
+               ^
 -- clean up
 DROP TABLE guid1, guid2, guid3 CASCADE;
diff --git a/src/test/regress/sql/uuid.sql b/src/test/regress/sql/uuid.sql
index 8cc2ad40614..d67d3d2ded9 100644
--- a/src/test/regress/sql/uuid.sql
+++ b/src/test/regress/sql/uuid.sql
@@ -161,5 +161,21 @@ SELECT '\x019a2f859ced7225b99d9c55044a2563'::bytea::uuid;
 SELECT '\x1234567890abcdef'::bytea::uuid; -- error
 SELECT v = v::bytea::uuid as matched FROM gen_random_uuid() v;
 
+-- Test UUID shapes that the parser uses the SIMD path.
+SELECT '5b35380a-7143-4912-9b55-f322699c6770'::uuid;
+SELECT '{5b35380a-7143-4912-9b55-f322699c6770}'::uuid;
+SELECT '5b35380a714349129b55f322699c6770'::uuid;
+SELECT '{5b35380a714349129b55f322699c6770}'::uuid;
+
+-- Test if the UUID parser using SIMD optimization correctly rejects invalid UUID
+-- string format.
+SELECT '5b35380a714349129b55f32  99c6770'::uuid;
+SELECT '5b35380a-7143-4912-9b55-f322699c67  '::uuid;
+SELECT '  35380a-7143-4912-9b55-f322699c6770'::uuid;
+SELECT 'AZ35380a-7143-4912-9b55-f322699c6770'::uuid;
+SELECT '{AZ35380a-7143-4912-9b55-f322699c6770}'::uuid;
+SELECT '{AZ35380a714349129b55f322699c6770}'::uuid;
+SELECT '{AZ35380a714349129b55f322699c67  }'::uuid;
+
 -- clean up
 DROP TABLE guid1, guid2, guid3 CASCADE;
-- 
2.54.0

