From 1464c22da33117900341496d03b92dec5be2a62a Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Thu, 22 Jul 2021 02:05:06 +1200 Subject: [PATCH 1/2] XXX Make SIMD code more platform neutral. Move SIMD code into pg_utf8_simd.c to experiment with the idea of a shared implementation across architectures. Introduce pg_u8x16_t to abstract vector type. XXX Experiment grade code only --- configure | 18 +-- configure.ac | 18 +-- src/include/pg_config.h.in | 14 +- src/include/port/pg_utf8.h | 10 +- src/port/Makefile | 8 +- ...g_utf8_sse42_choose.c => pg_utf8_choose.c} | 10 +- src/port/{pg_utf8_sse42.c => pg_utf8_simd.c} | 148 +++++++++--------- 7 files changed, 114 insertions(+), 112 deletions(-) rename src/port/{pg_utf8_sse42_choose.c => pg_utf8_choose.c} (88%) rename src/port/{pg_utf8_sse42.c => pg_utf8_simd.c} (76%) diff --git a/configure b/configure index 30969840b1..df546b641c 100755 --- a/configure +++ b/configure @@ -18442,13 +18442,13 @@ fi # # You can override this logic by setting the appropriate USE_*_UTF8 flag to 1 # in the template or configure command line. -if test x"$USE_SSE42_UTF8" = x"" && test x"$USE_SSE42_UTF8_WITH_RUNTIME_CHECK" = x"" && test x"$USE_FALLBACK_UTF8" = x""; then +if test x"$USE_SIMD_UTF8" = x"" && test x"$USE_SIMD_UTF8_WITH_RUNTIME_CHECK" = x"" && test x"$USE_FALLBACK_UTF8" = x""; then if test x"$pgac_sse42_intrinsics" = x"yes" && test x"$SSE4_2_TARGETED" = x"1" ; then - USE_SSE42_UTF8=1 + USE_SIMD_UTF8=1 else # the CPUID instruction is needed for the runtime check. if test x"$pgac_sse42_intrinsics" = x"yes" && (test x"$pgac_cv__get_cpuid" = x"yes" || test x"$pgac_cv__cpuid" = x"yes"); then - USE_SSE42_UTF8_WITH_RUNTIME_CHECK=1 + USE_SIMD_UTF8_WITH_RUNTIME_CHECK=1 else # fall back to algorithm which doesn't require any special # CPU support. @@ -18461,19 +18461,19 @@ fi # Note: We need the fallback for error handling in all builds. 
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking which UTF-8 validator to use" >&5 $as_echo_n "checking which UTF-8 validator to use... " >&6; } -if test x"$USE_SSE42_UTF8" = x"1"; then +if test x"$USE_SIMD_UTF8" = x"1"; then -$as_echo "#define USE_SSE42_UTF8 1" >>confdefs.h +$as_echo "#define USE_SIMD_UTF8 1" >>confdefs.h - PG_UTF8_OBJS="pg_utf8_sse42.o pg_utf8_fallback.o" + PG_UTF8_OBJS="pg_utf8_simd.o pg_utf8_fallback.o" { $as_echo "$as_me:${as_lineno-$LINENO}: result: SSE 4.2" >&5 $as_echo "SSE 4.2" >&6; } else - if test x"$USE_SSE42_UTF8_WITH_RUNTIME_CHECK" = x"1"; then + if test x"$USE_SIMD_UTF8_WITH_RUNTIME_CHECK" = x"1"; then -$as_echo "#define USE_SSE42_UTF8_WITH_RUNTIME_CHECK 1" >>confdefs.h +$as_echo "#define USE_SIMD_UTF8_WITH_RUNTIME_CHECK 1" >>confdefs.h - PG_UTF8_OBJS="pg_utf8_sse42.o pg_utf8_fallback.o pg_utf8_sse42_choose.o" + PG_UTF8_OBJS="pg_utf8_simd.o pg_utf8_fallback.o pg_utf8_choose.o" { $as_echo "$as_me:${as_lineno-$LINENO}: result: SSE 4.2 with runtime check" >&5 $as_echo "SSE 4.2 with runtime check" >&6; } else diff --git a/configure.ac b/configure.ac index 5e2b4717c1..1606a80fb7 100644 --- a/configure.ac +++ b/configure.ac @@ -2217,13 +2217,13 @@ AC_SUBST(PG_CRC32C_OBJS) # # You can override this logic by setting the appropriate USE_*_UTF8 flag to 1 # in the template or configure command line. -if test x"$USE_SSE42_UTF8" = x"" && test x"$USE_SSE42_UTF8_WITH_RUNTIME_CHECK" = x"" && test x"$USE_FALLBACK_UTF8" = x""; then +if test x"$USE_SIMD_UTF8" = x"" && test x"$USE_SIMD_UTF8_WITH_RUNTIME_CHECK" = x"" && test x"$USE_FALLBACK_UTF8" = x""; then if test x"$pgac_sse42_intrinsics" = x"yes" && test x"$SSE4_2_TARGETED" = x"1" ; then - USE_SSE42_UTF8=1 + USE_SIMD_UTF8=1 else # the CPUID instruction is needed for the runtime check. 
if test x"$pgac_sse42_intrinsics" = x"yes" && (test x"$pgac_cv__get_cpuid" = x"yes" || test x"$pgac_cv__cpuid" = x"yes"); then - USE_SSE42_UTF8_WITH_RUNTIME_CHECK=1 + USE_SIMD_UTF8_WITH_RUNTIME_CHECK=1 else # fall back to algorithm which doesn't require any special # CPU support. @@ -2235,14 +2235,14 @@ fi # Set PG_UTF8_OBJS appropriately depending on the selected implementation. # Note: We need the fallback for error handling in all builds. AC_MSG_CHECKING([which UTF-8 validator to use]) -if test x"$USE_SSE42_UTF8" = x"1"; then - AC_DEFINE(USE_SSE42_UTF8, 1, [Define to 1 use Intel SSE 4.2 instructions.]) - PG_UTF8_OBJS="pg_utf8_sse42.o pg_utf8_fallback.o" +if test x"$USE_SIMD_UTF8" = x"1"; then + AC_DEFINE(USE_SIMD_UTF8, 1, [Define to 1 use Intel SSE 4.2 instructions.]) + PG_UTF8_OBJS="pg_utf8_simd.o pg_utf8_fallback.o" AC_MSG_RESULT(SSE 4.2) else - if test x"$USE_SSE42_UTF8_WITH_RUNTIME_CHECK" = x"1"; then - AC_DEFINE(USE_SSE42_UTF8_WITH_RUNTIME_CHECK, 1, [Define to 1 to use Intel SSE 4.2 instructions with a runtime check.]) - PG_UTF8_OBJS="pg_utf8_sse42.o pg_utf8_fallback.o pg_utf8_sse42_choose.o" + if test x"$USE_SIMD_UTF8_WITH_RUNTIME_CHECK" = x"1"; then + AC_DEFINE(USE_SIMD_UTF8_WITH_RUNTIME_CHECK, 1, [Define to 1 to use Intel SSE 4.2 instructions with a runtime check.]) + PG_UTF8_OBJS="pg_utf8_simd.o pg_utf8_fallback.o pg_utf8_choose.o" AC_MSG_RESULT(SSE 4.2 with runtime check) else AC_DEFINE(USE_FALLBACK_UTF8, 1, [Define to 1 to use Intel SSE 4.2 instructions with a runtime check.]) diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index 9d5e1efda9..f1456553e6 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -904,6 +904,9 @@ /* Define to 1 to build with BSD Authentication support. (--with-bsd-auth) */ #undef USE_BSD_AUTH +/* Define to 1 to use Intel SSE 4.2 instructions with a runtime check. */ +#undef USE_FALLBACK_UTF8 + /* Define to build with ICU support. 
(--with-icu) */ #undef USE_ICU @@ -932,14 +935,11 @@ /* Define to 1 to build with PAM support. (--with-pam) */ #undef USE_PAM -/* Define to 1 to use the fallback UTF-8 validator written in C. */ -#undef USE_FALLBACK_UTF8 - -/* Define to 1 use the UTF-8 validator written with Intel SSE instructions. */ -#undef USE_SSE42_UTF8 +/* Define to 1 use Intel SSE 4.2 instructions. */ +#undef USE_SIMD_UTF8 -/* Define to 1 use the UTF-8 validator written with Intel SSE instructions with runtime check. */ -#undef USE_SSE42_UTF8_WITH_RUNTIME_CHECK +/* Define to 1 to use Intel SSE 4.2 instructions with a runtime check. */ +#undef USE_SIMD_UTF8_WITH_RUNTIME_CHECK /* Define to 1 to use software CRC-32C implementation (slicing-by-8). */ #undef USE_SLICING_BY_8_CRC32C diff --git a/src/include/port/pg_utf8.h b/src/include/port/pg_utf8.h index dc38369a31..a9f2b9f15b 100644 --- a/src/include/port/pg_utf8.h +++ b/src/include/port/pg_utf8.h @@ -15,14 +15,14 @@ #define PG_UTF8_H -#if defined(USE_SSE42_UTF8) +#if defined(USE_SIMD_UTF8) /* Use Intel SSE4.2 instructions. */ #define UTF8_VERIFYSTR(s, len) \ - pg_validate_utf8_sse42((s), (len)) + pg_validate_utf8_simd((s), (len)) -extern int pg_validate_utf8_sse42(const unsigned char *s, int len); +extern int pg_validate_utf8_simd(const unsigned char *s, int len); -#elif defined(USE_SSE42_UTF8_WITH_RUNTIME_CHECK) +#elif defined(USE_SIMD_UTF8_WITH_RUNTIME_CHECK) /* * Use Intel SSE 4.2 instructions, but perform a runtime check first * to check that they are available. 
@@ -31,7 +31,7 @@ extern int pg_validate_utf8_sse42(const unsigned char *s, int len); pg_validate_utf8((s), (len)) extern int (*pg_validate_utf8) (const unsigned char *s, int len); -extern int pg_validate_utf8_sse42(const unsigned char *s, int len); +extern int pg_validate_utf8_simd(const unsigned char *s, int len); #else #define UTF8_VERIFYSTR(s, len) \ diff --git a/src/port/Makefile b/src/port/Makefile index 04838b0ab2..893fcd7d59 100644 --- a/src/port/Makefile +++ b/src/port/Makefile @@ -90,10 +90,10 @@ libpgport.a: $(OBJS) thread.o: CFLAGS+=$(PTHREAD_CFLAGS) thread_shlib.o: CFLAGS+=$(PTHREAD_CFLAGS) -# all versions of pg_utf8_sse42.o need CFLAGS_SSE42 -pg_utf8_sse42.o: CFLAGS+=$(CFLAGS_SSE42) -pg_utf8_sse42_shlib.o: CFLAGS+=$(CFLAGS_SSE42) -pg_utf8_sse42_srv.o: CFLAGS+=$(CFLAGS_SSE42) +# all versions of pg_utf8_simd.o need CFLAGS_SSE42 +pg_utf8_simd.o: CFLAGS+=$(CFLAGS_SSE42) +pg_utf8_simd_shlib.o: CFLAGS+=$(CFLAGS_SSE42) +pg_utf8_simd_srv.o: CFLAGS+=$(CFLAGS_SSE42) # all versions of pg_crc32c_sse42.o need CFLAGS_SSE42 pg_crc32c_sse42.o: CFLAGS+=$(CFLAGS_SSE42) diff --git a/src/port/pg_utf8_sse42_choose.c b/src/port/pg_utf8_choose.c similarity index 88% rename from src/port/pg_utf8_sse42_choose.c rename to src/port/pg_utf8_choose.c index ff6120be2b..140c0dce7b 100644 --- a/src/port/pg_utf8_sse42_choose.c +++ b/src/port/pg_utf8_choose.c @@ -1,7 +1,7 @@ /*------------------------------------------------------------------------- * - * pg_utf8_sse42_choose.c - * Choose between Intel SSE 4.2 and fallback implementation. + * pg_utf8_choose.c + * Choose between SSE 4.2 and fallback implementation. * * On first call, checks if the CPU we're running on supports Intel SSE * 4.2. If it does, use SSE instructions for UTF-8 validation. Otherwise, @@ -30,7 +30,7 @@ #include "port/pg_utf8.h" static bool -pg_utf8_sse42_available(void) +pg_utf8_simd_available(void) { /* To save from checking every SSE2 intrinsic, insist on 64-bit. 
*/ #ifdef __x86_64__ @@ -57,8 +57,8 @@ pg_utf8_sse42_available(void) static int pg_validate_utf8_choose(const unsigned char *s, int len) { - if (pg_utf8_sse42_available()) - pg_validate_utf8 = pg_validate_utf8_sse42; + if (pg_utf8_simd_available()) + pg_validate_utf8 = pg_validate_utf8_simd; else pg_validate_utf8 = pg_validate_utf8_fallback; diff --git a/src/port/pg_utf8_sse42.c b/src/port/pg_utf8_simd.c similarity index 76% rename from src/port/pg_utf8_sse42.c rename to src/port/pg_utf8_simd.c index cd050ec2bf..7ca9060e3a 100644 --- a/src/port/pg_utf8_sse42.c +++ b/src/port/pg_utf8_simd.c @@ -1,6 +1,6 @@ /*------------------------------------------------------------------------- * - * pg_utf8_sse42.c + * pg_utf8_simd.c * Validate UTF-8 using Intel SSE 4.2 instructions. * * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * src/port/pg_utf8_sse42.c + * src/port/pg_utf8_simd.c * *------------------------------------------------------------------------- */ @@ -19,6 +19,8 @@ #include "port/pg_utf8.h" +typedef __m128i pg_u8x16_t; + /* * This module is based on the paper "Validating UTF-8 In Less Than One * Instruction Per Byte" by John Keiser and Daniel Lemire, arXiv:2010.03090 @@ -184,48 +186,48 @@ #define vset(...) 
_mm_setr_epi8(__VA_ARGS__) /* return a zeroed register */ -static inline const __m128i +static inline const pg_u8x16_t vzero() { return _mm_setzero_si128(); } /* perform an unaligned load from memory into a register */ -static inline const __m128i +static inline const pg_u8x16_t vload(const unsigned char *raw_input) { - return _mm_loadu_si128((const __m128i *) raw_input); + return _mm_loadu_si128((const pg_u8x16_t *) raw_input); } /* return a vector with each 8-bit lane populated with the input scalar */ -static inline __m128i +static inline pg_u8x16_t splat(char byte) { return _mm_set1_epi8(byte); } /* perform signed greater-than on all 8-bit lanes */ -static inline __m128i -greater_than(const __m128i v1, const __m128i v2) +static inline pg_u8x16_t +greater_than(const pg_u8x16_t v1, const pg_u8x16_t v2) { return _mm_cmpgt_epi8(v1, v2); } /* bitwise vector operations */ -static inline __m128i -bitwise_and(const __m128i v1, const __m128i v2) +static inline pg_u8x16_t +bitwise_and(const pg_u8x16_t v1, const pg_u8x16_t v2) { return _mm_and_si128(v1, v2); } -static inline __m128i -bitwise_or(const __m128i v1, const __m128i v2) +static inline pg_u8x16_t +bitwise_or(const pg_u8x16_t v1, const pg_u8x16_t v2) { return _mm_or_si128(v1, v2); } -static inline __m128i -bitwise_xor(const __m128i v1, const __m128i v2) +static inline pg_u8x16_t +bitwise_xor(const pg_u8x16_t v1, const pg_u8x16_t v2) { return _mm_xor_si128(v1, v2); } @@ -235,8 +237,8 @@ bitwise_xor(const __m128i v1, const __m128i v2) * on overflow, stop at zero. Useful for emulating unsigned * comparison. 
*/ -static inline __m128i -saturating_sub(const __m128i v1, const __m128i v2) +static inline pg_u8x16_t +saturating_sub(const pg_u8x16_t v1, const pg_u8x16_t v2) { return _mm_subs_epu8(v1, v2); } @@ -247,11 +249,11 @@ saturating_sub(const __m128i v1, const __m128i v2) * There is no intrinsic to do this on 8-bit lanes, so shift right in each * 16-bit lane then apply a mask in each 8-bit lane shifted the same amount. */ -static inline __m128i -shift_right(const __m128i v, const int n) +static inline pg_u8x16_t +shift_right(const pg_u8x16_t v, const int n) { - const __m128i shift16 = _mm_srli_epi16(v, n); - const __m128i mask = splat(0xFF >> n); + const pg_u8x16_t shift16 = _mm_srli_epi16(v, n); + const pg_u8x16_t mask = splat(0xFF >> n); return bitwise_and(shift16, mask); } @@ -266,30 +268,30 @@ shift_right(const __m128i v, const int n) * The third argument to the intrinsic must be a numeric constant, so * we must have separate functions for different shift amounts. */ -static inline __m128i -prev1(__m128i prev, __m128i input) +static inline pg_u8x16_t +prev1(pg_u8x16_t prev, pg_u8x16_t input) { - return _mm_alignr_epi8(input, prev, sizeof(__m128i) - 1); + return _mm_alignr_epi8(input, prev, sizeof(pg_u8x16_t) - 1); } -static inline __m128i -prev2(__m128i prev, __m128i input) +static inline pg_u8x16_t +prev2(pg_u8x16_t prev, pg_u8x16_t input) { - return _mm_alignr_epi8(input, prev, sizeof(__m128i) - 2); + return _mm_alignr_epi8(input, prev, sizeof(pg_u8x16_t) - 2); } -static inline __m128i -prev3(__m128i prev, __m128i input) +static inline pg_u8x16_t +prev3(pg_u8x16_t prev, pg_u8x16_t input) { - return _mm_alignr_epi8(input, prev, sizeof(__m128i) - 3); + return _mm_alignr_epi8(input, prev, sizeof(pg_u8x16_t) - 3); } /* * For each 8-bit lane in the input, use that value as an index * into the lookup vector as if it were a 16-element byte array. 
*/ -static inline __m128i -lookup(const __m128i input, const __m128i lookup) +static inline pg_u8x16_t +lookup(const pg_u8x16_t input, const pg_u8x16_t lookup) { return _mm_shuffle_epi8(lookup, input); } @@ -298,28 +300,28 @@ lookup(const __m128i input, const __m128i lookup) * Return a vector with lanes non-zero where we have either errors, or * two or more continuations in a row. */ -static inline __m128i -check_special_cases(const __m128i prev, const __m128i input) +static inline pg_u8x16_t +check_special_cases(const pg_u8x16_t prev, const pg_u8x16_t input) { - const __m128i byte_1_high_table = vset(BYTE_1_HIGH_TABLE); - const __m128i byte_1_low_table = vset(BYTE_1_LOW_TABLE); - const __m128i byte_2_high_table = vset(BYTE_2_HIGH_TABLE); + const pg_u8x16_t byte_1_high_table = vset(BYTE_1_HIGH_TABLE); + const pg_u8x16_t byte_1_low_table = vset(BYTE_1_LOW_TABLE); + const pg_u8x16_t byte_2_high_table = vset(BYTE_2_HIGH_TABLE); /* * To classify the first byte in each chunk we need to have the last byte * from the previous chunk. 
*/ - const __m128i input_shift1 = prev1(prev, input); + const pg_u8x16_t input_shift1 = prev1(prev, input); /* put the relevant nibbles into their own bytes in their own registers */ - const __m128i byte_1_high = shift_right(input_shift1, 4); - const __m128i byte_1_low = bitwise_and(input_shift1, splat(0x0F)); - const __m128i byte_2_high = shift_right(input, 4); + const pg_u8x16_t byte_1_high = shift_right(input_shift1, 4); + const pg_u8x16_t byte_1_low = bitwise_and(input_shift1, splat(0x0F)); + const pg_u8x16_t byte_2_high = shift_right(input, 4); /* lookup the possible errors for each set of nibbles */ - const __m128i lookup_1_high = lookup(byte_1_high, byte_1_high_table); - const __m128i lookup_1_low = lookup(byte_1_low, byte_1_low_table); - const __m128i lookup_2_high = lookup(byte_2_high, byte_2_high_table); + const pg_u8x16_t lookup_1_high = lookup(byte_1_high, byte_1_high_table); + const pg_u8x16_t lookup_1_low = lookup(byte_1_low, byte_1_low_table); + const pg_u8x16_t lookup_2_high = lookup(byte_2_high, byte_2_high_table); /* * AND all the lookups together. At this point, non-zero lanes in the @@ -331,7 +333,7 @@ check_special_cases(const __m128i prev, const __m128i input) * * 3. the third continuation byte of a 4-byte character */ - const __m128i temp = bitwise_and(lookup_1_high, lookup_1_low); + const pg_u8x16_t temp = bitwise_and(lookup_1_high, lookup_1_low); return bitwise_and(temp, lookup_2_high); } @@ -340,22 +342,22 @@ check_special_cases(const __m128i prev, const __m128i input) * Return a vector with lanes set to TWO_CONTS where we expect to find two * continuations in a row. These are valid only within 3- and 4-byte sequences. */ -static inline __m128i -check_multibyte_lengths(const __m128i prev, const __m128i input) +static inline pg_u8x16_t +check_multibyte_lengths(const pg_u8x16_t prev, const pg_u8x16_t input) { /* * Populate registers that contain the input shifted right by 2 and 3 * bytes, filling in the left lanes from the previous input. 
*/ - const __m128i input_shift2 = prev2(prev, input); - const __m128i input_shift3 = prev3(prev, input); + const pg_u8x16_t input_shift2 = prev2(prev, input); + const pg_u8x16_t input_shift3 = prev3(prev, input); /* * Constants for comparison. Any 3-byte lead is greater than * MAX_TWO_BYTE_LEAD, etc. */ - const __m128i max_lead2 = splat(MAX_TWO_BYTE_LEAD); - const __m128i max_lead3 = splat(MAX_THREE_BYTE_LEAD); + const pg_u8x16_t max_lead2 = splat(MAX_TWO_BYTE_LEAD); + const pg_u8x16_t max_lead3 = splat(MAX_THREE_BYTE_LEAD); /* * Look in the shifted registers for 3- or 4-byte leads. There is no @@ -363,17 +365,17 @@ check_multibyte_lengths(const __m128i prev, const __m128i input) * signed comparison with zero. Any non-zero bytes in the result represent * valid leads. */ - const __m128i is_third_byte = saturating_sub(input_shift2, max_lead2); - const __m128i is_fourth_byte = saturating_sub(input_shift3, max_lead3); + const pg_u8x16_t is_third_byte = saturating_sub(input_shift2, max_lead2); + const pg_u8x16_t is_fourth_byte = saturating_sub(input_shift3, max_lead3); /* OR them together for easier comparison */ - const __m128i temp = bitwise_or(is_third_byte, is_fourth_byte); + const pg_u8x16_t temp = bitwise_or(is_third_byte, is_fourth_byte); /* * Set all bits in each 8-bit lane if the result is greater than zero. * Signed arithmetic is okay because the values are small. 
*/ - const __m128i must23 = greater_than(temp, vzero()); + const pg_u8x16_t must23 = greater_than(temp, vzero()); /* * We want to compare with the result of check_special_cases() so apply a @@ -385,20 +387,20 @@ check_multibyte_lengths(const __m128i prev, const __m128i input) /* set bits in the error vector where we find invalid UTF-8 input */ static inline void -check_utf8_bytes(const __m128i prev, const __m128i input, __m128i * error) +check_utf8_bytes(const pg_u8x16_t prev, const pg_u8x16_t input, pg_u8x16_t * error) { - const __m128i special_cases = check_special_cases(prev, input); - const __m128i expect_two_conts = check_multibyte_lengths(prev, input); + const pg_u8x16_t special_cases = check_special_cases(prev, input); + const pg_u8x16_t expect_two_conts = check_multibyte_lengths(prev, input); /* If the two cases are identical, this will be zero. */ - const __m128i result = bitwise_xor(expect_two_conts, special_cases); + const pg_u8x16_t result = bitwise_xor(expect_two_conts, special_cases); *error = bitwise_or(*error, result); } /* return false if a register is zero, true otherwise */ static inline bool -to_bool(const __m128i v) +to_bool(const pg_u8x16_t v) { /* * _mm_testz_si128 returns 1 if the bitwise AND of the two arguments is @@ -409,25 +411,25 @@ to_bool(const __m128i v) /* set bits in the error vector where bytes in the input are zero */ static inline void -check_for_zeros(const __m128i v, __m128i * error) +check_for_zeros(const pg_u8x16_t v, pg_u8x16_t * error) { - const __m128i cmp = _mm_cmpeq_epi8(v, vzero()); + const pg_u8x16_t cmp = _mm_cmpeq_epi8(v, vzero()); *error = bitwise_or(*error, cmp); } /* vector version of IS_HIGHBIT_SET() */ static inline bool -is_highbit_set(const __m128i v) +is_highbit_set(const pg_u8x16_t v) { return _mm_movemask_epi8(v) != 0; } /* return non-zero if the input terminates with an incomplete code point */ -static inline __m128i -is_incomplete(const __m128i v) +static inline pg_u8x16_t +is_incomplete(const pg_u8x16_t 
v) { - const __m128i max_array = + const pg_u8x16_t max_array = vset(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, @@ -440,20 +442,20 @@ is_incomplete(const __m128i v) * See the comment in common/wchar.c under "multibyte sequence validators". */ int -pg_validate_utf8_sse42(const unsigned char *s, int len) +pg_validate_utf8_simd(const unsigned char *s, int len) { const unsigned char *start = s; const int orig_len = len; - __m128i error = vzero(); - __m128i prev = vzero(); - __m128i prev_incomplete = vzero(); - __m128i input; + pg_u8x16_t error = vzero(); + pg_u8x16_t prev = vzero(); + pg_u8x16_t prev_incomplete = vzero(); + pg_u8x16_t input; /* * NB: This check must be strictly greater-than, otherwise an invalid byte * at the end might not get detected. */ - while (len > sizeof(__m128i)) + while (len > sizeof(pg_u8x16_t)) { input = vload(s); @@ -474,8 +476,8 @@ pg_validate_utf8_sse42(const unsigned char *s, int len) } prev = input; - s += sizeof(__m128i); - len -= sizeof(__m128i); + s += sizeof(pg_u8x16_t); + len -= sizeof(pg_u8x16_t); } /* -- 2.30.2