From 47dc46bf18bcfa726400cc0abaa9eae65ff20864 Mon Sep 17 00:00:00 2001 From: David Rowley Date: Thu, 1 Dec 2022 11:15:26 +1300 Subject: [PATCH v1] Improve performance of pg_strtointNN functions Experiments have shown that modern versions of both gcc and clang are unable to fully optimize the multiplication by 10 that we're doing in the pg_strtointNN functions. Both compilers seem to be making use of "imul", which is not the most efficient way to multiply by 10. This seems to be due to the overflow checking that we're doing. Without the overflow checks, both clang and gcc are able to make use of the "add" and "lea" instructions on x86. To allow compilers more flexibility, here we adjust the code so that we accumulate the number as an unsigned version of the type and remove the use of pg_mul_sNN_overflow() and pg_sub_sNN_overflow(). The overflow checking can be done simply by checking if the accumulated value has gone beyond a 10th of the maximum signed value for the given type. If it has then the accumulation of the next digit will cause an overflow. After this is done, we do a final overflow check before converting the unsigned version of the number back to its signed counterpart. Testing has shown about a 10-20% speedup of the pg_strtointNN functions. Systems without HAVE__BUILTIN_OP_OVERFLOW will be on the higher end of that increase. Author: David Rowley, Dean Rasheed Discussion: https://postgr.es/m/CAApHDvrL6_+wKgPqRHr7gH_6xy3hXM6a3QCsZ5ForurjDFfenA@mail.gmail.com --- src/backend/utils/adt/numutils.c | 83 +++++++++++++++----------------- 1 file changed, 39 insertions(+), 44 deletions(-) diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c index 834ec0b588..d769629852 100644 --- a/src/backend/utils/adt/numutils.c +++ b/src/backend/utils/adt/numutils.c @@ -91,15 +91,15 @@ decimalLength64(const uint64 v) * Allows any number of leading or trailing whitespace characters. Will throw * ereport() upon bad input format or overflow. * - * NB: Accumulate input as a negative number, to deal with two's complement + * NB: Accumulate input as an unsigned number, to deal with two's complement * representation of the most negative number, which can't be represented as a - * positive number. + * signed positive number. */ int16 pg_strtoint16(const char *s) { const char *ptr = s; - int16 tmp = 0; + uint16 tmp = 0; bool neg = false; /* skip leading spaces */ @@ -122,11 +122,10 @@ pg_strtoint16(const char *s) /* process digits */ while (*ptr && isdigit((unsigned char) *ptr)) { - int8 digit = (*ptr++ - '0'); - - if (unlikely(pg_mul_s16_overflow(tmp, 10, &tmp)) || - unlikely(pg_sub_s16_overflow(tmp, digit, &tmp))) + if (unlikely(tmp >= (1 - PG_INT16_MIN / 10))) goto out_of_range; + + tmp = tmp * 10 + (*ptr++ - '0'); } /* allow trailing whitespace, but not other trailing chars */ @@ -136,15 +135,17 @@ pg_strtoint16(const char *s) if (unlikely(*ptr != '\0')) goto invalid_syntax; - if (!neg) + if (neg) { - /* could fail if input is most negative number */ - if (unlikely(tmp == PG_INT16_MIN)) + if (tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1) goto out_of_range; - tmp = -tmp; + return -((int16) tmp); } - return tmp; + if (tmp > PG_INT16_MAX) + goto out_of_range; + + return (int16) tmp; out_of_range: ereport(ERROR, @@ -167,15 +168,15 @@ invalid_syntax: * Allows any number of leading or trailing whitespace characters. Will throw * ereport() upon bad input format or overflow. * - * NB: Accumulate input as a negative number, to deal with two's complement + * NB: Accumulate input as an unsigned number, to deal with two's complement * representation of the most negative number, which can't be represented as a - * positive number. + * signed positive number. */ int32 pg_strtoint32(const char *s) { const char *ptr = s; - int32 tmp = 0; + uint32 tmp = 0; bool neg = false; /* skip leading spaces */ @@ -198,11 +199,10 @@ pg_strtoint32(const char *s) /* process digits */ while (*ptr && isdigit((unsigned char) *ptr)) { - int8 digit = (*ptr++ - '0'); - - if (unlikely(pg_mul_s32_overflow(tmp, 10, &tmp)) || - unlikely(pg_sub_s32_overflow(tmp, digit, &tmp))) + if (unlikely(tmp >= (1 - PG_INT32_MIN / 10))) goto out_of_range; + + tmp = tmp * 10 + (*ptr++ - '0'); } /* allow trailing whitespace, but not other trailing chars */ @@ -212,15 +212,17 @@ pg_strtoint32(const char *s) if (unlikely(*ptr != '\0')) goto invalid_syntax; - if (!neg) + if (neg) { - /* could fail if input is most negative number */ - if (unlikely(tmp == PG_INT32_MIN)) + if (tmp > (uint32) (-(PG_INT32_MIN + 1)) + 1) goto out_of_range; - tmp = -tmp; + return -((int32) tmp); } - return tmp; + if (tmp > PG_INT32_MAX) + goto out_of_range; + + return (int32) tmp; out_of_range: ereport(ERROR, @@ -243,25 +245,17 @@ invalid_syntax: * Allows any number of leading or trailing whitespace characters. Will throw * ereport() upon bad input format or overflow. * - * NB: Accumulate input as a negative number, to deal with two's complement + * NB: Accumulate input as an unsigned number, to deal with two's complement * representation of the most negative number, which can't be represented as a - * positive number. + * signed positive number. */ int64 pg_strtoint64(const char *s) { const char *ptr = s; - int64 tmp = 0; + uint64 tmp = 0; bool neg = false; - /* - * Do our own scan, rather than relying on sscanf which might be broken - * for long long. - * - * As INT64_MIN can't be stored as a positive 64 bit integer, accumulate - * value as a negative number. - */ - /* skip leading spaces */ while (*ptr && isspace((unsigned char) *ptr)) ptr++; @@ -282,11 +276,10 @@ pg_strtoint64(const char *s) /* process digits */ while (*ptr && isdigit((unsigned char) *ptr)) { - int8 digit = (*ptr++ - '0'); - - if (unlikely(pg_mul_s64_overflow(tmp, 10, &tmp)) || - unlikely(pg_sub_s64_overflow(tmp, digit, &tmp))) + if (unlikely(tmp >= (1 - PG_INT64_MIN / 10))) goto out_of_range; + + tmp = tmp * 10 + (*ptr++ - '0'); } /* allow trailing whitespace, but not other trailing chars */ @@ -296,15 +289,17 @@ pg_strtoint64(const char *s) if (unlikely(*ptr != '\0')) goto invalid_syntax; - if (!neg) + if (neg) { - /* could fail if input is most negative number */ - if (unlikely(tmp == PG_INT64_MIN)) + if (tmp > (uint64) (-(PG_INT64_MIN + 1)) + 1) goto out_of_range; - tmp = -tmp; + return -((int64) tmp); } - return tmp; + if (tmp > PG_INT64_MAX) + goto out_of_range; + + return (int64) tmp; out_of_range: ereport(ERROR, -- 2.35.1.windows.2