From aa55843b0c64bed9f72cf8cd7854df9df7ef989b Mon Sep 17 00:00:00 2001 From: Nazir Bilal Yavuz Date: Tue, 19 Aug 2025 15:16:02 +0300 Subject: [PATCH v1] COPY SIMD: add heuristic to avoid regression on small advances MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a SIMD step advances fewer than 5 characters, performance regresses. To mitigate this, introduce a back-off heuristic: - If advance < 5 -> insert a sleep penalty: the SIMD path is skipped for the next n loop iterations (n cycles). - Each time advance < 5, n is doubled (capped at PG_INT16_MAX). - Each time advance ≥ 5, or a whole chunk is skipped because it contains no special characters, n is halved (minimum 1). --- src/backend/commands/copyfromparse.c | 42 ++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c index 5aba0fa6cb7..e58d7d4e353 100644 --- a/src/backend/commands/copyfromparse.c +++ b/src/backend/commands/copyfromparse.c @@ -1263,6 +1263,9 @@ CopyReadLineText(CopyFromState cstate, bool is_csv) Vector8 bs = vector8_broadcast('\\'); Vector8 quote; Vector8 escape; + + int sleep_cyle = 0; + int last_sleep_cyle = 1; #endif if (is_csv) @@ -1359,7 +1362,7 @@ CopyReadLineText(CopyFromState cstate, bool is_csv) * vector register, as SIMD operations require processing data in * fixed-size chunks. */ - if (!in_quote && copy_buf_len - input_buf_ptr >= sizeof(Vector8)) + if (sleep_cyle <= 0 && !in_quote && copy_buf_len - input_buf_ptr >= sizeof(Vector8)) { Vector8 chunk; Vector8 match; @@ -1390,14 +1393,49 @@ CopyReadLineText(CopyFromState cstate, bool is_csv) */ int advance = pg_rightmost_one_pos32(mask); input_buf_ptr += advance; + + /* + * If we advance less than 5 characters we cause regression. + * Sleep a bit then try again. Sleep time increases + * exponentially. 
+ */ + if (advance < 5) + { + if (last_sleep_cyle >= PG_INT16_MAX / 2) + last_sleep_cyle = PG_INT16_MAX; + else + last_sleep_cyle = last_sleep_cyle << 1; + + sleep_cyle = last_sleep_cyle; + } + + /* + * If we advance more than 4 characters this means we have a + * performance improvement. Halve sleep time for next sleep. + */ + else + { + last_sleep_cyle = Max(last_sleep_cyle >> 1, 1); + sleep_cyle = 0; + } } else { - /* No special characters found, so skip the entire chunk */ + /* + * No special characters found, so skip the entire chunk and + * halve sleep time for next sleep. + */ input_buf_ptr += sizeof(Vector8); + last_sleep_cyle = Max(last_sleep_cyle >> 1, 1); continue; } } + + /* + * Vulnerable to overflow if we are in quote for more than INT16_MAX + * characters. + */ + sleep_cyle--; #endif /* OK to fetch a character */ -- 2.50.1