From 740e9b9887dc47d8f12745ade91839ffe27e40d2 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 21 Apr 2026 18:07:23 -0400
Subject: [PATCH v1] Prevent some buffer overruns in spell.c's parsing of affix
 files.

parse_affentry() and addCompoundAffixFlagValue() each collect fields
from an affix file into working buffers of size BUFSIZ.  They failed
to defend against overlength fields, so that a malicious affix file
could cause a stack smash.  BUFSIZ (typically 8K) is certainly way
longer than any reasonable affix field, but let's fix this while
we're closing holes in this area.

I chose to do this by silently truncating the input before it can
overrun the buffer, using logic comparable to the existing logic in
get_nextfield().  Certainly there's at least as good an argument for
raising an error, but for now let's follow the existing precedent.

Reported-by: Igor Stepansky <igor.stepansky@orca.security>
Author: Tom Lane <tgl@sss.pgh.pa.us>
Backpatch-through: 14
---
 src/backend/tsearch/spell.c | 34 ++++++++++++++++++++++++----------
 1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/src/backend/tsearch/spell.c b/src/backend/tsearch/spell.c
index a1bfd2a9f9b..dced5c444e0 100644
--- a/src/backend/tsearch/spell.c
+++ b/src/backend/tsearch/spell.c
@@ -909,14 +909,20 @@ parse_ooaffentry(char *str, char *type, char *flag, char *find,
  *
  * An .affix file entry has the following format:
  * <mask>  >  [-<find>,]<replace>
+ *
+ * Output buffers mask, find, repl must be of length BUFSIZ;
+ * we truncate the input to fit.
  */
 static bool
-parse_affentry(char *str, char *mask, char *find, char *repl)
+parse_affentry(const char *str, char *mask, char *find, char *repl)
 {
 	int			state = PAE_WAIT_MASK;
 	char	   *pmask = mask,
 			   *pfind = find,
 			   *prepl = repl;
+	char	   *emask = mask + BUFSIZ;
+	char	   *efind = find + BUFSIZ;
+	char	   *erepl = repl + BUFSIZ;
 
 	*mask = *find = *repl = '\0';
 
@@ -930,7 +936,8 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
 				return false;
 			else if (!isspace((unsigned char) *str))
 			{
-				pmask += ts_copychar_with_len(pmask, str, clen);
+				if (pmask < emask - clen)
+					pmask += ts_copychar_with_len(pmask, str, clen);
 				state = PAE_INMASK;
 			}
 		}
@@ -943,7 +950,8 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
 			}
 			else if (!isspace((unsigned char) *str))
 			{
-				pmask += ts_copychar_with_len(pmask, str, clen);
+				if (pmask < emask - clen)
+					pmask += ts_copychar_with_len(pmask, str, clen);
 			}
 		}
 		else if (state == PAE_WAIT_FIND)
@@ -954,7 +962,8 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
 			}
 			else if (t_isalpha_cstr(str) || t_iseq(str, '\'') /* english 's */ )
 			{
-				prepl += ts_copychar_with_len(prepl, str, clen);
+				if (prepl < erepl - clen)
+					prepl += ts_copychar_with_len(prepl, str, clen);
 				state = PAE_INREPL;
 			}
 			else if (!isspace((unsigned char) *str))
@@ -971,7 +980,8 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
 			}
 			else if (t_isalpha_cstr(str))
 			{
-				pfind += ts_copychar_with_len(pfind, str, clen);
+				if (pfind < efind - clen)
+					pfind += ts_copychar_with_len(pfind, str, clen);
 			}
 			else if (!isspace((unsigned char) *str))
 				ereport(ERROR,
@@ -986,7 +996,8 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
 			}
 			else if (t_isalpha_cstr(str))
 			{
-				prepl += ts_copychar_with_len(prepl, str, clen);
+				if (prepl < erepl - clen)
+					prepl += ts_copychar_with_len(prepl, str, clen);
 				state = PAE_INREPL;
 			}
 			else if (!isspace((unsigned char) *str))
@@ -1003,7 +1014,8 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
 			}
 			else if (t_isalpha_cstr(str))
 			{
-				prepl += ts_copychar_with_len(prepl, str, clen);
+				if (prepl < erepl - clen)
+					prepl += ts_copychar_with_len(prepl, str, clen);
 			}
 			else if (!isspace((unsigned char) *str))
 				ereport(ERROR,
@@ -1061,7 +1073,7 @@ setCompoundAffixFlagValue(IspellDict *Conf, CompoundAffixFlag *entry,
  * val: affix parameter.
  */
 static void
-addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val)
+addCompoundAffixFlagValue(IspellDict *Conf, const char *s, uint32 val)
 {
 	CompoundAffixFlag *newValue;
 	char		sbuf[BUFSIZ];
@@ -1079,9 +1091,11 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val)
 	sflag = sbuf;
 	while (*s && !isspace((unsigned char) *s) && *s != '\n')
 	{
-		int			clen = ts_copychar_cstr(sflag, s);
+		int			clen = pg_mblen_cstr(s);
 
-		sflag += clen;
+		/* Truncate the input to fit in BUFSIZ */
+		if (sflag < sbuf + BUFSIZ - clen)
+			sflag += ts_copychar_with_len(sflag, s, clen);
 		s += clen;
 	}
 	*sflag = '\0';
-- 
2.43.7

