diff --git a/src/backend/tsearch/dicts/ispell_sample.affix b/src/backend/tsearch/dicts/ispell_sample.affix index f29004ff1d..93b8d5435c 100644 --- a/src/backend/tsearch/dicts/ispell_sample.affix +++ b/src/backend/tsearch/dicts/ispell_sample.affix @@ -5,6 +5,9 @@ prefixes flag *B: . > RE # As in enter > reenter +flag E: + . > IN\- # As in law > in-law + flag U: . > UN # As in natural > unnatural diff --git a/src/backend/tsearch/dicts/ispell_sample.dict b/src/backend/tsearch/dicts/ispell_sample.dict index 44df1967a6..558c7154c1 100644 --- a/src/backend/tsearch/dicts/ispell_sample.dict +++ b/src/backend/tsearch/dicts/ispell_sample.dict @@ -5,4 +5,5 @@ foot/ZS football/Z ball/SZ\ klubber/Z +law/E sky/A diff --git a/src/backend/tsearch/spell.c b/src/backend/tsearch/spell.c index 8aab96d3b0..ccb46339e2 100644 --- a/src/backend/tsearch/spell.c +++ b/src/backend/tsearch/spell.c @@ -909,11 +909,25 @@ parse_affentry(char *str, char *mask, char *find, char *repl) char *pmask = mask, *pfind = find, *prepl = repl; + bool isescaped = false; *mask = *find = *repl = '\0'; while (*str) { + if (t_iseq(str, '\\') && !isescaped && + (state == PAE_INFIND || state == PAE_INREPL)) + { + /* + * Next character should be escaped. Currently it is applied only to + * PAE_INFIND and PAE_INREPL. We expect that the find string can start only + * with '-' and the repl string can start only with an alphabetic character. 
+ */ + isescaped = true; + str += pg_mblen(str); + continue; + } + if (state == PAE_WAIT_MASK) { if (t_iseq(str, '#')) @@ -962,7 +976,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl) *pfind = '\0'; state = PAE_WAIT_REPL; } - else if (t_isalpha(str)) + else if (t_isalpha(str) || isescaped) { COPYCHAR(pfind, str); pfind += pg_mblen(str); @@ -996,7 +1010,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl) *prepl = '\0'; break; } - else if (t_isalpha(str)) + else if (t_isalpha(str) || isescaped) { COPYCHAR(prepl, str); prepl += pg_mblen(str); @@ -1009,6 +1023,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl) else elog(ERROR, "unrecognized state in parse_affentry: %d", state); + isescaped = false; str += pg_mblen(str); } diff --git a/src/test/regress/expected/tsdicts.out b/src/test/regress/expected/tsdicts.out index c804293142..5a0353f552 100644 --- a/src/test/regress/expected/tsdicts.out +++ b/src/test/regress/expected/tsdicts.out @@ -71,6 +71,19 @@ SELECT ts_lexize('ispell', 'unbook'); {book} (1 row) +-- Test for hyphen escaping +SELECT ts_lexize('ispell', 'in-law'); + ts_lexize +----------- + {law} +(1 row) + +SELECT ts_lexize('ispell', 'law'); + ts_lexize +----------- + {law} +(1 row) + SELECT ts_lexize('ispell', 'footklubber'); ts_lexize ---------------- diff --git a/src/test/regress/sql/tsdicts.sql b/src/test/regress/sql/tsdicts.sql index ddc6c7f445..cf901ec08b 100644 --- a/src/test/regress/sql/tsdicts.sql +++ b/src/test/regress/sql/tsdicts.sql @@ -19,6 +19,10 @@ SELECT ts_lexize('ispell', 'unbookings'); SELECT ts_lexize('ispell', 'unbooking'); SELECT ts_lexize('ispell', 'unbook'); +-- Test for hyphen escaping +SELECT ts_lexize('ispell', 'in-law'); +SELECT ts_lexize('ispell', 'law'); + SELECT ts_lexize('ispell', 'footklubber'); SELECT ts_lexize('ispell', 'footballklubber'); SELECT ts_lexize('ispell', 'ballyklubber');